From f648560774032718e13a2d17165d583b24faf73a Mon Sep 17 00:00:00 2001 From: Maria ABI RIZK Date: Mon, 13 Apr 2026 18:38:32 +0200 Subject: [PATCH] =?UTF-8?q?T=C3=A9l=C3=A9verser=20les=20fichiers=20vers=20?= =?UTF-8?q?"/"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ClusteringV4_400plus_Ancien_finMars.ipynb | 6631 +++++++++++++++++++++ 1 file changed, 6631 insertions(+) create mode 100644 ClusteringV4_400plus_Ancien_finMars.ipynb diff --git a/ClusteringV4_400plus_Ancien_finMars.ipynb b/ClusteringV4_400plus_Ancien_finMars.ipynb new file mode 100644 index 0000000..22a824e --- /dev/null +++ b/ClusteringV4_400plus_Ancien_finMars.ipynb @@ -0,0 +1,6631 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d9272d36", + "metadata": {}, + "source": [ + "# Clustering des 400+ gros comptes Carmignac — version enrichie\n", + "\n", + "Ce notebook repart du notebook initial et ajoute :\n", + "\n", + "- une base plus propre et plus lisible ;\n", + "- des **features comportementales** supplémentaires ;\n", + "- un **clustering global** sur les 400+ gros comptes ;\n", + "- un **clustering par fonds** (`Product - Isin`) sur les plus gros fonds ;\n", + "- un cadre prêt à intégrer des **données de performance Carmignac** si elles sont disponibles.\n", + "\n", + "## Principes retenus\n", + "\n", + "- univers = comptes avec **AUM total ≥ 5 M€** au **31/10/2025**\n", + "- exclusion de **Off Distribution** et **Private Clients**\n", + "- **pas de clipping d'outliers** pour l'instant\n", + "- un fonds est identifié par **`Product - Isin`**\n", + "- les résultats doivent être **interprétables métier**, pas seulement \"bons\" mathématiquement" + ] + }, + { + "cell_type": "markdown", + "id": "c6b14319", + "metadata": {}, + "source": [ + "## 1. 
Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1aa70c18", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "import re\n", + "import math\n", + "import json\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "from sklearn.preprocessing import RobustScaler\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.cluster import KMeans, AgglomerativeClustering\n", + "from sklearn.mixture import GaussianMixture\n", + "from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.linear_model import LinearRegression\n", + "\n", + "pd.set_option(\"display.max_columns\", 200)\n", + "pd.set_option(\"display.max_rows\", 200)\n", + "\n", + "RANDOM_STATE = 42\n", + "EPS = 1e-9" + ] + }, + { + "cell_type": "markdown", + "id": "98deb42c", + "metadata": {}, + "source": [ + "## 2. Paths" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a462ff7a-1e4d-44f7-af6c-031b47779ce1", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
reg_origreg_usedAgreement - CodeCompany - IdCompany - Ultimate Parent IdRegistrar Account - RegionRegistrarAccount - CountryProduct - Asset TypeProduct - StrategyProduct - Legal StatusProduct - Is Dedie ?Product - FundProduct - Shareclass TypeProduct - Shareclass CurrencyProduct - IsinCentralisation DateQuantity - AUMValue - AUM CCYValue - AUM €
01887218872L1042257.033675.0SwitzerlandSwitzerlandDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032015-01-3149094.9153.242523e+073.242523e+07
11887218872L1042257.033675.0SwitzerlandSwitzerlandEquityInvestissement LatitudeFCPNOCarmignac Investissement LatitudeAEURFR00101476032015-01-311717.0004.767422e+054.767422e+05
21887218872L1042257.033675.0SwitzerlandSwitzerlandEquityInvestissementFCPNOCarmignac InvestissementAEURFR00101489812015-01-318254.8709.862671e+069.862671e+06
31887218872L1042257.033675.0SwitzerlandSwitzerlandEquityEuro-EntrepreneursFCPNOCarmignac Euro-EntrepreneursAEURFR00101491122015-01-31278.9237.664525e+047.664525e+04
41887218872L1042257.033675.0SwitzerlandSwitzerlandFixed IncomeSécuritéFCPNOCarmignac SécuritéAW & AW-REURFR00101491202015-01-311807.2673.078318e+063.078318e+06
............................................................
1088393Private ClientPrivate ClientPrivate ClientPrivate ClientPrivate ClientFranceFranceDiversifiedInflation SolutionSICAVNOCarmignac Portfolio Inflation SolutionFEURLU27159543302025-10-3181065.4199.533293e+069.533293e+06
1088394Private ClientPrivate ClientPrivate ClientPrivate ClientPrivate ClientFranceFranceDiversifiedInflation SolutionSICAVNOCarmignac Portfolio Inflation SolutionAEURLU27159545042025-10-316853.3637.978685e+057.978685e+05
1088395Private ClientPrivate ClientPrivate ClientPrivate ClientPrivate ClientFranceFrancePrivate AssetsEvergreenSICAVNOCarmignac S.A. SICAV - PART II UCI Private Eve...AEURLU27994731242025-10-314212.2345.263608e+055.263608e+05
1088396Private ClientPrivate ClientPrivate ClientPrivate ClientPrivate ClientFranceFranceEquityTech SolutionsSICAVNOCarmignac Portfolio Tech SolutionsAEURLU28097942202025-10-3131469.5234.438147e+064.438147e+06
1088397Private ClientPrivate ClientPrivate ClientPrivate ClientPrivate ClientFranceFranceEquityTech SolutionsSICAVNOCarmignac Portfolio Tech SolutionsFEURLU28097945762025-10-31554.3017.871629e+047.871629e+04
\n", + "

1088398 rows × 19 columns

\n", + "
" + ], + "text/plain": [ + " reg_orig reg_used Agreement - Code Company - Id \\\n", + "0 18872 18872 L104 2257.0 \n", + "1 18872 18872 L104 2257.0 \n", + "2 18872 18872 L104 2257.0 \n", + "3 18872 18872 L104 2257.0 \n", + "4 18872 18872 L104 2257.0 \n", + "... ... ... ... ... \n", + "1088393 Private Client Private Client Private Client Private Client \n", + "1088394 Private Client Private Client Private Client Private Client \n", + "1088395 Private Client Private Client Private Client Private Client \n", + "1088396 Private Client Private Client Private Client Private Client \n", + "1088397 Private Client Private Client Private Client Private Client \n", + "\n", + " Company - Ultimate Parent Id Registrar Account - Region \\\n", + "0 33675.0 Switzerland \n", + "1 33675.0 Switzerland \n", + "2 33675.0 Switzerland \n", + "3 33675.0 Switzerland \n", + "4 33675.0 Switzerland \n", + "... ... ... \n", + "1088393 Private Client France \n", + "1088394 Private Client France \n", + "1088395 Private Client France \n", + "1088396 Private Client France \n", + "1088397 Private Client France \n", + "\n", + " RegistrarAccount - Country Product - Asset Type \\\n", + "0 Switzerland Diversified \n", + "1 Switzerland Equity \n", + "2 Switzerland Equity \n", + "3 Switzerland Equity \n", + "4 Switzerland Fixed Income \n", + "... ... ... \n", + "1088393 France Diversified \n", + "1088394 France Diversified \n", + "1088395 France Private Assets \n", + "1088396 France Equity \n", + "1088397 France Equity \n", + "\n", + " Product - Strategy Product - Legal Status Product - Is Dedie ? \\\n", + "0 Patrimoine FCP NO \n", + "1 Investissement Latitude FCP NO \n", + "2 Investissement FCP NO \n", + "3 Euro-Entrepreneurs FCP NO \n", + "4 Sécurité FCP NO \n", + "... ... ... ... 
\n", + "1088393 Inflation Solution SICAV NO \n", + "1088394 Inflation Solution SICAV NO \n", + "1088395 Evergreen SICAV NO \n", + "1088396 Tech Solutions SICAV NO \n", + "1088397 Tech Solutions SICAV NO \n", + "\n", + " Product - Fund \\\n", + "0 Carmignac Patrimoine \n", + "1 Carmignac Investissement Latitude \n", + "2 Carmignac Investissement \n", + "3 Carmignac Euro-Entrepreneurs \n", + "4 Carmignac Sécurité \n", + "... ... \n", + "1088393 Carmignac Portfolio Inflation Solution \n", + "1088394 Carmignac Portfolio Inflation Solution \n", + "1088395 Carmignac S.A. SICAV - PART II UCI Private Eve... \n", + "1088396 Carmignac Portfolio Tech Solutions \n", + "1088397 Carmignac Portfolio Tech Solutions \n", + "\n", + " Product - Shareclass Type Product - Shareclass Currency \\\n", + "0 A EUR \n", + "1 A EUR \n", + "2 A EUR \n", + "3 A EUR \n", + "4 AW & AW-R EUR \n", + "... ... ... \n", + "1088393 F EUR \n", + "1088394 A EUR \n", + "1088395 A EUR \n", + "1088396 A EUR \n", + "1088397 F EUR \n", + "\n", + " Product - Isin Centralisation Date Quantity - AUM Value - AUM CCY \\\n", + "0 FR0010135103 2015-01-31 49094.915 3.242523e+07 \n", + "1 FR0010147603 2015-01-31 1717.000 4.767422e+05 \n", + "2 FR0010148981 2015-01-31 8254.870 9.862671e+06 \n", + "3 FR0010149112 2015-01-31 278.923 7.664525e+04 \n", + "4 FR0010149120 2015-01-31 1807.267 3.078318e+06 \n", + "... ... ... ... ... \n", + "1088393 LU2715954330 2025-10-31 81065.419 9.533293e+06 \n", + "1088394 LU2715954504 2025-10-31 6853.363 7.978685e+05 \n", + "1088395 LU2799473124 2025-10-31 4212.234 5.263608e+05 \n", + "1088396 LU2809794220 2025-10-31 31469.523 4.438147e+06 \n", + "1088397 LU2809794576 2025-10-31 554.301 7.871629e+04 \n", + "\n", + " Value - AUM € \n", + "0 3.242523e+07 \n", + "1 4.767422e+05 \n", + "2 9.862671e+06 \n", + "3 7.664525e+04 \n", + "4 3.078318e+06 \n", + "... ... 
# -----------------------------------------------------------------
# Data locations (S3).
# The shared prefixes are factored out; every PATH_* constant below
# evaluates to exactly the same URI string as the literal it replaces.
# -----------------------------------------------------------------
_G3_BUCKET = "s3://projet-bdc-carmignac-g3"
_DATA_ROOT = "s3://projet-bdc-data/carmignac"
_MODEL_ROOT = _DATA_ROOT + "/Data Modélisation"

# AUM table (comma-separated, unlike the ";"-separated files read later).
PATH_AUM = _G3_BUCKET + "/paco/AUM_paths.csv"
df_aum = pd.read_csv(PATH_AUM, sep=",")

# Main inputs
PATH_FLOWS = _DATA_ROOT + "/Flows ENSAE V2 -20251105.csv"
PATH_NAV = _MODEL_ROOT + "/Nav/NAV_Bench_data.csv"
PATH_RATES = _MODEL_ROOT + "/market data/esterRates.csv"

# Optional inputs
PATH_COMP_FLOWS = _MODEL_ROOT + "/competitors/daily_estimated_flows.csv"
PATH_COMP_PERF = _MODEL_ROOT + "/competitors/weekly_perf_full.csv"
PATH_PEERS = _G3_BUCKET + "/peers/CAD_peers.csv"

# No Carmignac performance file is configured; the loading cell only reads
# these when they are set / non-empty.
PATH_CARMIGNAC_PERF = None
ADDITIONAL_PERF_PATHS = []

df_aum
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Registrar Account - IDreg_usedAgreement - CodeCompany - IdCompany - Ultimate Parent IdRegistrar Account - RegionRegistrarAccount - CountryProduct - Asset TypeProduct - StrategyProduct - Legal StatusProduct - Is Dedie ?Product - FundProduct - Shareclass TypeProduct - Shareclass CurrencyProduct - IsinCentralisation DateQuantity - AUMValue - AUM CCYValue - AUM €
1088396Private ClientPrivate ClientPrivate ClientPrivate ClientPrivate ClientFranceFranceEquityTech SolutionsSICAVNOCarmignac Portfolio Tech SolutionsAEURLU28097942202025-10-3131469.5234.438147e+064.438147e+06
1088397Private ClientPrivate ClientPrivate ClientPrivate ClientPrivate ClientFranceFranceEquityTech SolutionsSICAVNOCarmignac Portfolio Tech SolutionsFEURLU28097945762025-10-31554.3017.871629e+047.871629e+04
# `reg_orig` is the account key of the AUM table. Rename it to the account
# identifier column name that the flows table already uses, so both tables
# expose the same key column.
_AUM_KEY_RENAME = {"reg_orig": "Registrar Account - ID"}
df_aum = df_aum.rename(columns=_AUM_KEY_RENAME)
df_aum.tail(2)
# 3. Utility functions

def first_existing_col(df, candidates):
    """Return the first name in ``candidates`` that is a column of ``df``.

    Returns None when ``df`` is None or when no candidate is present.
    """
    if df is None:
        return None
    return next((c for c in candidates if c in df.columns), None)


def normalize_month(s):
    """Parse ``s`` as datetimes and snap each value to the first day of its month.

    Unparseable values become NaT (``errors="coerce"``).
    """
    return pd.to_datetime(s, errors="coerce").dt.to_period("M").dt.to_timestamp()


def robust_entropy(weights):
    """Shannon entropy (natural log) of the strictly positive, finite weights.

    Weights are normalised to sum to 1 after dropping non-finite and
    non-positive entries. Returns NaN when nothing is left.
    """
    arr = np.asarray(weights, dtype=float)
    arr = arr[np.isfinite(arr)]
    arr = arr[arr > 0]
    if len(arr) == 0:
        return np.nan
    p = arr / arr.sum()
    # Every p is > 0 here, so log(p) is finite: no epsilon is needed. Adding one
    # biased the result and made the entropy of a single weight slightly negative.
    # The "+ 0.0" turns a possible -0.0 into +0.0.
    return float(-(p * np.log(p)).sum()) + 0.0


def herfindahl(weights):
    """Herfindahl concentration index: sum of squared shares of the positive weights.

    Non-finite and non-positive entries are dropped first. Returns NaN when
    nothing is left; 1.0 means a single position holds everything.
    """
    arr = np.asarray(weights, dtype=float)
    arr = arr[np.isfinite(arr)]
    arr = arr[arr > 0]
    if len(arr) == 0:
        return np.nan
    p = arr / arr.sum()
    return float(np.sum(p ** 2))


def compute_trend(y):
    """Slope of a least-squares line fitted to ``y`` against its position 0..n-1.

    Non-finite points are ignored. Returns NaN when fewer than 4 usable
    points are available.
    """
    y = np.asarray(y, dtype=float)
    if len(y) < 4:
        return np.nan
    x = np.arange(len(y)).reshape(-1, 1)
    mask = np.isfinite(y)
    if mask.sum() < 4:
        return np.nan
    reg = LinearRegression().fit(x[mask], y[mask])
    return float(reg.coef_[0])


def compute_beta(y, x):
    """Slope of the least-squares regression of ``y`` on ``x`` (with intercept).

    Only positions where both values are finite are used. Returns NaN when
    fewer than 4 such pairs exist. ``y`` and ``x`` must have the same length.
    """
    y = np.asarray(y, dtype=float)
    x = np.asarray(x, dtype=float)
    mask = np.isfinite(y) & np.isfinite(x)
    if mask.sum() < 4:
        return np.nan
    xv = x[mask].reshape(-1, 1)
    reg = LinearRegression().fit(xv, y[mask])
    return float(reg.coef_[0])


def compute_corr(x, y):
    """Correlation (pandas ``Series.corr`` default) between ``x`` and ``y``.

    Only positions where both are non-null are used. Returns NaN when fewer
    than 4 such pairs exist.
    """
    x = pd.Series(x, dtype=float)
    y = pd.Series(y, dtype=float)
    ok = x.notna() & y.notna()
    if ok.sum() < 4:
        return np.nan
    return x[ok].corr(y[ok])


def safe_div(a, b):
    """Divide ``a`` by ``b + EPS`` so that a zero denominator does not raise.

    NOTE(review): the shift is always +EPS, so a negative denominator is moved
    toward zero; callers presumably pass non-negative denominators - confirm.
    """
    return a / (b + EPS)


def adaptive_floor(values, q=0.10, min_floor=1.0):
    """Return a data-driven floor: the ``q`` quantile of the positive finite values.

    Values are coerced to numeric first. The result is never below
    ``min_floor``, which is also returned when no positive finite value exists.
    """
    s = pd.to_numeric(pd.Series(values), errors="coerce")
    s = s[np.isfinite(s) & (s > 0)]
    if len(s) == 0:
        return float(min_floor)
    return float(max(s.quantile(q), min_floor))


def cluster_balance_summary(labels):
    """Summarise how evenly observations are spread across cluster labels.

    Returns a dict with the number of clusters, the smallest and largest
    cluster sizes, the share of the largest cluster and the number of
    singleton clusters. For empty input the counts are 0 and the share is NaN
    (previously this raised because ``int(NaN)`` is invalid).
    """
    vc = pd.Series(labels).value_counts().sort_index()
    n = int(vc.sum())
    if vc.empty:
        return {
            "n_clusters": 0,
            "min_cluster_size": 0,
            "max_cluster_size": 0,
            "dominant_cluster_share": np.nan,
            "singleton_clusters": 0,
        }
    return {
        "n_clusters": int(len(vc)),
        "min_cluster_size": int(vc.min()),
        "max_cluster_size": int(vc.max()),
        "dominant_cluster_share": float(vc.max() / n) if n > 0 else np.nan,
        "singleton_clusters": int((vc == 1).sum()),
    }


def diagnose_small_cluster_drivers(df, feature_cols, label_col="cluster_fund", top_n=15):
    """Rank the features that most separate the smallest cluster from the rest.

    For each feature the median inside the smallest cluster is compared with
    the median of all other rows; the gap is scaled by the feature's IQR
    (falling back to the standard deviation, then to 1.0, when the IQR is not
    positive). Features with fewer than 4 non-null values are skipped.

    Returns a DataFrame with at most ``top_n`` rows sorted by decreasing scaled
    gap, or an empty DataFrame when ``df`` is None/empty, ``label_col`` is
    missing, or no feature qualifies.
    """
    if df is None or df.empty or label_col not in df.columns:
        return pd.DataFrame()
    vc = df[label_col].value_counts()
    if vc.empty:
        return pd.DataFrame()
    small_cluster = vc.idxmin()
    mask_small = df[label_col] == small_cluster
    rows = []
    for col in feature_cols:
        if col not in df.columns:
            continue
        s = pd.to_numeric(df[col], errors="coerce")
        if s.notna().sum() < 4:
            continue
        small_med = s[mask_small].median()
        rest_med = s[~mask_small].median()
        q25 = s.quantile(0.25)
        q75 = s.quantile(0.75)
        iqr = q75 - q25
        # Prefer the IQR as scale; degenerate features fall back to std, then 1.0.
        scale = iqr if pd.notna(iqr) and iqr > 0 else s.std()
        if pd.isna(scale) or scale <= 0:
            scale = 1.0
        robust_gap = abs(small_med - rest_med) / (scale + EPS)
        rows.append({
            "feature": col,
            "small_cluster_label": small_cluster,
            "small_cluster_size": int(mask_small.sum()),
            "small_cluster_median": small_med,
            "rest_median": rest_med,
            "abs_gap": abs(small_med - rest_med),
            "robust_gap_iqr": robust_gap,
            "q99": s.quantile(0.99),
            "max": s.max(),
        })
    out = pd.DataFrame(rows)
    if out.empty:
        return out
    return out.sort_values(["robust_gap_iqr", "abs_gap"], ascending=False).head(top_n).reset_index(drop=True)


def add_reading_comment(diag_df):
    """Print a short (French) reading guide for a table of clustering diagnostics.

    Looks at the optional columns ``min_cluster_size``, ``dominant_cluster_share``
    and ``silhouette`` and prints one warning line per detected symptom, or a
    single reassuring line when none is triggered. Returns None.
    """
    if diag_df is None or diag_df.empty:
        print("Aucun diagnostic disponible.")
        return
    msg = []
    if "min_cluster_size" in diag_df.columns:
        min_min = pd.to_numeric(diag_df["min_cluster_size"], errors="coerce").min()
        if pd.notna(min_min) and min_min <= 1:
            msg.append("- Des singletons apparaissent : le clustering isole au moins un client extrême.")
    if "dominant_cluster_share" in diag_df.columns:
        dom = pd.to_numeric(diag_df["dominant_cluster_share"], errors="coerce").max()
        if pd.notna(dom) and dom > 0.8:
            msg.append("- Un cluster domine très largement la population : la séparation est surtout masse principale vs outliers.")
    if "silhouette" in diag_df.columns:
        sil = pd.to_numeric(diag_df["silhouette"], errors="coerce").max()
        if pd.notna(sil) and sil > 0.9:
            msg.append("- Une silhouette très élevée avec des singletons est souvent artificielle : elle récompense l'isolement d'outliers plus qu'une segmentation métier riche.")
    if not msg:
        msg.append("- Les métriques ne signalent pas de déséquilibre extrême évident.")
    print("\n".join(msg))
# 4. Data loading
df_flows = pd.read_csv(PATH_FLOWS, sep=";")
df_nav = pd.read_csv(PATH_NAV, sep=";")
df_rates = pd.read_csv(PATH_RATES, sep=";")

df_comp_flows = pd.read_csv(PATH_COMP_FLOWS, sep=";")
df_comp_perf = pd.read_csv(PATH_COMP_PERF, sep=";")

# The peers file is sometimes "|"-separated and sometimes ";"-separated.
# read_csv never returns None: a wrong separator shows up as a frame with a
# single column, so that is the condition that triggers the fallback.
df_peers = pd.read_csv(PATH_PEERS, sep="|")
if df_peers.shape[1] <= 1:
    df_peers = pd.read_csv(PATH_PEERS, sep=";")

# Optional Carmignac performance file: only read when a path is configured.
df_perf_carm = pd.read_csv(PATH_CARMIGNAC_PERF, sep=";") if PATH_CARMIGNAC_PERF else None

# Optional extra performance files (empty list when none is configured).
extra_perf = [pd.read_csv(_path, sep=";") for _path in ADDITIONAL_PERF_PATHS]

# Shape summary of everything loaded (None for inputs that were not configured).
for _label, _frame in [
    ("flows", df_flows),
    ("aum", df_aum),
    ("nav", df_nav),
    ("rates", df_rates),
    ("comp_flows", df_comp_flows),
    ("comp_perf", df_comp_perf),
    ("peers", df_peers),
    ("perf_carm", df_perf_carm),
]:
    print(f"{_label}:", None if _frame is None else _frame.shape)
# 5. Key column names

# Account and product identifiers shared by the flows and AUM tables.
ID_COL = "Registrar Account - ID"
ISIN_COL = "Product - Isin"

# Flows and AUM use the same date column name.
FLOW_DATE_COL = AUM_DATE_COL = "Centralisation Date"

# Flow quantities.
FLOW_QTY_COL = "Quantity - NetFlows"
FLOW_SUB_COL = "Quantity - Subscription"
FLOW_RED_COL = "Quantity - Redemption"

# AUM quantity and euro value.
AUM_QTY_COL = "Quantity - AUM"
AUM_VAL_COL = "Value - AUM €"

# Account geography.
REGION_COL = "Registrar Account - Region"
COUNTRY_COL = "RegistrarAccount - Country"

# NAV / benchmark table.
NAV_DATE_COL = "Dat"
NAV_ISIN_COL = "Isin"
NAV_PRICE_COL = "Price (TF PartPrice)"
NAV_BENCH_COL = "PriceBench"

# Rates table.
RATE_DATE_COL = "Date"
RATE_VAL_COL = "Yld to Maturity"

# Echo a subset of the mapping (same keys, same order as before) as a quick check.
_SHOWN_KEYS = (
    "ID_COL",
    "ISIN_COL",
    "FLOW_DATE_COL",
    "AUM_DATE_COL",
    "NAV_DATE_COL",
    "NAV_ISIN_COL",
    "NAV_PRICE_COL",
    "RATE_DATE_COL",
    "RATE_VAL_COL",
    "REGION_COL",
    "COUNTRY_COL",
)
display({_key: globals()[_key] for _key in _SHOWN_KEYS})
"901bed00", + "metadata": {}, + "source": [ + "## 6. Préparation des dates et types" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "68a2e8e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Agreement - CodeCompany - IdCompany - Ultimate Parent IdRegistrar Account - IDRegistrar Account - RegionRegistrarAccount - CountryProduct - Asset TypeProduct - StrategyProduct - Legal StatusProduct - Is Dedie ?Product - FundProduct - Shareclass TypeProduct - Shareclass CurrencyProduct - IsinCentralisation DateQuantity - SubscriptionQuantity - RedemptionQuantity - NetFlowsValue Ccy - SubscriptionValue Ccy - RedemptionValue Ccy - NetFlowsValue € - SubscriptionValue € - RedemptionValue € - NetFlowsmonth
0003166166200127202FranceFranceEquityInvestissementSICAVNOCarmignac Portfolio InvestissementFEURLU09926258392020-11-051636.000.01636.00280983.000.0280983.00280983.000.0280983.002020-11-01
1003166166406533FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032015-03-09144.690.0144.6999985.130.099985.1399985.130.099985.132015-03-01
\n", + "
" + ], + "text/plain": [ + " Agreement - Code Company - Id Company - Ultimate Parent Id \\\n", + "0 003 166 166 \n", + "1 003 166 166 \n", + "\n", + " Registrar Account - ID Registrar Account - Region \\\n", + "0 200127202 France \n", + "1 406533 France \n", + "\n", + " RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n", + "0 France Equity Investissement \n", + "1 France Diversified Patrimoine \n", + "\n", + " Product - Legal Status Product - Is Dedie ? \\\n", + "0 SICAV NO \n", + "1 FCP NO \n", + "\n", + " Product - Fund Product - Shareclass Type \\\n", + "0 Carmignac Portfolio Investissement F \n", + "1 Carmignac Patrimoine A \n", + "\n", + " Product - Shareclass Currency Product - Isin Centralisation Date \\\n", + "0 EUR LU0992625839 2020-11-05 \n", + "1 EUR FR0010135103 2015-03-09 \n", + "\n", + " Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n", + "0 1636.00 0.0 1636.00 \n", + "1 144.69 0.0 144.69 \n", + "\n", + " Value Ccy - Subscription Value Ccy - Redemption Value Ccy - NetFlows \\\n", + "0 280983.00 0.0 280983.00 \n", + "1 99985.13 0.0 99985.13 \n", + "\n", + " Value € - Subscription Value € - Redemption Value € - NetFlows month \n", + "0 280983.00 0.0 280983.00 2020-11-01 \n", + "1 99985.13 0.0 99985.13 2015-03-01 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Registrar Account - IDreg_usedAgreement - CodeCompany - IdCompany - Ultimate Parent IdRegistrar Account - RegionRegistrarAccount - CountryProduct - Asset TypeProduct - StrategyProduct - Legal StatusProduct - Is Dedie ?Product - FundProduct - Shareclass TypeProduct - Shareclass CurrencyProduct - IsinCentralisation DateQuantity - AUMValue - AUM CCYValue - AUM €month
01887218872L1042257.033675.0SwitzerlandSwitzerlandDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032015-01-3149094.9153.242523e+073.242523e+072015-01-01
11887218872L1042257.033675.0SwitzerlandSwitzerlandEquityInvestissement LatitudeFCPNOCarmignac Investissement LatitudeAEURFR00101476032015-01-311717.0004.767422e+054.767422e+052015-01-01
# 6. Dates and key types

# Parse each table's date column and add a `month` column holding the first
# day of that month. Tables whose date column is absent are left untouched.
_dated_frames = (df_flows, df_aum, df_nav, df_rates)
_date_columns = (FLOW_DATE_COL, AUM_DATE_COL, NAV_DATE_COL, RATE_DATE_COL)
for df, date_col in zip(_dated_frames, _date_columns):
    if df is None or date_col is None or date_col not in df.columns:
        continue
    _parsed = pd.to_datetime(df[date_col], errors="coerce")
    df[date_col] = _parsed
    df["month"] = _parsed.dt.to_period("M").dt.to_timestamp()

# Account and ISIN keys are compared as stripped strings in both tables.
for df in (df_flows, df_aum):
    if df is None:
        continue
    for _key_col in (ID_COL, ISIN_COL):
        df[_key_col] = df[_key_col].astype(str).str.strip()

# Same normalisation for the ISIN key of the NAV table.
if not (df_nav is None or NAV_ISIN_COL is None):
    df_nav[NAV_ISIN_COL] = df_nav[NAV_ISIN_COL].astype(str).str.strip()

display(df_flows.head(2), df_aum.head(2))
"8b073eb2", + "metadata": {}, + "source": [ + "## 7. Sélection des 400+ gros comptes au 31/10/2025\n", + "\n", + "Conforme aux messages Carmignac :\n", + "- on travaille à la date de référence **31/10/2025**\n", + "- on exclut **Off Distribution** et **Private Clients**\n", + "- on garde les comptes avec **AUM total ≥ 5 M€**" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "049f4a35-5769-48db-8148-f057dddfbc44", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Registrar Account - ID', 'reg_used', 'Agreement - Code',\n", + " 'Company - Id', 'Company - Ultimate Parent Id',\n", + " 'Registrar Account - Region', 'RegistrarAccount - Country',\n", + " 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status',\n", + " 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type',\n", + " 'Product - Shareclass Currency', 'Product - Isin',\n", + " 'Centralisation Date', 'Quantity - AUM', 'Value - AUM CCY',\n", + " 'Value - AUM €', 'month'],\n", + " dtype='object')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_aum.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "bb71018b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nombre de comptes >= 5M€ : 432\n", + "Couverture encours : 100.0 %\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Registrar Account - IDaum_qty_totalaum_val_totalweight_31102025
4194203502626183.4911.623308e+090.047517
3553647656715556.4961.383209e+090.040489
1602001274544513772.2018.784361e+080.025713
3523129332728411.5968.379604e+080.024529
2002001278093473162.6478.342839e+080.024421
4184202593600860.8888.296663e+080.024286
3573649072794302.2548.151083e+080.023860
4003664411944206.2617.707213e+080.022560
3583649292135207.5377.479766e+080.021895
3813655382695348.4567.200408e+080.021077
\n", + "
" + ], + "text/plain": [ + " Registrar Account - ID aum_qty_total aum_val_total weight_31102025\n", + "419 420350 2626183.491 1.623308e+09 0.047517\n", + "355 364765 6715556.496 1.383209e+09 0.040489\n", + "160 200127454 4513772.201 8.784361e+08 0.025713\n", + "352 312933 2728411.596 8.379604e+08 0.024529\n", + "200 200127809 3473162.647 8.342839e+08 0.024421\n", + "418 420259 3600860.888 8.296663e+08 0.024286\n", + "357 364907 2794302.254 8.151083e+08 0.023860\n", + "400 366441 1944206.261 7.707213e+08 0.022560\n", + "358 364929 2135207.537 7.479766e+08 0.021895\n", + "381 365538 2695348.456 7.200408e+08 0.021077" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "TARGET_DATE = pd.Timestamp(\"2025-10-01\")\n", + "EXCLUDED_IDS = [\"Off Distribution\", \"Private Clients\"]\n", + "AUM_THRESHOLD_EUR = 5_000_000\n", + "\n", + "df_top_accounts = (\n", + " df_aum[\n", + " (df_aum[\"month\"] == TARGET_DATE) &\n", + " (~df_aum[ID_COL].isin(EXCLUDED_IDS))\n", + " ]\n", + " .groupby(ID_COL, as_index=False)\n", + " .agg(\n", + " aum_qty_total=(AUM_QTY_COL, \"sum\"),\n", + " aum_val_total=(AUM_VAL_COL, \"sum\"),\n", + " )\n", + " .sort_values(\"aum_val_total\", ascending=False)\n", + ")\n", + "\n", + "df_top_accounts[\"weight_31102025\"] = df_top_accounts[\"aum_val_total\"] / df_top_accounts[\"aum_val_total\"].sum()\n", + "\n", + "top_accounts = df_top_accounts[df_top_accounts[\"aum_val_total\"] >= AUM_THRESHOLD_EUR].copy()\n", + "TOP_IDS = set(top_accounts[ID_COL].tolist())\n", + "\n", + "print(\"Nombre de comptes >= 5M€ :\", len(TOP_IDS))\n", + "print(\"Couverture encours :\", round(top_accounts[\"aum_val_total\"].sum() / df_top_accounts[\"aum_val_total\"].sum() * 100, 2), \"%\")\n", + "display(top_accounts.head(10))" + ] + }, + { + "cell_type": "markdown", + "id": "06fcabb4", + "metadata": {}, + "source": [ + "## 8. 
Filtrage des données sur les 400+ gros comptes" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "98e51fed", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df_flows: (1501724, 25)\n", + "df_aum: (1088398, 20)\n", + "Nb comptes flows: 427\n", + "Nb comptes aum: 432\n" + ] + } + ], + "source": [ + "\n", + "df_flows = df_flows[df_flows[ID_COL].isin(TOP_IDS)].copy()\n", + "df_aum = df_aum[df_aum[ID_COL].isin(TOP_IDS)].copy()\n", + "\n", + "print(\"df_flows:\", df_flows.shape)\n", + "print(\"df_aum:\", df_aum.shape)\n", + "print(\"Nb comptes flows:\", df_flows[ID_COL].nunique())\n", + "print(\"Nb comptes aum:\", df_aum[ID_COL].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "cdfa3b92-286c-40c3-aac8-f7972be9d5ca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nb comptes AUM : 432\n", + "Nb comptes flows : 427\n", + "Intersection : 427\n", + "AUM seulement : 5\n", + "Flows seulement : 0\n", + "Exemples AUM seulement : ['200142554', '200139346', '200131477', '200127798', '200130743']\n", + "Exemples Flows seulement : []\n" + ] + } + ], + "source": [ + "aum_ids = set(df_aum[\"Registrar Account - ID\"].dropna().unique())\n", + "flow_ids = set(df_flows[\"Registrar Account - ID\"].dropna().unique())\n", + "\n", + "print(\"Nb comptes AUM :\", len(aum_ids))\n", + "print(\"Nb comptes flows :\", len(flow_ids))\n", + "print(\"Intersection :\", len(aum_ids & flow_ids))\n", + "print(\"AUM seulement :\", len(aum_ids - flow_ids))\n", + "print(\"Flows seulement :\", len(flow_ids - aum_ids))\n", + "\n", + "print(\"Exemples AUM seulement :\", list(aum_ids - flow_ids)[:10])\n", + "print(\"Exemples Flows seulement :\", list(flow_ids - aum_ids)[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "fefe8a12-a98f-4220-bfbe-c0f2cd6d4ff2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "df_flows columns: Index(['Agreement - Code', 'Company - Id', 'Company - Ultimate Parent Id',\n", + " 'Registrar Account - ID', 'Registrar Account - Region',\n", + " 'RegistrarAccount - Country', 'Product - Asset Type',\n", + " 'Product - Strategy', 'Product - Legal Status', 'Product - Is Dedie ?',\n", + " 'Product - Fund', 'Product - Shareclass Type',\n", + " 'Product - Shareclass Currency', 'Product - Isin',\n", + " 'Centralisation Date', 'Quantity - Subscription',\n", + " 'Quantity - Redemption', 'Quantity - NetFlows',\n", + " 'Value Ccy - Subscription', 'Value Ccy - Redemption',\n", + " 'Value Ccy - NetFlows', 'Value € - Subscription',\n", + " 'Value € - Redemption', 'Value € - NetFlows', 'month'],\n", + " dtype='object')\n", + "df_aum columns: Index(['Registrar Account - ID', 'reg_used', 'Agreement - Code',\n", + " 'Company - Id', 'Company - Ultimate Parent Id',\n", + " 'Registrar Account - Region', 'RegistrarAccount - Country',\n", + " 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status',\n", + " 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type',\n", + " 'Product - Shareclass Currency', 'Product - Isin',\n", + " 'Centralisation Date', 'Quantity - AUM', 'Value - AUM CCY',\n", + " 'Value - AUM €', 'month'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "print(\"df_flows columns:\", df_flows.columns)\n", + "print(\"df_aum columns:\", df_aum.columns)" + ] + }, + { + "cell_type": "markdown", + "id": "94d7c460", + "metadata": {}, + "source": [ + "## 9. Base fine mensuelle flows : `client × ISIN × month`" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "4ac20a75-bf00-4f61-b0ec-d1c18d4fa665", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
columnn_groups_observedpct_groups_1_valuepct_groups_gt1_valuemax_n_uniquemean_n_unique
0Centralisation Date2757620.3235910.676409235.443382
1Value € - NetFlows2757620.3238880.676112375.435386
2Value Ccy - NetFlows2757620.3239020.676098375.435190
3Quantity - NetFlows2757620.3258790.674121375.361130
4Value € - Redemption2757620.4097850.590215344.152294
5Value Ccy - Redemption2757620.4097880.590212344.152120
6Quantity - Redemption2757620.4109160.589084344.090861
7Value € - Subscription2757620.4433240.556676313.716030
8Value Ccy - Subscription2757620.4433350.556665313.715904
9Quantity - Subscription2757620.4443070.555693303.667652
10Company - Id2754400.6155900.38441031.384432
11Company - Ultimate Parent Id2754400.6156000.38440021.384400
12Registrar Account - Region2757620.9993070.00069321.000693
13RegistrarAccount - Country2757620.9993070.00069321.000693
14Agreement - Code2757620.9998880.00011221.000112
15Product - Asset Type2755291.0000000.00000011.000000
16Product - Strategy2757611.0000000.00000011.000000
17Product - Legal Status2757621.0000000.00000011.000000
18Product - Is Dedie ?2757621.0000000.00000011.000000
19Product - Fund2757621.0000000.00000011.000000
20Product - Shareclass Type2757621.0000000.00000011.000000
21Product - Shareclass Currency2757621.0000000.00000011.000000
\n", + "
" + ], + "text/plain": [ + " column n_groups_observed pct_groups_1_value \\\n", + "0 Centralisation Date 275762 0.323591 \n", + "1 Value € - NetFlows 275762 0.323888 \n", + "2 Value Ccy - NetFlows 275762 0.323902 \n", + "3 Quantity - NetFlows 275762 0.325879 \n", + "4 Value € - Redemption 275762 0.409785 \n", + "5 Value Ccy - Redemption 275762 0.409788 \n", + "6 Quantity - Redemption 275762 0.410916 \n", + "7 Value € - Subscription 275762 0.443324 \n", + "8 Value Ccy - Subscription 275762 0.443335 \n", + "9 Quantity - Subscription 275762 0.444307 \n", + "10 Company - Id 275440 0.615590 \n", + "11 Company - Ultimate Parent Id 275440 0.615600 \n", + "12 Registrar Account - Region 275762 0.999307 \n", + "13 RegistrarAccount - Country 275762 0.999307 \n", + "14 Agreement - Code 275762 0.999888 \n", + "15 Product - Asset Type 275529 1.000000 \n", + "16 Product - Strategy 275761 1.000000 \n", + "17 Product - Legal Status 275762 1.000000 \n", + "18 Product - Is Dedie ? 275762 1.000000 \n", + "19 Product - Fund 275762 1.000000 \n", + "20 Product - Shareclass Type 275762 1.000000 \n", + "21 Product - Shareclass Currency 275762 1.000000 \n", + "\n", + " pct_groups_gt1_value max_n_unique mean_n_unique \n", + "0 0.676409 23 5.443382 \n", + "1 0.676112 37 5.435386 \n", + "2 0.676098 37 5.435190 \n", + "3 0.674121 37 5.361130 \n", + "4 0.590215 34 4.152294 \n", + "5 0.590212 34 4.152120 \n", + "6 0.589084 34 4.090861 \n", + "7 0.556676 31 3.716030 \n", + "8 0.556665 31 3.715904 \n", + "9 0.555693 30 3.667652 \n", + "10 0.384410 3 1.384432 \n", + "11 0.384400 2 1.384400 \n", + "12 0.000693 2 1.000693 \n", + "13 0.000693 2 1.000693 \n", + "14 0.000112 2 1.000112 \n", + "15 0.000000 1 1.000000 \n", + "16 0.000000 1 1.000000 \n", + "17 0.000000 1 1.000000 \n", + "18 0.000000 1 1.000000 \n", + "19 0.000000 1 1.000000 \n", + "20 0.000000 1 1.000000 \n", + "21 0.000000 1 1.000000 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 
============================================================\n", + "# DIAGNOSTIC RAPIDE D'AGREGATION - DF_FLOWS\n", + "# Pour chaque colonne, on regarde combien de valeurs distinctes\n", + "#prend dans chaque groupe client x ISIN x mois\n", + "# ============================================================\n", + "\n", + "KEYS = [ID_COL, ISIN_COL, \"month\"]\n", + "df_flows_diag_base = df_flows.dropna(subset=KEYS).copy()\n", + "flow_cols_to_check = [c for c in df_flows_diag_base.columns if c not in KEYS]\n", + "flow_summary_rows = []\n", + "\n", + "for col in flow_cols_to_check:\n", + " s = df_flows_diag_base[col]\n", + " # on enlève juste les NA pour cette colonne\n", + " tmp = df_flows_diag_base.loc[s.notna(), KEYS + [col]].copy()\n", + " # optionnel : nettoyer les strings seulement si la colonne est texte\n", + " if tmp[col].dtype == \"object\":\n", + " tmp[col] = tmp[col].astype(str).str.strip()\n", + " tmp = tmp[tmp[col] != \"\"]\n", + "\n", + " # nb de valeurs distinctes dans chaque groupe\n", + " nun = tmp.groupby(KEYS)[col].nunique()\n", + " flow_summary_rows.append({\n", + " \"column\": col,\n", + " \"n_groups_observed\": int(nun.shape[0]),\n", + " \"pct_groups_1_value\": float((nun == 1).mean()) if len(nun) else np.nan,\n", + " \"pct_groups_gt1_value\": float((nun > 1).mean()) if len(nun) else np.nan,\n", + " \"max_n_unique\": int(nun.max()) if len(nun) else np.nan,\n", + " \"mean_n_unique\": float(nun.mean()) if len(nun) else np.nan,\n", + " })\n", + "\n", + "flow_agg_summary = (\n", + " pd.DataFrame(flow_summary_rows)\n", + " .sort_values([\"pct_groups_gt1_value\", \"max_n_unique\"], ascending=[False, False])\n", + " .reset_index(drop=True)\n", + ")\n", + "\n", + "display(flow_agg_summary)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8cd2d9d3-5ced-44f5-af20-7f501435a43f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
columnn_groups_observedpct_groups_1_valuepct_groups_gt1_valuemax_n_uniquemean_n_unique
0RegistrarAccount - Country9552410.9363540.063646101.125696
1Registrar Account - Region9552410.9369760.06302491.115268
2Quantity - AUM9552410.9401440.059856101.090657
3Value - AUM CCY9552410.9401710.059829101.090631
4Value - AUM €9552410.9401710.059829101.090631
5Agreement - Code9552410.9835300.01647021.016470
6Company - Id9543720.9836820.01631831.016322
7Company - Ultimate Parent Id9543720.9841640.01583631.015840
8reg_used9552411.0000000.00000011.000000
9Product - Asset Type9345241.0000000.00000011.000000
10Product - Strategy9552101.0000000.00000011.000000
11Product - Legal Status9552411.0000000.00000011.000000
12Product - Is Dedie ?9552411.0000000.00000011.000000
13Product - Fund9552411.0000000.00000011.000000
14Product - Shareclass Type9552411.0000000.00000011.000000
15Product - Shareclass Currency9552411.0000000.00000011.000000
16Centralisation Date9552411.0000000.00000011.000000
\n", + "
" + ], + "text/plain": [ + " column n_groups_observed pct_groups_1_value \\\n", + "0 RegistrarAccount - Country 955241 0.936354 \n", + "1 Registrar Account - Region 955241 0.936976 \n", + "2 Quantity - AUM 955241 0.940144 \n", + "3 Value - AUM CCY 955241 0.940171 \n", + "4 Value - AUM € 955241 0.940171 \n", + "5 Agreement - Code 955241 0.983530 \n", + "6 Company - Id 954372 0.983682 \n", + "7 Company - Ultimate Parent Id 954372 0.984164 \n", + "8 reg_used 955241 1.000000 \n", + "9 Product - Asset Type 934524 1.000000 \n", + "10 Product - Strategy 955210 1.000000 \n", + "11 Product - Legal Status 955241 1.000000 \n", + "12 Product - Is Dedie ? 955241 1.000000 \n", + "13 Product - Fund 955241 1.000000 \n", + "14 Product - Shareclass Type 955241 1.000000 \n", + "15 Product - Shareclass Currency 955241 1.000000 \n", + "16 Centralisation Date 955241 1.000000 \n", + "\n", + " pct_groups_gt1_value max_n_unique mean_n_unique \n", + "0 0.063646 10 1.125696 \n", + "1 0.063024 9 1.115268 \n", + "2 0.059856 10 1.090657 \n", + "3 0.059829 10 1.090631 \n", + "4 0.059829 10 1.090631 \n", + "5 0.016470 2 1.016470 \n", + "6 0.016318 3 1.016322 \n", + "7 0.015836 3 1.015840 \n", + "8 0.000000 1 1.000000 \n", + "9 0.000000 1 1.000000 \n", + "10 0.000000 1 1.000000 \n", + "11 0.000000 1 1.000000 \n", + "12 0.000000 1 1.000000 \n", + "13 0.000000 1 1.000000 \n", + "14 0.000000 1 1.000000 \n", + "15 0.000000 1 1.000000 \n", + "16 0.000000 1 1.000000 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# ============================================================\n", + "# DIAGNOSTIC RAPIDE D'AGREGATION - DF_AUM\n", + "# Pour chaque colonne, on regarde combien de valeurs distinctes\n", + "# prend dans chaque groupe client x ISIN x mois\n", + "# ============================================================\n", + "\n", + "KEYS = [ID_COL, ISIN_COL, \"month\"]\n", + "\n", + "df_aum_diag_base = df_aum.dropna(subset=KEYS).copy()\n", + "\n", + "aum_cols_to_check 
= [c for c in df_aum_diag_base.columns if c not in KEYS]\n", + "\n", + "aum_summary_rows = []\n", + "\n", + "for col in aum_cols_to_check:\n", + " s = df_aum_diag_base[col]\n", + "\n", + " # on enlève les NA pour cette colonne\n", + " tmp = df_aum_diag_base.loc[s.notna(), KEYS + [col]].copy()\n", + "\n", + " # nettoyage léger si texte\n", + " if tmp[col].dtype == \"object\":\n", + " tmp[col] = tmp[col].astype(str).str.strip()\n", + " tmp = tmp[tmp[col] != \"\"]\n", + "\n", + " # nb de valeurs distinctes dans chaque groupe\n", + " nun = tmp.groupby(KEYS)[col].nunique()\n", + "\n", + " aum_summary_rows.append({\n", + " \"column\": col,\n", + " \"n_groups_observed\": int(nun.shape[0]),\n", + " \"pct_groups_1_value\": float((nun == 1).mean()) if len(nun) else np.nan,\n", + " \"pct_groups_gt1_value\": float((nun > 1).mean()) if len(nun) else np.nan,\n", + " \"max_n_unique\": int(nun.max()) if len(nun) else np.nan,\n", + " \"mean_n_unique\": float(nun.mean()) if len(nun) else np.nan,\n", + " })\n", + "\n", + "aum_agg_summary = (\n", + " pd.DataFrame(aum_summary_rows)\n", + " .sort_values([\"pct_groups_gt1_value\", \"max_n_unique\"], ascending=[False, False])\n", + " .reset_index(drop=True)\n", + ")\n", + "\n", + "display(aum_agg_summary)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc0e24e1-162a-48e8-9486-6494e762ae26", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "cba5b9bf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Registrar Account - IDProduct - Isinmonthnet_flow_qtygross_flow_qtysub_qtyred_qtyn_txProduct - Asset TypeProduct - StrategyProduct - Legal StatusProduct - Is Dedie ?Product - FundProduct - Shareclass TypeProduct - Shareclass Currency
018872FR00101351032015-01-01673.9901045.99859.990-186.0009DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR
118872FR00101351032015-02-01988.0001712.001350.000-362.00012DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR
218872FR00101351032015-03-019.7101561.71785.710-776.00012DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR
318872FR00101351032015-04-01-123.2341830.19853.478-976.71211DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR
418872FR00101351032015-05-01121.000529.00325.000-204.0006DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR
\n", + "
" + ], + "text/plain": [ + " Registrar Account - ID Product - Isin month net_flow_qty \\\n", + "0 18872 FR0010135103 2015-01-01 673.990 \n", + "1 18872 FR0010135103 2015-02-01 988.000 \n", + "2 18872 FR0010135103 2015-03-01 9.710 \n", + "3 18872 FR0010135103 2015-04-01 -123.234 \n", + "4 18872 FR0010135103 2015-05-01 121.000 \n", + "\n", + " gross_flow_qty sub_qty red_qty n_tx Product - Asset Type \\\n", + "0 1045.99 859.990 -186.000 9 Diversified \n", + "1 1712.00 1350.000 -362.000 12 Diversified \n", + "2 1561.71 785.710 -776.000 12 Diversified \n", + "3 1830.19 853.478 -976.712 11 Diversified \n", + "4 529.00 325.000 -204.000 6 Diversified \n", + "\n", + " Product - Strategy Product - Legal Status Product - Is Dedie ? \\\n", + "0 Patrimoine FCP NO \n", + "1 Patrimoine FCP NO \n", + "2 Patrimoine FCP NO \n", + "3 Patrimoine FCP NO \n", + "4 Patrimoine FCP NO \n", + "\n", + " Product - Fund Product - Shareclass Type \\\n", + "0 Carmignac Patrimoine A \n", + "1 Carmignac Patrimoine A \n", + "2 Carmignac Patrimoine A \n", + "3 Carmignac Patrimoine A \n", + "4 Carmignac Patrimoine A \n", + "\n", + " Product - Shareclass Currency \n", + "0 EUR \n", + "1 EUR \n", + "2 EUR \n", + "3 EUR \n", + "4 EUR " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(275762, 15)\n" + ] + } + ], + "source": [ + "# ============================================================\n", + "# DF_FLOWS_MENSUEL : 1 client x 1 ISIN x 1 mois\n", + "# ============================================================\n", + "\n", + "KEYS = [ID_COL, ISIN_COL, \"month\"]\n", + "\n", + "df_flows_clean = df_flows.dropna(subset=KEYS).copy()\n", + "\n", + "# -------------------------\n", + "# NUMERIC FLOWS\n", + "# -------------------------\n", + "df_flows_clean[\"net_flow_qty\"] = pd.to_numeric(df_flows_clean[FLOW_QTY_COL], errors=\"coerce\").fillna(0.0)\n", + "df_flows_clean[\"sub_qty\"] = pd.to_numeric(df_flows_clean[FLOW_SUB_COL], 
errors=\"coerce\").fillna(0.0)\n", + "df_flows_clean[\"red_qty\"] = pd.to_numeric(df_flows_clean[FLOW_RED_COL], errors=\"coerce\").fillna(0.0)\n", + "\n", + "df_flows_clean[\"gross_flow_qty\"] = df_flows_clean[\"sub_qty\"].abs() + df_flows_clean[\"red_qty\"].abs()\n", + "\n", + "# -------------------------\n", + "# COLONNES PRODUIT STABLES\n", + "# -------------------------\n", + "product_cols = [\n", + " \"Product - Asset Type\",\n", + " \"Product - Strategy\",\n", + " \"Product - Legal Status\",\n", + " \"Product - Is Dedie ?\",\n", + " \"Product - Fund\",\n", + " \"Product - Shareclass Type\",\n", + " \"Product - Shareclass Currency\",\n", + "]\n", + "\n", + "# -------------------------\n", + "# AGGREGATION\n", + "# -------------------------\n", + "agg_dict = {\n", + " \"net_flow_qty\": (\"net_flow_qty\", \"sum\"),\n", + " \"gross_flow_qty\": (\"gross_flow_qty\", \"sum\"),\n", + " \"sub_qty\": (\"sub_qty\", \"sum\"),\n", + " \"red_qty\": (\"red_qty\", \"sum\"),\n", + " \"n_tx\": (FLOW_QTY_COL, \"size\"),\n", + "}\n", + "\n", + "# ajouter produit en \"first\"\n", + "for col in product_cols:\n", + " if col in df_flows_clean.columns:\n", + " agg_dict[col] = (col, \"first\")\n", + "\n", + "# -------------------------\n", + "# GROUPBY FINAL\n", + "# -------------------------\n", + "df_flows_m = (\n", + " df_flows_clean\n", + " .groupby(KEYS, as_index=False)\n", + " .agg(**agg_dict)\n", + ")\n", + "\n", + "display(df_flows_m.head())\n", + "print(df_flows_m.shape)" + ] + }, + { + "cell_type": "markdown", + "id": "94621000", + "metadata": {}, + "source": [ + "## 10. Base fine mensuelle AUM : `client × ISIN × month`" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "b46d886b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Registrar Account - IDProduct - Isinmonthaum_qtyaum_valProduct - Asset TypeProduct - StrategyProduct - Legal StatusProduct - Is Dedie ?Product - FundProduct - Shareclass TypeProduct - Shareclass CurrencyRegistrar Account - RegionRegistrarAccount - Country
018872FR00101351032015-01-0149094.9153.242523e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland
118872FR00101351032015-02-0149797.9153.368032e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland
218872FR00101351032015-03-0150302.6273.505691e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland
318872FR00101351032015-04-0150219.3933.452433e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland
418872FR00101351032015-05-0153685.3933.699729e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland
\n", + "
" + ], + "text/plain": [ + " Registrar Account - ID Product - Isin month aum_qty aum_val \\\n", + "0 18872 FR0010135103 2015-01-01 49094.915 3.242523e+07 \n", + "1 18872 FR0010135103 2015-02-01 49797.915 3.368032e+07 \n", + "2 18872 FR0010135103 2015-03-01 50302.627 3.505691e+07 \n", + "3 18872 FR0010135103 2015-04-01 50219.393 3.452433e+07 \n", + "4 18872 FR0010135103 2015-05-01 53685.393 3.699729e+07 \n", + "\n", + " Product - Asset Type Product - Strategy Product - Legal Status \\\n", + "0 Diversified Patrimoine FCP \n", + "1 Diversified Patrimoine FCP \n", + "2 Diversified Patrimoine FCP \n", + "3 Diversified Patrimoine FCP \n", + "4 Diversified Patrimoine FCP \n", + "\n", + " Product - Is Dedie ? Product - Fund Product - Shareclass Type \\\n", + "0 NO Carmignac Patrimoine A \n", + "1 NO Carmignac Patrimoine A \n", + "2 NO Carmignac Patrimoine A \n", + "3 NO Carmignac Patrimoine A \n", + "4 NO Carmignac Patrimoine A \n", + "\n", + " Product - Shareclass Currency Registrar Account - Region \\\n", + "0 EUR Switzerland \n", + "1 EUR Switzerland \n", + "2 EUR Switzerland \n", + "3 EUR Switzerland \n", + "4 EUR Switzerland \n", + "\n", + " RegistrarAccount - Country \n", + "0 Switzerland \n", + "1 Switzerland \n", + "2 Switzerland \n", + "3 Switzerland \n", + "4 Switzerland " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(955241, 14)\n" + ] + } + ], + "source": [ + "# ============================================================\n", + "# DF_AUM_MENSUEL : 1 client x 1 ISIN x 1 mois\n", + "# ============================================================\n", + "\n", + "df_aum_clean = df_aum.dropna(subset=KEYS).copy()\n", + "\n", + "# conversion\n", + "df_aum_clean[\"aum_qty\"] = pd.to_numeric(df_aum_clean[AUM_QTY_COL], errors=\"coerce\")\n", + "df_aum_clean[\"aum_val\"] = pd.to_numeric(df_aum_clean[AUM_VAL_COL], errors=\"coerce\")\n", + "\n", + "# -------------------------\n", + "# 
AGGREGATION\n", + "# -------------------------\n", + "agg_dict_aum = {\n", + " \"aum_qty\": (\"aum_qty\", \"last\"), # très important\n", + " \"aum_val\": (\"aum_val\", \"last\"),\n", + "}\n", + "\n", + "# colonnes produit (stables)\n", + "for col in product_cols:\n", + " if col in df_aum_clean.columns:\n", + " agg_dict_aum[col] = (col, \"first\")\n", + "\n", + "# region / country (quasi stable)\n", + "for col in [REGION_COL, COUNTRY_COL]:\n", + " if col in df_aum_clean.columns:\n", + " agg_dict_aum[col] = (col, \"first\")\n", + "\n", + "# -------------------------\n", + "# GROUPBY\n", + "# -------------------------\n", + "df_aum_m = (\n", + " df_aum_clean\n", + " .sort_values(AUM_DATE_COL)\n", + " .groupby(KEYS, as_index=False)\n", + " .agg(**agg_dict_aum)\n", + ")\n", + "\n", + "display(df_aum_m.head())\n", + "print(df_aum_m.shape)" + ] + }, + { + "cell_type": "markdown", + "id": "8576701e", + "metadata": {}, + "source": [ + "## 11. Fusion flows + AUM : table centrale `df_rel_m`" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "6120b573", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Registrar Account - IDProduct - Isinmonthaum_qtyaum_valProduct - Asset Type_xProduct - Strategy_xProduct - Legal Status_xProduct - Is Dedie ?_xProduct - Fund_xProduct - Shareclass Type_xProduct - Shareclass Currency_xRegistrar Account - RegionRegistrarAccount - Countrynet_flow_qtygross_flow_qtysub_qtyred_qtyn_txProduct - Asset Type_yProduct - Strategy_yProduct - Legal Status_yProduct - Is Dedie ?_yProduct - Fund_yProduct - Shareclass Type_yProduct - Shareclass Currency_yisin_held_flagactive_rel_monthflow_to_aumturnover_rel
018872FR00101351032015-01-0149094.9153.242523e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland673.9901045.99859.990-186.0009.0DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR110.0137280.021305
118872FR00101351032015-02-0149797.9153.368032e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland988.0001712.001350.000-362.00012.0DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR110.0198400.034378
218872FR00101351032015-03-0150302.6273.505691e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland9.7101561.71785.710-776.00012.0DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR110.0001930.031046
318872FR00101351032015-04-0150219.3933.452433e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland-123.2341830.19853.478-976.71211.0DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR11-0.0024540.036443
418872FR00101351032015-05-0153685.3933.699729e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland121.000529.00325.000-204.0006.0DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR110.0022540.009854
\n", + "
" + ], + "text/plain": [ + " Registrar Account - ID Product - Isin month aum_qty aum_val \\\n", + "0 18872 FR0010135103 2015-01-01 49094.915 3.242523e+07 \n", + "1 18872 FR0010135103 2015-02-01 49797.915 3.368032e+07 \n", + "2 18872 FR0010135103 2015-03-01 50302.627 3.505691e+07 \n", + "3 18872 FR0010135103 2015-04-01 50219.393 3.452433e+07 \n", + "4 18872 FR0010135103 2015-05-01 53685.393 3.699729e+07 \n", + "\n", + " Product - Asset Type_x Product - Strategy_x Product - Legal Status_x \\\n", + "0 Diversified Patrimoine FCP \n", + "1 Diversified Patrimoine FCP \n", + "2 Diversified Patrimoine FCP \n", + "3 Diversified Patrimoine FCP \n", + "4 Diversified Patrimoine FCP \n", + "\n", + " Product - Is Dedie ?_x Product - Fund_x Product - Shareclass Type_x \\\n", + "0 NO Carmignac Patrimoine A \n", + "1 NO Carmignac Patrimoine A \n", + "2 NO Carmignac Patrimoine A \n", + "3 NO Carmignac Patrimoine A \n", + "4 NO Carmignac Patrimoine A \n", + "\n", + " Product - Shareclass Currency_x Registrar Account - Region \\\n", + "0 EUR Switzerland \n", + "1 EUR Switzerland \n", + "2 EUR Switzerland \n", + "3 EUR Switzerland \n", + "4 EUR Switzerland \n", + "\n", + " RegistrarAccount - Country net_flow_qty gross_flow_qty sub_qty red_qty \\\n", + "0 Switzerland 673.990 1045.99 859.990 -186.000 \n", + "1 Switzerland 988.000 1712.00 1350.000 -362.000 \n", + "2 Switzerland 9.710 1561.71 785.710 -776.000 \n", + "3 Switzerland -123.234 1830.19 853.478 -976.712 \n", + "4 Switzerland 121.000 529.00 325.000 -204.000 \n", + "\n", + " n_tx Product - Asset Type_y Product - Strategy_y Product - Legal Status_y \\\n", + "0 9.0 Diversified Patrimoine FCP \n", + "1 12.0 Diversified Patrimoine FCP \n", + "2 12.0 Diversified Patrimoine FCP \n", + "3 11.0 Diversified Patrimoine FCP \n", + "4 6.0 Diversified Patrimoine FCP \n", + "\n", + " Product - Is Dedie ?_y Product - Fund_y Product - Shareclass Type_y \\\n", + "0 NO Carmignac Patrimoine A \n", + "1 NO Carmignac Patrimoine A \n", + "2 NO 
# ============================================================
# DF_REL_M: central table = monthly AUM + monthly flows
# ============================================================

# Union of every key combination seen in either table, so that months with
# flows but no AUM row (and vice versa) are both kept.
keys = pd.concat([
    df_flows_m[KEYS],
    df_aum_m[KEYS]
]).drop_duplicates()

# Non-key columns present in BOTH inputs. The second merge renames them to
# "<col>_x" (AUM side) and "<col>_y" (flows side), leaving no column under
# the original name.
shared_desc_cols = [
    col for col in df_aum_m.columns
    if col in df_flows_m.columns and col not in KEYS
]

df_rel_m = (
    keys
    .merge(df_aum_m, on=KEYS, how="left")
    .merge(df_flows_m, on=KEYS, how="left")
)

# -------------------------
# CLEAN NUMERIC
# -------------------------
# Keys present on one side only get NaN from the left joins; they are turned
# into 0 (no position / no flow for that month).
for c in [
    "aum_qty", "aum_val",
    "net_flow_qty", "gross_flow_qty",
    "sub_qty", "red_qty", "n_tx"
]:
    if c in df_rel_m.columns:
        df_rel_m[c] = pd.to_numeric(df_rel_m[c], errors="coerce").fillna(0.0)

# -------------------------
# BASIC FEATURES
# -------------------------
df_rel_m["isin_held_flag"] = (df_rel_m["aum_qty"] > 0).astype(int)
df_rel_m["active_rel_month"] = (df_rel_m["gross_flow_qty"] > 0).astype(int)

# The +1.0 in the denominator avoids division by zero when no units are held.
df_rel_m["flow_to_aum"] = df_rel_m["net_flow_qty"] / (df_rel_m["aum_qty"].abs() + 1.0)
df_rel_m["turnover_rel"] = df_rel_m["gross_flow_qty"] / (df_rel_m["aum_qty"].abs() + 1.0)

# -------------------------
# COALESCED DESCRIPTORS
# -------------------------
# Fix: rebuild one unsuffixed column per shared descriptor, taking the AUM-side
# value and falling back to the flows-side value when the AUM side is missing
# (flow-only rows). The "_x" / "_y" columns are kept untouched, and the new
# columns are appended last, so existing column names and order are unchanged.
for shared_col in shared_desc_cols:
    aum_side, flow_side = f"{shared_col}_x", f"{shared_col}_y"
    if (
        shared_col not in df_rel_m.columns
        and aum_side in df_rel_m.columns
        and flow_side in df_rel_m.columns
    ):
        df_rel_m[shared_col] = df_rel_m[aum_side].combine_first(df_rel_m[flow_side])

display(df_rel_m.head())
print(df_rel_m.shape)
"cell_type": "markdown", + "id": "2c2ca596", + "metadata": {}, + "source": [ + "## 12. NAV mensuel et taux mensuels" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "9fe5c3c0-80f6-4fe4-bd67-08323f781c7a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df_nav_m: (30336, 17)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IsinmonthPrice (TF PartPrice)ret_fund_mret_fund_3m_meanret_fund_6m_meanret_fund_12m_meandrawdown_proxy
617663FR00074867092011-01-011534.80NaNNaNNaNNaN0.000000
617668FR00074867092011-02-011500.73-0.022198-0.022198-0.022198-0.022198-0.022198
617672FR00074867092011-03-011481.29-0.012954-0.017576-0.017576-0.017576-0.034864
617677FR00074867092011-04-011517.600.024512-0.003547-0.003547-0.003547-0.011207
617681FR00074867092011-05-011504.83-0.0084150.001048-0.004764-0.004764-0.019527
617686FR00074867092011-06-011492.92-0.0079150.002728-0.005394-0.005394-0.027287
617690FR00074867092011-07-011473.52-0.012995-0.009775-0.006661-0.006661-0.039927
617694FR00074867092011-08-011346.78-0.086012-0.035640-0.017296-0.017996-0.122505
617699FR00074867092011-09-011330.69-0.011947-0.036984-0.017128-0.017240-0.132988
617703FR00074867092011-10-011372.910.031728-0.022077-0.015926-0.011799-0.105480
\n", + "
" + ], + "text/plain": [ + " Isin month Price (TF PartPrice) ret_fund_m \\\n", + "617663 FR0007486709 2011-01-01 1534.80 NaN \n", + "617668 FR0007486709 2011-02-01 1500.73 -0.022198 \n", + "617672 FR0007486709 2011-03-01 1481.29 -0.012954 \n", + "617677 FR0007486709 2011-04-01 1517.60 0.024512 \n", + "617681 FR0007486709 2011-05-01 1504.83 -0.008415 \n", + "617686 FR0007486709 2011-06-01 1492.92 -0.007915 \n", + "617690 FR0007486709 2011-07-01 1473.52 -0.012995 \n", + "617694 FR0007486709 2011-08-01 1346.78 -0.086012 \n", + "617699 FR0007486709 2011-09-01 1330.69 -0.011947 \n", + "617703 FR0007486709 2011-10-01 1372.91 0.031728 \n", + "\n", + " ret_fund_3m_mean ret_fund_6m_mean ret_fund_12m_mean drawdown_proxy \n", + "617663 NaN NaN NaN 0.000000 \n", + "617668 -0.022198 -0.022198 -0.022198 -0.022198 \n", + "617672 -0.017576 -0.017576 -0.017576 -0.034864 \n", + "617677 -0.003547 -0.003547 -0.003547 -0.011207 \n", + "617681 0.001048 -0.004764 -0.004764 -0.019527 \n", + "617686 0.002728 -0.005394 -0.005394 -0.027287 \n", + "617690 -0.009775 -0.006661 -0.006661 -0.039927 \n", + "617694 -0.035640 -0.017296 -0.017996 -0.122505 \n", + "617699 -0.036984 -0.017128 -0.017240 -0.132988 \n", + "617703 -0.022077 -0.015926 -0.011799 -0.105480 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# ============================================================\n", + "# NAV mensuel + perfs mensuelles corrigées\n", + "# ============================================================\n", + "\n", + "df_nav_m = None\n", + "\n", + "if df_nav is not None and NAV_ISIN_COL is not None and NAV_PRICE_COL is not None:\n", + " df_nav = df_nav.copy()\n", + "\n", + " # nettoyage prix fonds\n", + " df_nav[NAV_PRICE_COL] = (\n", + " df_nav[NAV_PRICE_COL]\n", + " .astype(str)\n", + " .str.replace(\"\\u202f\", \"\", regex=False)\n", + " .str.replace(\" \", \"\", regex=False)\n", + " .str.replace(\",\", \".\", regex=False)\n", + " )\n", + " df_nav[NAV_PRICE_COL] = 
pd.to_numeric(df_nav[NAV_PRICE_COL], errors=\"coerce\")\n", + "\n", + " # nettoyage benchmark si dispo\n", + " if NAV_BENCH_COL is not None and NAV_BENCH_COL in df_nav.columns:\n", + " df_nav[NAV_BENCH_COL] = (\n", + " df_nav[NAV_BENCH_COL]\n", + " .astype(str)\n", + " .str.replace(\"\\u202f\", \"\", regex=False)\n", + " .str.replace(\" \", \"\", regex=False)\n", + " .str.replace(\",\", \".\", regex=False)\n", + " )\n", + " df_nav[NAV_BENCH_COL] = pd.to_numeric(df_nav[NAV_BENCH_COL], errors=\"coerce\")\n", + "\n", + " # dernière NAV du mois par ISIN\n", + " df_nav_m = (\n", + " df_nav\n", + " .dropna(subset=[NAV_ISIN_COL, \"month\", NAV_PRICE_COL])\n", + " .sort_values([NAV_ISIN_COL, \"month\", NAV_DATE_COL])\n", + " .groupby([NAV_ISIN_COL, \"month\"], as_index=False)\n", + " .tail(1)\n", + " .copy()\n", + " )\n", + "\n", + " # rendement mensuel\n", + " df_nav_m[\"ret_fund_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_PRICE_COL].pct_change()\n", + "\n", + " # moyenne glissante des rendements mensuels\n", + " df_nav_m[\"ret_fund_3m_mean\"] = (\n", + " df_nav_m.groupby(NAV_ISIN_COL)[\"ret_fund_m\"]\n", + " .transform(lambda s: s.rolling(3, min_periods=1).mean())\n", + " )\n", + "\n", + " df_nav_m[\"ret_fund_6m_mean\"] = (\n", + " df_nav_m.groupby(NAV_ISIN_COL)[\"ret_fund_m\"]\n", + " .transform(lambda s: s.rolling(6, min_periods=1).mean())\n", + " )\n", + "\n", + " df_nav_m[\"ret_fund_12m_mean\"] = (\n", + " df_nav_m.groupby(NAV_ISIN_COL)[\"ret_fund_m\"]\n", + " .transform(lambda s: s.rolling(12, min_periods=1).mean())\n", + " )\n", + "\n", + " # volatilité glissante des rendements\n", + " df_nav_m[\"ret_vol_3m\"] = (\n", + " df_nav_m.groupby(NAV_ISIN_COL)[\"ret_fund_m\"]\n", + " .transform(lambda s: s.rolling(3, min_periods=2).std())\n", + " )\n", + "\n", + " df_nav_m[\"ret_vol_6m\"] = (\n", + " df_nav_m.groupby(NAV_ISIN_COL)[\"ret_fund_m\"]\n", + " .transform(lambda s: s.rolling(6, min_periods=2).std())\n", + " )\n", + "\n", + " # drawdown\n", + " 
df_nav_m[\"drawdown_proxy\"] = (\n", + " df_nav_m[NAV_PRICE_COL] /\n", + " df_nav_m.groupby(NAV_ISIN_COL)[NAV_PRICE_COL].cummax()\n", + " ) - 1\n", + "\n", + " # benchmark si dispo\n", + " if NAV_BENCH_COL is not None and NAV_BENCH_COL in df_nav_m.columns:\n", + " df_nav_m[\"ret_bench_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_BENCH_COL].pct_change()\n", + "\n", + " df_nav_m[\"active_return_m\"] = df_nav_m[\"ret_fund_m\"] - df_nav_m[\"ret_bench_m\"]\n", + "\n", + " df_nav_m[\"active_return_3m_mean\"] = (\n", + " df_nav_m.groupby(NAV_ISIN_COL)[\"active_return_m\"]\n", + " .transform(lambda s: s.rolling(3, min_periods=1).mean())\n", + " )\n", + "\n", + " df_nav_m[\"active_return_6m_mean\"] = (\n", + " df_nav_m.groupby(NAV_ISIN_COL)[\"active_return_m\"]\n", + " .transform(lambda s: s.rolling(6, min_periods=1).mean())\n", + " )\n", + "\n", + "print(\"df_nav_m:\", None if df_nav_m is None else df_nav_m.shape)\n", + "\n", + "if df_nav_m is not None:\n", + " display(\n", + " df_nav_m[\n", + " [\n", + " NAV_ISIN_COL, \"month\", NAV_PRICE_COL,\n", + " \"ret_fund_m\", \"ret_fund_3m_mean\", \"ret_fund_6m_mean\",\n", + " \"ret_fund_12m_mean\", \"drawdown_proxy\"\n", + " ]\n", + " ].head(10)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "d2f87bef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df_nav_m: (30336, 17)\n", + "df_rates_m: (131, 4)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IsinmonthPrice (TF PartPrice)ret_fund_m
617663FR00074867092011-01-011534.80NaN
617668FR00074867092011-02-011500.73-0.022198
617672FR00074867092011-03-011481.29-0.012954
617677FR00074867092011-04-011517.600.024512
617681FR00074867092011-05-011504.83-0.008415
\n", + "
" + ], + "text/plain": [ + " Isin month Price (TF PartPrice) ret_fund_m\n", + "617663 FR0007486709 2011-01-01 1534.80 NaN\n", + "617668 FR0007486709 2011-02-01 1500.73 -0.022198\n", + "617672 FR0007486709 2011-03-01 1481.29 -0.012954\n", + "617677 FR0007486709 2011-04-01 1517.60 0.024512\n", + "617681 FR0007486709 2011-05-01 1504.83 -0.008415" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
monthYld to Maturitydelta_rate_m
02014-12-010.144NaN
222015-01-010.086-0.058
432015-02-010.064-0.022
652015-03-010.050-0.014
862015-04-01-0.027-0.077
\n", + "
" + ], + "text/plain": [ + " month Yld to Maturity delta_rate_m\n", + "0 2014-12-01 0.144 NaN\n", + "22 2015-01-01 0.086 -0.058\n", + "43 2015-02-01 0.064 -0.022\n", + "65 2015-03-01 0.050 -0.014\n", + "86 2015-04-01 -0.027 -0.077" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_rates_m = None\n", + "\n", + "if df_rates is not None and RATE_DATE_COL is not None and RATE_VAL_COL is not None:\n", + " df_rates = df_rates.copy()\n", + "\n", + " df_rates[RATE_VAL_COL] = (\n", + " df_rates[RATE_VAL_COL]\n", + " .astype(str)\n", + " .str.replace(\"\\u202f\", \"\", regex=False)\n", + " .str.replace(\" \", \"\", regex=False)\n", + " .str.replace(\",\", \".\", regex=False)\n", + " )\n", + " df_rates[RATE_VAL_COL] = pd.to_numeric(df_rates[RATE_VAL_COL], errors=\"coerce\")\n", + "\n", + " df_rates_m = (\n", + " df_rates\n", + " .dropna(subset=[\"month\", RATE_VAL_COL])\n", + " .sort_values(RATE_DATE_COL)\n", + " .groupby(\"month\", as_index=False)\n", + " .tail(1)\n", + " .copy()\n", + " )\n", + "\n", + " df_rates_m[\"delta_rate_m\"] = df_rates_m[RATE_VAL_COL].diff()\n", + "\n", + "print(\"df_nav_m:\", None if df_nav_m is None else df_nav_m.shape)\n", + "print(\"df_rates_m:\", None if df_rates_m is None else df_rates_m.shape)\n", + "\n", + "if df_nav_m is not None:\n", + " display(df_nav_m[[NAV_ISIN_COL, \"month\", NAV_PRICE_COL, \"ret_fund_m\"]].head())\n", + "\n", + "if df_rates_m is not None:\n", + " display(df_rates_m[[\"month\", RATE_VAL_COL, \"delta_rate_m\"]].head())" + ] + }, + { + "cell_type": "markdown", + "id": "6c3a4e1f", + "metadata": {}, + "source": [ + "## 13. Intégration optionnelle des performances Carmignac" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "d6402369-171e-4589-a311-aa440ebf10f2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df_perf_monthly: (30336, 12)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Product - Isinmonthret_fund_mret_fund_3m_meanret_fund_6m_meanret_fund_12m_meanret_vol_3mret_vol_6mdrawdown_proxyactive_return_mactive_return_3m_meanactive_return_6m_mean
617663FR00074867092011-01-01NaNNaNNaNNaNNaNNaN0.000000NaNNaNNaN
617668FR00074867092011-02-01-0.022198-0.022198-0.022198-0.022198NaNNaN-0.022198-0.028319-0.028319-0.028319
617672FR00074867092011-03-01-0.012954-0.017576-0.017576-0.0175760.0065370.006537-0.0348640.014369-0.006975-0.006975
617677FR00074867092011-04-010.024512-0.003547-0.003547-0.0035470.0247350.024735-0.011207-0.006997-0.006982-0.006982
617681FR00074867092011-05-01-0.0084150.001048-0.004764-0.0047640.0204470.020343-0.0195270.0036910.003688-0.004314
\n", + "
" + ], + "text/plain": [ + " Product - Isin month ret_fund_m ret_fund_3m_mean \\\n", + "617663 FR0007486709 2011-01-01 NaN NaN \n", + "617668 FR0007486709 2011-02-01 -0.022198 -0.022198 \n", + "617672 FR0007486709 2011-03-01 -0.012954 -0.017576 \n", + "617677 FR0007486709 2011-04-01 0.024512 -0.003547 \n", + "617681 FR0007486709 2011-05-01 -0.008415 0.001048 \n", + "\n", + " ret_fund_6m_mean ret_fund_12m_mean ret_vol_3m ret_vol_6m \\\n", + "617663 NaN NaN NaN NaN \n", + "617668 -0.022198 -0.022198 NaN NaN \n", + "617672 -0.017576 -0.017576 0.006537 0.006537 \n", + "617677 -0.003547 -0.003547 0.024735 0.024735 \n", + "617681 -0.004764 -0.004764 0.020447 0.020343 \n", + "\n", + " drawdown_proxy active_return_m active_return_3m_mean \\\n", + "617663 0.000000 NaN NaN \n", + "617668 -0.022198 -0.028319 -0.028319 \n", + "617672 -0.034864 0.014369 -0.006975 \n", + "617677 -0.011207 -0.006997 -0.006982 \n", + "617681 -0.019527 0.003691 0.003688 \n", + "\n", + " active_return_6m_mean \n", + "617663 NaN \n", + "617668 -0.028319 \n", + "617672 -0.006975 \n", + "617677 -0.006982 \n", + "617681 -0.004314 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_perf_monthly = None\n", + "\n", + "if df_perf_monthly is None:\n", + " df_perf_monthly = (\n", + " df_nav_m[\n", + " [\n", + " NAV_ISIN_COL, \"month\",\n", + " \"ret_fund_m\",\n", + " \"ret_fund_3m_mean\",\n", + " \"ret_fund_6m_mean\",\n", + " \"ret_fund_12m_mean\",\n", + " \"ret_vol_3m\",\n", + " \"ret_vol_6m\",\n", + " \"drawdown_proxy\"\n", + " ]\n", + " + ([ \"active_return_m\", \"active_return_3m_mean\", \"active_return_6m_mean\" ] if \"active_return_m\" in df_nav_m.columns else [])\n", + " ]\n", + " .rename(columns={NAV_ISIN_COL: ISIN_COL})\n", + " .copy()\n", + " ) if df_nav_m is not None else None\n", + "\n", + "print(\"df_perf_monthly:\", None if df_perf_monthly is None else df_perf_monthly.shape)\n", + "if df_perf_monthly is not None:\n", + " display(df_perf_monthly.head())" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 22, + "id": "70a5eae1-8d98-427a-8aab-f10a4c8ad782", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Product - Isinmonthret_fund_mret_fund_3m_meanret_fund_6m_meanret_fund_12m_meanret_vol_3mret_vol_6mdrawdown_proxyactive_return_mactive_return_3m_meanactive_return_6m_mean
617663FR00074867092011-01-01NaNNaNNaNNaNNaNNaN0.000000NaNNaNNaN
617668FR00074867092011-02-01-0.022198-0.022198-0.022198-0.022198NaNNaN-0.022198-0.028319-0.028319-0.028319
617672FR00074867092011-03-01-0.012954-0.017576-0.017576-0.0175760.0065370.006537-0.0348640.014369-0.006975-0.006975
617677FR00074867092011-04-010.024512-0.003547-0.003547-0.0035470.0247350.024735-0.011207-0.006997-0.006982-0.006982
617681FR00074867092011-05-01-0.0084150.001048-0.004764-0.0047640.0204470.020343-0.0195270.0036910.003688-0.004314
.......................................
89LU31492002332025-10-010.0350730.0350730.0350730.035073NaNNaN0.0000000.0165770.0165770.016577
35LU31492007462025-09-01NaNNaNNaNNaNNaNNaN0.000000NaNNaNNaN
51LU31492007462025-10-010.0352610.0352610.0352610.035261NaNNaN0.0000000.0167660.0167660.016766
16LU31868888582025-10-01NaNNaNNaNNaNNaNNaN0.000000NaNNaNNaN
4LU31989909082025-10-01NaNNaNNaNNaNNaNNaN0.000000NaNNaNNaN
\n", + "

30336 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " Product - Isin month ret_fund_m ret_fund_3m_mean \\\n", + "617663 FR0007486709 2011-01-01 NaN NaN \n", + "617668 FR0007486709 2011-02-01 -0.022198 -0.022198 \n", + "617672 FR0007486709 2011-03-01 -0.012954 -0.017576 \n", + "617677 FR0007486709 2011-04-01 0.024512 -0.003547 \n", + "617681 FR0007486709 2011-05-01 -0.008415 0.001048 \n", + "... ... ... ... ... \n", + "89 LU3149200233 2025-10-01 0.035073 0.035073 \n", + "35 LU3149200746 2025-09-01 NaN NaN \n", + "51 LU3149200746 2025-10-01 0.035261 0.035261 \n", + "16 LU3186888858 2025-10-01 NaN NaN \n", + "4 LU3198990908 2025-10-01 NaN NaN \n", + "\n", + " ret_fund_6m_mean ret_fund_12m_mean ret_vol_3m ret_vol_6m \\\n", + "617663 NaN NaN NaN NaN \n", + "617668 -0.022198 -0.022198 NaN NaN \n", + "617672 -0.017576 -0.017576 0.006537 0.006537 \n", + "617677 -0.003547 -0.003547 0.024735 0.024735 \n", + "617681 -0.004764 -0.004764 0.020447 0.020343 \n", + "... ... ... ... ... \n", + "89 0.035073 0.035073 NaN NaN \n", + "35 NaN NaN NaN NaN \n", + "51 0.035261 0.035261 NaN NaN \n", + "16 NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN \n", + "\n", + " drawdown_proxy active_return_m active_return_3m_mean \\\n", + "617663 0.000000 NaN NaN \n", + "617668 -0.022198 -0.028319 -0.028319 \n", + "617672 -0.034864 0.014369 -0.006975 \n", + "617677 -0.011207 -0.006997 -0.006982 \n", + "617681 -0.019527 0.003691 0.003688 \n", + "... ... ... ... \n", + "89 0.000000 0.016577 0.016577 \n", + "35 0.000000 NaN NaN \n", + "51 0.000000 0.016766 0.016766 \n", + "16 0.000000 NaN NaN \n", + "4 0.000000 NaN NaN \n", + "\n", + " active_return_6m_mean \n", + "617663 NaN \n", + "617668 -0.028319 \n", + "617672 -0.006975 \n", + "617677 -0.006982 \n", + "617681 -0.004314 \n", + "... ... 
# Display the monthly performance table as the cell's output (bare last
# expression); nothing is rendered if the table is None.
df_perf_monthly
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Registrar Account - IDProduct - Isinmonthaum_qtyaum_valProduct - Asset Type_xProduct - Strategy_xProduct - Legal Status_xProduct - Is Dedie ?_xProduct - Fund_xProduct - Shareclass Type_xProduct - Shareclass Currency_xRegistrar Account - RegionRegistrarAccount - Countrynet_flow_qtygross_flow_qtysub_qtyred_qtyn_txProduct - Asset Type_yProduct - Strategy_yProduct - Legal Status_yProduct - Is Dedie ?_yProduct - Fund_yProduct - Shareclass Type_yProduct - Shareclass Currency_yisin_held_flagactive_rel_monthflow_to_aumturnover_relret_fund_mret_fund_3m_meanret_fund_6m_meanret_fund_12m_meanret_vol_3mret_vol_6mdrawdown_proxyactive_return_mactive_return_3m_meanactive_return_6m_meandelta_rate_m
018872FR00101351032015-01-0149094.9153.242523e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland673.9901045.99859.990-186.0009.0DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR110.0137280.0213050.0655420.0294030.0202520.0135660.0312990.0236190.0000000.003148-0.000504-0.002776-0.058
118872FR00101351032015-02-0149797.9153.368032e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland988.0001712.001350.000-362.00012.0DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR110.0198400.0343780.0256200.0342950.0202090.0156710.0279380.0236070.000000-0.002505-0.001482-0.002621-0.022
218872FR00101351032015-03-0150302.6273.505691e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland9.7101561.71785.710-776.00012.0DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR110.0001930.0310460.0306140.0405920.0246640.0179030.0217510.0224020.0000000.0001870.000277-0.001807-0.014
318872FR00101351032015-04-0150219.3933.452433e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland-123.2341830.19853.478-976.71211.0DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR11-0.0024540.036443-0.0239090.0107750.0200890.0163080.0301410.029315-0.023909-0.001525-0.001281-0.000893-0.077
418872FR00101351032015-05-0153685.3933.699729e+07DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURSwitzerlandSwitzerland121.000529.00325.000-204.0006.0DiversifiedPatrimoineFCPNOCarmignac PatrimoineAEUR110.0022540.0098540.0032400.0033150.0188050.0147580.0272620.029957-0.020747-0.006286-0.002542-0.002012-0.053
\n", + "
" + ], + "text/plain": [ + " Registrar Account - ID Product - Isin month aum_qty aum_val \\\n", + "0 18872 FR0010135103 2015-01-01 49094.915 3.242523e+07 \n", + "1 18872 FR0010135103 2015-02-01 49797.915 3.368032e+07 \n", + "2 18872 FR0010135103 2015-03-01 50302.627 3.505691e+07 \n", + "3 18872 FR0010135103 2015-04-01 50219.393 3.452433e+07 \n", + "4 18872 FR0010135103 2015-05-01 53685.393 3.699729e+07 \n", + "\n", + " Product - Asset Type_x Product - Strategy_x Product - Legal Status_x \\\n", + "0 Diversified Patrimoine FCP \n", + "1 Diversified Patrimoine FCP \n", + "2 Diversified Patrimoine FCP \n", + "3 Diversified Patrimoine FCP \n", + "4 Diversified Patrimoine FCP \n", + "\n", + " Product - Is Dedie ?_x Product - Fund_x Product - Shareclass Type_x \\\n", + "0 NO Carmignac Patrimoine A \n", + "1 NO Carmignac Patrimoine A \n", + "2 NO Carmignac Patrimoine A \n", + "3 NO Carmignac Patrimoine A \n", + "4 NO Carmignac Patrimoine A \n", + "\n", + " Product - Shareclass Currency_x Registrar Account - Region \\\n", + "0 EUR Switzerland \n", + "1 EUR Switzerland \n", + "2 EUR Switzerland \n", + "3 EUR Switzerland \n", + "4 EUR Switzerland \n", + "\n", + " RegistrarAccount - Country net_flow_qty gross_flow_qty sub_qty red_qty \\\n", + "0 Switzerland 673.990 1045.99 859.990 -186.000 \n", + "1 Switzerland 988.000 1712.00 1350.000 -362.000 \n", + "2 Switzerland 9.710 1561.71 785.710 -776.000 \n", + "3 Switzerland -123.234 1830.19 853.478 -976.712 \n", + "4 Switzerland 121.000 529.00 325.000 -204.000 \n", + "\n", + " n_tx Product - Asset Type_y Product - Strategy_y Product - Legal Status_y \\\n", + "0 9.0 Diversified Patrimoine FCP \n", + "1 12.0 Diversified Patrimoine FCP \n", + "2 12.0 Diversified Patrimoine FCP \n", + "3 11.0 Diversified Patrimoine FCP \n", + "4 6.0 Diversified Patrimoine FCP \n", + "\n", + " Product - Is Dedie ?_y Product - Fund_y Product - Shareclass Type_y \\\n", + "0 NO Carmignac Patrimoine A \n", + "1 NO Carmignac Patrimoine A \n", + "2 NO 
# Enrich the client x ISIN x month panel (df_rel_m) with fund performance
# indicators and monthly rate moves, then force those columns to numeric.

# Performance columns expected from df_perf_monthly (when it is provided).
perf_cols = [
    "ret_fund_m",
    "ret_fund_3m_mean",
    "ret_fund_6m_mean",
    "ret_fund_12m_mean",
    "ret_vol_3m",
    "ret_vol_6m",
    "drawdown_proxy",
    "active_return_m",
    "active_return_3m_mean",
    "active_return_6m_mean",
]
# Rate column expected from df_rates_m (when it is provided).
rate_cols = ["delta_rate_m"]

if df_perf_monthly is not None:
    # Drop perf columns left by a previous execution of this cell; otherwise a
    # second run would produce ret_*_x / ret_*_y duplicates.
    stale_perf = [
        c for c in perf_cols
        if c in df_rel_m.columns and c in df_perf_monthly.columns
    ]
    df_rel_m = df_rel_m.drop(columns=stale_perf).merge(
        df_perf_monthly,
        on=[ISIN_COL, "month"],
        how="left",
        # One perf row per (ISIN, month) is required: duplicates would silently
        # duplicate panel rows and inflate every AUM / flow sum downstream.
        validate="many_to_one",
    )

if df_rates_m is not None:
    stale_rates = [c for c in rate_cols if c in df_rel_m.columns]
    df_rel_m = df_rel_m.drop(columns=stale_rates).merge(
        df_rates_m[["month"] + rate_cols],
        on="month",
        how="left",
        # Same reasoning: exactly one rate observation per month.
        validate="many_to_one",
    )

# Non-parsable values become NaN instead of leaving object-typed columns.
for c in perf_cols + rate_cols:
    if c in df_rel_m.columns:
        df_rel_m[c] = pd.to_numeric(df_rel_m[c], errors="coerce")

display(df_rel_m.head())
+ ] + }, + { + "cell_type": "markdown", + "id": "90772aeb-9aeb-4cc1-bbe3-80f7ca794872", + "metadata": {}, + "source": [ + "## Clustering par fond" + ] + }, + { + "cell_type": "markdown", + "id": "aa6b70d5-7d78-4b72-923f-fd82956f4284", + "metadata": {}, + "source": [ + "## choisir les fonds et normaliser" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "e860b58d-3f3b-457b-b574-5b94a4582a97", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Registrar Account - ID', 'Product - Isin', 'month', 'aum_qty',\n", + " 'aum_val', 'Product - Asset Type_x', 'Product - Strategy_x',\n", + " 'Product - Legal Status_x', 'Product - Is Dedie ?_x',\n", + " 'Product - Fund_x', 'Product - Shareclass Type_x',\n", + " 'Product - Shareclass Currency_x', 'Registrar Account - Region',\n", + " 'RegistrarAccount - Country', 'net_flow_qty', 'gross_flow_qty',\n", + " 'sub_qty', 'red_qty', 'n_tx', 'Product - Asset Type_y',\n", + " 'Product - Strategy_y', 'Product - Legal Status_y',\n", + " 'Product - Is Dedie ?_y', 'Product - Fund_y',\n", + " 'Product - Shareclass Type_y', 'Product - Shareclass Currency_y',\n", + " 'isin_held_flag', 'active_rel_month', 'flow_to_aum', 'turnover_rel',\n", + " 'ret_fund_m', 'ret_fund_3m_mean', 'ret_fund_6m_mean',\n", + " 'ret_fund_12m_mean', 'ret_vol_3m', 'ret_vol_6m', 'drawdown_proxy',\n", + " 'active_return_m', 'active_return_3m_mean', 'active_return_6m_mean',\n", + " 'delta_rate_m'],\n", + " dtype='object')" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_rel_m.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "2fcb1f21-df60-4549-8876-b93d4126f1c5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Product - IsinProduct - Isinfund_family
0FR0010135103FR0010135103FR0010135103
91FR0010147603FR0010147603FR0010147603
99FR0010148981FR0010148981FR0010148981
165FR0010149112FR0010149112FR0010149112
177FR0010149120FR0010149120FR0010149120
256FR0010149161FR0010149161FR0010149161
257FR0010149179FR0010149179FR0010149179
268FR0010149203FR0010149203FR0010149203
269FR0010149302FR0010149302FR0010149302
317FR0010306142FR0010306142FR0010306142
331FR0010312660FR0010312660FR0010312660
338FR0011269067FR0011269067FR0011269067
364FR0011269083FR0011269083FR0011269083
366FR0011269091FR0011269091FR0011269091
367FR0011269109FR0011269109FR0011269109
370FR0011269125FR0011269125FR0011269125
371FR0011269182FR0011269182FR0011269182
373FR0011269190FR0011269190FR0011269190
388FR0011269588FR0011269588FR0011269588
392FR0011269596FR0011269596FR0011269596
\n", + "
" + ], + "text/plain": [ + " Product - Isin Product - Isin fund_family\n", + "0 FR0010135103 FR0010135103 FR0010135103\n", + "91 FR0010147603 FR0010147603 FR0010147603\n", + "99 FR0010148981 FR0010148981 FR0010148981\n", + "165 FR0010149112 FR0010149112 FR0010149112\n", + "177 FR0010149120 FR0010149120 FR0010149120\n", + "256 FR0010149161 FR0010149161 FR0010149161\n", + "257 FR0010149179 FR0010149179 FR0010149179\n", + "268 FR0010149203 FR0010149203 FR0010149203\n", + "269 FR0010149302 FR0010149302 FR0010149302\n", + "317 FR0010306142 FR0010306142 FR0010306142\n", + "331 FR0010312660 FR0010312660 FR0010312660\n", + "338 FR0011269067 FR0011269067 FR0011269067\n", + "364 FR0011269083 FR0011269083 FR0011269083\n", + "366 FR0011269091 FR0011269091 FR0011269091\n", + "367 FR0011269109 FR0011269109 FR0011269109\n", + "370 FR0011269125 FR0011269125 FR0011269125\n", + "371 FR0011269182 FR0011269182 FR0011269182\n", + "373 FR0011269190 FR0011269190 FR0011269190\n", + "388 FR0011269588 FR0011269588 FR0011269588\n", + "392 FR0011269596 FR0011269596 FR0011269596" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# ============================================================\n", + "# 1) Définir une \"famille économique\" de fond à partir de Product - Fund\n", + "# ============================================================\n", + "\n", + "def normalize_fund_name(x):\n", + " if pd.isna(x):\n", + " return np.nan\n", + " s = str(x).strip().upper()\n", + " s = s.replace(\"_\", \" \").replace(\"-\", \" \")\n", + " s = re.sub(r\"\\s+\", \" \", s).strip()\n", + " # enlève une année finale type 2023 / 2024 / 2025\n", + " s = re.sub(r\"\\b20\\d{2}\\b$\", \"\", s).strip()\n", + " s = re.sub(r\"\\s+\", \" \", s).strip()\n", + " return s\n", + "\n", + "df_rel_m[\"fund_family\"] = df_rel_m[\"Product - Isin\"].apply(normalize_fund_name)\n", + "\n", + "display(\n", + " df_rel_m[[ISIN_COL, \"Product - Isin\", \"fund_family\"]]\n", + " .drop_duplicates()\n", + 
" .head(20)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "95004754-7f24-4769-9f64-9cfbab43ddf8", + "metadata": {}, + "outputs": [], + "source": [ + "# # fond : Intuition\n", + "\n", + "# On veut étudier les fonds :\n", + "\n", + "# les plus importants en encours\n", + "# avec assez de clients\n", + "# et donc assez de signal pour clusteriser" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "31a67ca0-bdcd-4461-af7d-1184b5368a06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fund_familyaum_val_totaln_clientsn_isin
0FR00101491204.884612e+092821
1FR00101351034.502206e+092901
2FR00101489813.237251e+092601
3FR00140081Y11.421801e+091391
4LU09926249491.290916e+091511
5LU16237628431.206331e+092011
6FR001400KAV41.178233e+091291
7LU03360840321.051440e+092291
8FR00101493029.536118e+082551
9FR00101491619.442772e+081651
10LU09926312179.104539e+081391
11LU12993063218.665762e+081111
12FR001400U4S36.569309e+081201
13FR00103061426.219282e+082041
14FR00103126603.721387e+081681
\n", + "
" + ], + "text/plain": [ + " fund_family aum_val_total n_clients n_isin\n", + "0 FR0010149120 4.884612e+09 282 1\n", + "1 FR0010135103 4.502206e+09 290 1\n", + "2 FR0010148981 3.237251e+09 260 1\n", + "3 FR00140081Y1 1.421801e+09 139 1\n", + "4 LU0992624949 1.290916e+09 151 1\n", + "5 LU1623762843 1.206331e+09 201 1\n", + "6 FR001400KAV4 1.178233e+09 129 1\n", + "7 LU0336084032 1.051440e+09 229 1\n", + "8 FR0010149302 9.536118e+08 255 1\n", + "9 FR0010149161 9.442772e+08 165 1\n", + "10 LU0992631217 9.104539e+08 139 1\n", + "11 LU1299306321 8.665762e+08 111 1\n", + "12 FR001400U4S3 6.569309e+08 120 1\n", + "13 FR0010306142 6.219282e+08 204 1\n", + "14 FR0010312660 3.721387e+08 168 1" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# ============================================================\n", + "# 2) Choisir les top fonds (au niveau fund_family)\n", + "# ============================================================\n", + "\n", + "TARGET_DATE = pd.Timestamp(\"2025-10-01\")\n", + "\n", + "top_fund_families = (\n", + " df_rel_m[df_rel_m[\"month\"] == TARGET_DATE]\n", + " .groupby(\"fund_family\", as_index=False)\n", + " .agg(\n", + " aum_val_total=(\"aum_val\", \"sum\"),\n", + " n_clients=(ID_COL, \"nunique\"),\n", + " n_isin=(ISIN_COL, \"nunique\")\n", + " )\n", + " .sort_values([\"aum_val_total\", \"n_clients\"], ascending=[False, False])\n", + " .reset_index(drop=True)\n", + ")\n", + "\n", + "# garder des fonds assez gros et avec assez de clients\n", + "top_fund_families = top_fund_families[top_fund_families[\"n_clients\"] >= 20].head(15).copy()\n", + "\n", + "display(top_fund_families)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "d08cc033-0bf2-42a7-9f4d-ffee5afab88a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fund_familyaum_val_totaln_clientsn_isin
0FR00101491204.884612e+092821
1FR00101351034.502206e+092901
2FR00101489813.237251e+092601
3FR00140081Y11.421801e+091391
4LU09926249491.290916e+091511
5LU16237628431.206331e+092011
6FR001400KAV41.178233e+091291
7LU03360840321.051440e+092291
8FR00101493029.536118e+082551
9FR00101491619.442772e+081651
10LU09926312179.104539e+081391
11LU12993063218.665762e+081111
12FR001400U4S36.569309e+081201
13FR00103061426.219282e+082041
14FR00103126603.721387e+081681
\n", + "
" + ], + "text/plain": [ + " fund_family aum_val_total n_clients n_isin\n", + "0 FR0010149120 4.884612e+09 282 1\n", + "1 FR0010135103 4.502206e+09 290 1\n", + "2 FR0010148981 3.237251e+09 260 1\n", + "3 FR00140081Y1 1.421801e+09 139 1\n", + "4 LU0992624949 1.290916e+09 151 1\n", + "5 LU1623762843 1.206331e+09 201 1\n", + "6 FR001400KAV4 1.178233e+09 129 1\n", + "7 LU0336084032 1.051440e+09 229 1\n", + "8 FR0010149302 9.536118e+08 255 1\n", + "9 FR0010149161 9.442772e+08 165 1\n", + "10 LU0992631217 9.104539e+08 139 1\n", + "11 LU1299306321 8.665762e+08 111 1\n", + "12 FR001400U4S3 6.569309e+08 120 1\n", + "13 FR0010306142 6.219282e+08 204 1\n", + "14 FR0010312660 3.721387e+08 168 1" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top_fund_families = top_fund_families[\n", + " (top_fund_families[\"n_clients\"] >= 20)\n", + "].copy()\n", + "top_fund_families #15 plus gros fond en terme de aum val total et n clients" + ] + }, + { + "cell_type": "markdown", + "id": "b56b8070-3d60-4d18-9c2f-f800c88ec804", + "metadata": {}, + "source": [ + "## Clustering par fund family" + ] + }, + { + "cell_type": "markdown", + "id": "1c765db8-7120-4176-a90c-2fffdbc85b54", + "metadata": {}, + "source": [ + "### construire la base mensuelle au niveau client × fond" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "5014d562-073b-4ff8-b2c3-cf7343e1badf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Registrar Account - IDfund_familymonthfund_aum_qtyfund_aum_valnet_flow_qtygross_flow_qtyn_txret_fund_mret_fund_6m_meanportfolio_aum_totalfund_weightheld_flagactive_flagflow_to_aumturnover
018872FR00101351032015-01-0149094.9153.242523e+07673.9901045.999.00.0655420.020252179864.6370.272953110.0137280.021305
118872FR00101351032015-02-0149797.9153.368032e+07988.0001712.0012.00.0256200.020209186761.7360.266637110.0198400.034378
218872FR00101351032015-03-0150302.6273.505691e+079.7101561.7112.00.0306140.024664190357.7180.264252110.0001930.031046
318872FR00101351032015-04-0150219.3933.452433e+07-123.2341830.1911.0-0.0239090.020089191429.3240.26233811-0.0024540.036443
418872FR00101351032015-05-0153685.3933.699729e+07121.000529.006.00.0032400.018805189056.4750.283963110.0022540.009854
\n", + "
" + ], + "text/plain": [ + " Registrar Account - ID fund_family month fund_aum_qty fund_aum_val \\\n", + "0 18872 FR0010135103 2015-01-01 49094.915 3.242523e+07 \n", + "1 18872 FR0010135103 2015-02-01 49797.915 3.368032e+07 \n", + "2 18872 FR0010135103 2015-03-01 50302.627 3.505691e+07 \n", + "3 18872 FR0010135103 2015-04-01 50219.393 3.452433e+07 \n", + "4 18872 FR0010135103 2015-05-01 53685.393 3.699729e+07 \n", + "\n", + " net_flow_qty gross_flow_qty n_tx ret_fund_m ret_fund_6m_mean \\\n", + "0 673.990 1045.99 9.0 0.065542 0.020252 \n", + "1 988.000 1712.00 12.0 0.025620 0.020209 \n", + "2 9.710 1561.71 12.0 0.030614 0.024664 \n", + "3 -123.234 1830.19 11.0 -0.023909 0.020089 \n", + "4 121.000 529.00 6.0 0.003240 0.018805 \n", + "\n", + " portfolio_aum_total fund_weight held_flag active_flag flow_to_aum \\\n", + "0 179864.637 0.272953 1 1 0.013728 \n", + "1 186761.736 0.266637 1 1 0.019840 \n", + "2 190357.718 0.264252 1 1 0.000193 \n", + "3 191429.324 0.262338 1 1 -0.002454 \n", + "4 189056.475 0.283963 1 1 0.002254 \n", + "\n", + " turnover \n", + "0 0.021305 \n", + "1 0.034378 \n", + "2 0.031046 \n", + "3 0.036443 \n", + "4 0.009854 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(962611, 16)\n" + ] + } + ], + "source": [ + "# ============================================================\n", + "# 3) Construire la base mensuelle client x fund_family x mois\n", + "# ============================================================\n", + "\n", + "portfolio_month = (\n", + " df_rel_m\n", + " .groupby([ID_COL, \"month\"], as_index=False)\n", + " .agg(portfolio_aum_total=(\"aum_qty\", \"sum\"))\n", + ")\n", + "\n", + "family_month = (\n", + " df_rel_m\n", + " .groupby([ID_COL, \"fund_family\", \"month\"], as_index=False)\n", + " .agg(\n", + " fund_aum_qty=(\"aum_qty\", \"sum\"),\n", + " fund_aum_val=(\"aum_val\", \"sum\"),\n", + " net_flow_qty=(\"net_flow_qty\", \"sum\"),\n", + " 
def build_fund_dataset(fund_family, min_months=3):
    """Build the one-row-per-client feature table for a single fund family.

    Reads the notebook-level `family_month` table (client x fund_family x
    month), keeps the rows of `fund_family`, derives monthly events and rolling
    indicators, then aggregates them per client.

    Parameters
    ----------
    fund_family : value compared for equality against `family_month["fund_family"]`.
    min_months : int, default 3
        Clients observed on fewer distinct months are dropped.

    Returns
    -------
    pandas.DataFrame or None
        None when the fund family has no row in `family_month`; otherwise one
        row per client with the aggregated features plus `corr_flow_ret_1m`.

    Notes
    -----
    - A client already holding the fund on its first observed month gets an
      entry event for that month (previous holding is taken as 0).
    - `buy_after_good_perf_share` is the share of net-buy months whose previous
      observed month had a strictly positive fund return. Buy months without a
      known previous return are ignored; clients with no usable buy month get
      NaN. It used to be the plain share of buy months, which did not depend on
      performance at all.
    """
    g = (
        family_month[family_month["fund_family"] == fund_family]
        .sort_values([ID_COL, "month"])
        .copy()
    )

    if g.empty:
        return None

    # Entry / exit events from the month-over-month change of the held flag.
    g["prev_held"] = g.groupby(ID_COL)["held_flag"].shift(1).fillna(0).astype(int)
    g["entry_event"] = ((g["prev_held"] == 0) & (g["held_flag"] == 1)).astype(int)
    g["exit_event"] = ((g["prev_held"] == 1) & (g["held_flag"] == 0)).astype(int)
    g["buy_month"] = (g["net_flow_qty"] > 0).astype(int)

    # Fund return of the client's previous observed month (rows are sorted by
    # client then month, so shift(1) is the preceding observation).
    prev_ret = g.groupby(ID_COL)["ret_fund_m"].shift(1)
    is_buy_with_known_lag = (g["buy_month"] == 1) & prev_ret.notna()
    g["buy_ref"] = is_buy_with_known_lag.astype(int)
    g["buy_after_good_perf"] = (is_buy_with_known_lag & (prev_ret > 0)).astype(int)

    # Simple 6-observation rolling means, computed per client.
    g["turnover_6m_mean"] = (
        g.groupby(ID_COL)["turnover"]
        .transform(lambda s: s.rolling(6, min_periods=1).mean())
    )

    g["flow_to_aum_6m_mean"] = (
        g.groupby(ID_COL)["flow_to_aum"]
        .transform(lambda s: s.rolling(6, min_periods=1).mean())
    )

    df_fund = (
        g.groupby(ID_COL, as_index=False)
        .agg(
            n_months_obs=("month", "nunique"),
            fund_aum_mean=("fund_aum_qty", "mean"),
            fund_aum_last=("fund_aum_qty", "last"),
            fund_weight_mean=("fund_weight", "mean"),
            fund_weight_last=("fund_weight", "last"),
            held_month_share=("held_flag", "mean"),
            active_month_share=("active_flag", "mean"),
            entry_count=("entry_event", "sum"),
            exit_count=("exit_event", "sum"),
            turnover_mean=("turnover", "mean"),
            turnover_6m_mean=("turnover_6m_mean", "last"),
            flow_to_aum_mean=("flow_to_aum", "mean"),
            flow_to_aum_6m_mean=("flow_to_aum_6m_mean", "last"),
            ret_fund_m_mean=("ret_fund_m", "mean"),
            ret_fund_6m_mean=("ret_fund_6m_mean", "mean"),
            _n_buy_ref=("buy_ref", "sum"),
            _n_buy_after_good=("buy_after_good_perf", "sum"),
        )
    )

    # Share of usable buy months that followed a positive return; NaN (not 0)
    # when the client has no usable buy month, so the imputer handles it.
    usable_buys = df_fund["_n_buy_ref"].where(df_fund["_n_buy_ref"] > 0)
    df_fund["buy_after_good_perf_share"] = df_fund["_n_buy_after_good"] / usable_buys
    df_fund = df_fund.drop(columns=["_n_buy_ref", "_n_buy_after_good"])

    # Per-client flow / return relationship. Only the two needed columns are
    # passed to apply, which keeps the grouping column out of the applied frame.
    corr_block = (
        g.groupby(ID_COL)[["net_flow_qty", "ret_fund_m"]]
        .apply(lambda z: compute_corr(z["net_flow_qty"], z["ret_fund_m"]))
        .rename("corr_flow_ret_1m")
        .reset_index()
    )

    df_fund = df_fund.merge(corr_block, on=ID_COL, how="left")
    df_fund = df_fund[df_fund["n_months_obs"] >= min_months].copy()

    return df_fund
"f3f7adf4-6f4a-414d-89e5-b8f338f65c86", + "metadata": {}, + "outputs": [], + "source": [ + "# ============================================================\n", + "# 5) Outils de clustering\n", + "# ============================================================\n", + "\n", + "def prep_matrix(df, feature_cols):\n", + " X = df[feature_cols].copy()\n", + " X = X.replace([np.inf, -np.inf], np.nan)\n", + "\n", + " pipe = Pipeline([\n", + " (\"imputer\", SimpleImputer(strategy=\"median\")),\n", + " (\"scaler\", RobustScaler()),\n", + " ])\n", + "\n", + " X_scaled = pipe.fit_transform(X)\n", + " return X_scaled\n", + "\n", + "def cluster_balance_summary(labels):\n", + " vc = pd.Series(labels).value_counts().sort_index()\n", + " n = int(vc.sum())\n", + " return {\n", + " \"n_clusters\": int(len(vc)),\n", + " \"min_cluster_size\": int(vc.min()),\n", + " \"max_cluster_size\": int(vc.max()),\n", + " \"dominant_cluster_share\": float(vc.max() / n) if n > 0 else np.nan,\n", + " \"singleton_clusters\": int((vc == 1).sum()),\n", + " }\n", + "\n", + "def evaluate_partition(X_scaled, labels):\n", + " out = cluster_balance_summary(labels)\n", + "\n", + " if len(np.unique(labels)) < 2:\n", + " out[\"silhouette\"] = np.nan\n", + " out[\"davies_bouldin\"] = np.nan\n", + " out[\"calinski_harabasz\"] = np.nan\n", + " return out\n", + "\n", + " out[\"silhouette\"] = silhouette_score(X_scaled, labels)\n", + " out[\"davies_bouldin\"] = davies_bouldin_score(X_scaled, labels)\n", + " out[\"calinski_harabasz\"] = calinski_harabasz_score(X_scaled, labels)\n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "668aed37-0f3c-4d2b-9b8f-828d4ce24889", + "metadata": {}, + "outputs": [], + "source": [ + "# k=2,.,10 pour chaque fond \n", + "\n", + "# ============================================================\n", + "# 6) Tester K = 2 à 10 pour un fond\n", + "# ============================================================\n", + "\n", + "FEATURE_COLS_FUND = [\n", + " 
\"fund_weight_mean\",\n", + " \"fund_weight_last\",\n", + " \"fund_aum_mean\",\n", + " \"fund_aum_last\",\n", + " \"held_month_share\",\n", + " \"active_month_share\",\n", + " \"entry_count\",\n", + " \"exit_count\",\n", + " \"turnover_mean\",\n", + " \"turnover_6m_mean\",\n", + " \"flow_to_aum_mean\",\n", + " \"flow_to_aum_6m_mean\",\n", + " \"corr_flow_ret_1m\",\n", + " \"buy_after_good_perf_share\",\n", + "]\n", + "\n", + "def run_kmeans_grid_for_fund(fund_family, k_min=4, k_max=10):\n", + " df_fund = build_fund_dataset(fund_family)\n", + "\n", + " if df_fund is None or df_fund.empty or len(df_fund) < 10:\n", + " return None\n", + "\n", + " feature_cols = [c for c in FEATURE_COLS_FUND if c in df_fund.columns and not df_fund[c].isna().all()]\n", + " X_scaled = prep_matrix(df_fund, feature_cols)\n", + "\n", + " rows = []\n", + " models = {}\n", + "\n", + " for k in range(k_min, k_max + 1):\n", + " if k >= len(df_fund):\n", + " continue\n", + "\n", + " km = KMeans(n_clusters=k, random_state=RANDOM_STATE, n_init=50)\n", + " labels = km.fit_predict(X_scaled)\n", + "\n", + " rows.append({\n", + " \"k\": k,\n", + " **evaluate_partition(X_scaled, labels)\n", + " })\n", + " models[k] = labels\n", + "\n", + " diag = pd.DataFrame(rows)\n", + " return {\n", + " \"data\": df_fund,\n", + " \"features\": feature_cols,\n", + " \"diag\": diag,\n", + " \"models\": models,\n", + " \"X_scaled\": X_scaled,\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "7ce93ee1-129d-41be-af41-958361f58bf3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nombre de fonds étudiés : 15\n", + "['FR0010149120', 'FR0010135103', 'FR0010148981', 'FR00140081Y1', 'LU0992624949', 'LU1623762843', 'FR001400KAV4', 'LU0336084032', 'FR0010149302', 'FR0010149161', 'LU0992631217', 'LU1299306321', 'FR001400U4S3', 'FR0010306142', 'FR0010312660']\n" + ] + } + ], + "source": [ + "# 
# ============================================================
# 7) Run the study on every top fund
# ============================================================

# Results keyed by fund family; families for which the grid search returned
# None are left out.
fund_results = {}

for fund_family in top_fund_families["fund_family"]:
    res = run_kmeans_grid_for_fund(fund_family, k_min=4, k_max=10)
    if res is None:
        continue
    fund_results[fund_family] = res

print("Nombre de fonds étudiés :", len(fund_results))
print(list(fund_results.keys()))
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
kn_clustersmin_cluster_sizemax_cluster_sizedominant_cluster_sharesingleton_clusterssilhouettedavies_bouldincalinski_harabaszfund_family
04413170.99062530.9527020.0142586.236159e+04FR0010149120
15513140.98125030.8883340.2600386.892536e+04FR0010149120
26613020.94375030.7797850.3174557.565806e+04FR0010149120
37712870.89687530.7834680.3629429.135321e+04FR0010149120
48812830.88437530.8028900.3754581.060793e+05FR0010149120
59912830.88437540.8028100.3530711.197763e+05FR0010149120
6101012800.87500040.7854370.3616891.324484e+05FR0010149120
74413230.96417920.8625260.3967611.072268e+07FR0010135103
85513190.95223920.8781780.4082511.251565e+07FR0010135103
96613170.94626920.8774370.4380451.375313e+07FR0010135103
107713170.94626920.8735530.3484041.616668e+07FR0010135103
118813050.91044820.8147950.3809751.996607e+07FR0010135103
129912890.86268720.7715770.4416112.164950e+07FR0010135103
13101012920.87164240.7807760.3442812.352434e+07FR0010135103
144413110.98417720.9512340.2460021.197836e+05FR0010148981
155513080.97468430.9235680.2918311.194490e+05FR0010148981
166613070.97151940.9252310.2959441.336923e+05FR0010148981
177713060.96835440.9019490.1961621.719620e+05FR0010148981
188812980.94303840.8790110.2312912.574472e+05FR0010148981
199912970.93987350.8830010.2229992.976192e+05FR0010148981
20101012910.92088650.8537950.2663403.422146e+05FR0010148981
214411300.92857120.8466600.2728051.058871e+03FR00140081Y1
225511240.88571420.8011990.2912061.457334e+03FR00140081Y1
236611160.82857120.7980780.3542932.012263e+03FR00140081Y1
247711160.82857130.7973750.3027752.397924e+03FR00140081Y1
258811150.82142940.7847890.2432823.247377e+03FR00140081Y1
269911150.82142940.7762580.3548553.919863e+03FR00140081Y1
27101011060.75714340.6789700.3699644.284648e+03FR00140081Y1
284411850.96858620.9376350.0922287.051123e+03LU0992624949
295511830.95811520.9397910.1749301.430396e+04LU0992624949
\n", + "
" + ], + "text/plain": [ + " k n_clusters min_cluster_size max_cluster_size \\\n", + "0 4 4 1 317 \n", + "1 5 5 1 314 \n", + "2 6 6 1 302 \n", + "3 7 7 1 287 \n", + "4 8 8 1 283 \n", + "5 9 9 1 283 \n", + "6 10 10 1 280 \n", + "7 4 4 1 323 \n", + "8 5 5 1 319 \n", + "9 6 6 1 317 \n", + "10 7 7 1 317 \n", + "11 8 8 1 305 \n", + "12 9 9 1 289 \n", + "13 10 10 1 292 \n", + "14 4 4 1 311 \n", + "15 5 5 1 308 \n", + "16 6 6 1 307 \n", + "17 7 7 1 306 \n", + "18 8 8 1 298 \n", + "19 9 9 1 297 \n", + "20 10 10 1 291 \n", + "21 4 4 1 130 \n", + "22 5 5 1 124 \n", + "23 6 6 1 116 \n", + "24 7 7 1 116 \n", + "25 8 8 1 115 \n", + "26 9 9 1 115 \n", + "27 10 10 1 106 \n", + "28 4 4 1 185 \n", + "29 5 5 1 183 \n", + "\n", + " dominant_cluster_share singleton_clusters silhouette davies_bouldin \\\n", + "0 0.990625 3 0.952702 0.014258 \n", + "1 0.981250 3 0.888334 0.260038 \n", + "2 0.943750 3 0.779785 0.317455 \n", + "3 0.896875 3 0.783468 0.362942 \n", + "4 0.884375 3 0.802890 0.375458 \n", + "5 0.884375 4 0.802810 0.353071 \n", + "6 0.875000 4 0.785437 0.361689 \n", + "7 0.964179 2 0.862526 0.396761 \n", + "8 0.952239 2 0.878178 0.408251 \n", + "9 0.946269 2 0.877437 0.438045 \n", + "10 0.946269 2 0.873553 0.348404 \n", + "11 0.910448 2 0.814795 0.380975 \n", + "12 0.862687 2 0.771577 0.441611 \n", + "13 0.871642 4 0.780776 0.344281 \n", + "14 0.984177 2 0.951234 0.246002 \n", + "15 0.974684 3 0.923568 0.291831 \n", + "16 0.971519 4 0.925231 0.295944 \n", + "17 0.968354 4 0.901949 0.196162 \n", + "18 0.943038 4 0.879011 0.231291 \n", + "19 0.939873 5 0.883001 0.222999 \n", + "20 0.920886 5 0.853795 0.266340 \n", + "21 0.928571 2 0.846660 0.272805 \n", + "22 0.885714 2 0.801199 0.291206 \n", + "23 0.828571 2 0.798078 0.354293 \n", + "24 0.828571 3 0.797375 0.302775 \n", + "25 0.821429 4 0.784789 0.243282 \n", + "26 0.821429 4 0.776258 0.354855 \n", + "27 0.757143 4 0.678970 0.369964 \n", + "28 0.968586 2 0.937635 0.092228 \n", + "29 0.958115 2 0.939791 0.174930 \n", + "\n", + " 
# ============================================================
# 8) Summary table: one row per (fund, K) combination
# ============================================================

# Tag each fund's diagnostics frame with the fund identifier so the stacked
# table can still be filtered or grouped by fund afterwards.
rows = []
for fund_family, res in fund_results.items():
    # .assign returns a new frame, so the frame stored in fund_results
    # is left untouched.
    diag = res["diag"].assign(fund_family=fund_family)
    rows.append(diag)

# Stack all funds vertically and renumber the rows 0..n-1.
df_fund_diag = pd.concat(rows, ignore_index=True)

display(df_fund_diag.head(30))
retenu : 4\n", + "cluster\n", + "0 317\n", + "1 1\n", + "2 1\n", + "3 1\n", + "Name: count, dtype: int64\n", + "\n", + "==============================\n", + "FOND : FR0010135103\n", + "K retenu : 5\n", + "cluster\n", + "0 4\n", + "1 1\n", + "2 319\n", + "3 1\n", + "4 10\n", + "Name: count, dtype: int64\n", + "\n", + "==============================\n", + "FOND : FR0010148981\n", + "K retenu : 4\n", + "cluster\n", + "0 311\n", + "1 1\n", + "2 1\n", + "3 3\n", + "Name: count, dtype: int64\n", + "\n", + "==============================\n", + "FOND : FR00140081Y1\n", + "K retenu : 4\n", + "cluster\n", + "0 130\n", + "1 1\n", + "2 1\n", + "3 8\n", + "Name: count, dtype: int64\n", + "\n", + "==============================\n", + "FOND : LU0992624949\n", + "K retenu : 5\n", + "cluster\n", + "0 183\n", + "1 1\n", + "2 1\n", + "3 4\n", + "4 2\n", + "Name: count, dtype: int64\n", + "\n", + "==============================\n", + "FOND : LU1623762843\n", + "K retenu : 5\n", + "cluster\n", + "0 206\n", + "1 1\n", + "2 1\n", + "3 3\n", + "4 1\n", + "Name: count, dtype: int64\n", + "\n", + "==============================\n", + "FOND : FR001400KAV4\n", + "K retenu : 4\n", + "cluster\n", + "0 125\n", + "1 1\n", + "2 1\n", + "3 2\n", + "Name: count, dtype: int64\n", + "\n", + "==============================\n", + "FOND : LU0336084032\n", + "K retenu : 4\n", + "cluster\n", + "0 265\n", + "1 1\n", + "2 5\n", + "3 1\n", + "Name: count, dtype: int64\n", + "\n", + "==============================\n", + "FOND : FR0010149302\n", + "K retenu : 4\n", + "cluster\n", + "0 309\n", + "1 1\n", + "2 1\n", + "3 2\n", + "Name: count, dtype: int64\n", + "\n", + "==============================\n", + "FOND : FR0010149161\n", + "K retenu : 4\n", + "cluster\n", + "0 203\n", + "1 1\n", + "2 1\n", + "3 1\n", + "Name: count, dtype: int64\n", + "\n", + "==============================\n", + "FOND : LU0992631217\n", + "K retenu : 4\n", + "cluster\n", + "0 156\n", + "1 1\n", + "2 1\n", + "3 3\n", + "Name: count, 
# ============================================================
# 9) Pick one K per fund and print the resulting cluster sizes
#    Ranking criteria, in order of priority:
#      1. fewest singleton clusters (0 whenever a singleton-free K exists)
#      2. highest silhouette
#      3. smallest dominant-cluster share
# ============================================================

best_partitions = {}

for fund_family, res in fund_results.items():
    diag = res["diag"].copy()

    # Rank on the singleton count first instead of filtering on "== 0" and
    # falling back to the full grid. When a singleton-free K exists it is
    # still the one selected; when none exists, the K with the fewest
    # one-member clusters is preferred, rather than a pure max-silhouette
    # choice that rewards one dominant cluster plus a few isolated points.
    diag2 = diag.sort_values(
        ["singleton_clusters", "silhouette", "dominant_cluster_share"],
        ascending=[True, False, True],
    )

    # A row extracted with .iloc comes back as a Series, so "k" may have been
    # upcast to float; cast it back to int before using it as a lookup key.
    best_k = int(diag2.iloc[0]["k"])

    # The entry stored under best_k is used below as the per-row cluster label.
    labels = res["models"][best_k]

    # Work on a copy so the data kept in fund_results is not modified.
    df_f = res["data"].copy()
    df_f["cluster"] = labels

    best_partitions[fund_family] = {
        "k": best_k,
        "data": df_f
    }

    print("\n==============================")
    print("FOND :", fund_family)
    print("K retenu :", best_k)
    print(df_f["cluster"].value_counts().sort_index())
"execution_count": 39, + "id": "e5e397ed-3514-4377-8c9b-ed474e70f8b8", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAocAAAHWCAYAAAAFLiMtAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAXttJREFUeJzt3XlcVOX+B/DPMDAzDDADyK4gKu4bhkqoLRZJZqXWNbJyT9Owew1zy1JbTNMWzUzNCrvXJfP2S80FJdTKRE0FcwFXDBUBEZlBdpjn98dcTo4zICgwMH7er9e8bM7znXOecxj00znneY5MCCFARERERATAztodICIiIqKGg+GQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRFQnVq1aBZlMJr1UKhXatGmDiRMnIjMz06w+MzMTb7zxBtq1awe1Wg0nJyeEhITg/fffR25ursVt9OzZEzKZDMuWLauz/bh5H25++fj4SDVz5swxaXNwcEBgYCD++c9/Vtr3ffv2oU+fPlCr1fDx8cE///lP3Lhxw6yuuLgY06ZNg5+fHxwdHREaGoq4uDizup07d2LMmDHo1KkT5HI5AgMDK90ng8GABQsWoEWLFlCpVOjSpQvWrVtnVrdy5Uo89NBD8Pb2hlKpRIsWLTBq1ChcuHDBrHbZsmUYMmQIAgICIJPJMHLkSIvbjo+Px+jRo9GmTRuo1Wq0bNkSL7/8Mq5cuWKxvqSkBB988AHatWsHlUoFb29vDBgwAJcuXTKrPXLkCJ5++mm4u7tDrVajU6dO+Oyzz8zqqnvsie5V9tbuABHZtnfffRctWrRAUVER9u7di2XLlmHbtm04fvw41Go1AOCPP/7AE088gRs3buCll15CSEgIAODQoUOYP38+fv31V+zcudNkvWfOnMEff/yBwMBArFmzBhMmTKizfXjssccwfPhwk2WOjo5mdcuWLYOzszPy8/MRHx+PJUuW4MiRI9i7d69JXVJSEh599FG0b98en3zyCS5duoSPPvoIZ86cwfbt201qR44cif/+97+YNGkSWrdujVWrVuGJJ57A7t270adPH6lu7dq1WL9+Pe677z74+flVuT8zZ87E/PnzMXbsWPTo0QObNm3CCy+8AJlMhueff16qS0xMRIsWLfD000/Dzc0NqampWLlyJbZs2YKjR4+abOfDDz9EXl4eevbsWWnQA4Bp06YhJycHQ4YMQevWrXH+/Hl8/vnn2LJlC5KSkkxCd2lpKQYMGIB9+/Zh7Nix6NKlC65fv44DBw5Ap9OhWbNmUu3OnTvx1FNPoVu3bnj77bfh7OyMc+fOmYXImhx7onuWICKqAzExMQKA+OOPP0yWR0dHCwBi7dq1Qgghrl+/Lpo2bSq8vb1FcnKy2XoyMjLEe++9Z7Z81qxZwsvLS/zwww9CJpOJ1NTUOtkPACIqKqrKmtmzZwsA4urVqybLIyMjBQBx4MABk+X9+/cXvr6+QqfTSctWrlwpAIgdO3ZIyw4cOCAAiIULF0rLCgsLRatWrURYWJjJOi9fvixKSkqEEEIMGDBANG/e3GJfL126JBwcHEz2yWAwiAceeEA0a9ZMlJWVVbmvhw4dEgDEvHnzTJZfuHBBGAwGIYQQTk5OYsSIERY//8svv4jy8nKzZQDEzJkzTZZ/+OGHwsHBwez43Uqn0wlvb28xePBgs3XfqrrHnuhexsvKRFSvHnnkEQBAamoqAGDFihW4fPkyPvnkE7Rr186s3tvbG2+99ZbZ8rVr1+If//gHnnzySWi1Wqxdu7ZuO34HHnjgAQDAuXPnpGV6vR5x
cXF46aWXoNFopOXDhw+Hs7Mzvv/+e2nZf//7X8jlcowbN05aplKpMGbMGCQkJODixYvScj8/Pzg4ONy2T5s2bUJpaSleffVVaZlMJsOECRNw6dIlJCQkVPn5isvVt14ub968OWQy2W23/+CDD8LOzs5smbu7O5KTk6VlBoMBixcvxuDBg9GzZ0+UlZWhoKDA4jrXrl2LzMxMzJ07F3Z2dsjPz4fBYDCrq8mxJ7qXMRwSUb2qCEpNmjQBAGzevBmOjo74xz/+Ue11HDhwAGfPnsXQoUOhUCjwzDPPYM2aNXXSXwAoKipCdna2yau4uPi2n6u4N8/NzU1aduzYMZSVlaF79+4mtQqFAsHBwUhMTJSWJSYmok2bNiZBBjDeawkYL5HWVGJiIpycnNC+fXuL67x5+xWuXbuGrKwsHDp0CKNGjQIAPProozXedmVu3LiBGzduwMPDQ1p28uRJpKeno0uXLhg3bhycnJzg5OSELl26YPfu3Saf//nnn6HRaHD58mW0bdsWzs7O0Gg0mDBhAoqKiqS6mhx7onsZwyER1SmdTofs7GxcunQJ69evx7vvvgtHR0c8+eSTAIDk5GS0adMGCoWi2utcvXo1/P390bt3bwDA888/j5MnT95RWKqOr7/+Gp6eniYvSwM4cnJykJ2djb/++gsxMTFYunQpPD098eCDD0o1Fffj+fr6mn3e19cX6enpJrWV1QEwqa2uK1euwNvb2+wsX1XrbNq0Kby9vdGjRw/s27cPn332GR577LEab7syixYtQklJCSIjI6VlZ86cAQB8+umn2LNnD1asWIGYmBgUFRXh8ccfx59//mlSW1ZWhoEDByIiIgI//PADRo8ejeXLl0thtmLfb97Xm9167InuZRyQQkR1Kjw83OR98+bNsWbNGjRt2hSA8VKfi4tLtddXVlaG9evXY8SIEVLAeeSRR+Dl5YU1a9YgODi41vpeYeDAgZg4caLJso4dO5rVtW3b1uR9586dERMTIw28AYDCwkIAgFKpNPu8SqWS2itqK6u7eV01cSfr3L59O4qKipCcnIzVq1cjPz+/xtutzK+//op33nkHzz33nHTLAQBp9HBeXh4SExPh7+8PwPizDgoKwoIFC7B69WqptqCgAOPHj5dGJz/zzDMoKSnBihUr8O6776J169Y1OvZE9zKGQyKqU0uXLkWbNm1gb28Pb29vtG3b1uSeM41Gg7y8vGqvb+fOnbh69Sp69uyJs2fPSsv79u2LdevW4cMPPzS7p+1mGRkZJu+1Wq3Fkcc3a9asmVnIteSHH36ARqPB1atX8dlnnyE1NdVs3RXvLV2WLioqMql3dHSstO7mddXEnayzb9++AID+/ftj4MCB6NSpE5ydnc0Cc02lpKRg8ODB6NSpE7766iuzfgJA7969pWAIAAEBAejTpw/27dtnVjt06FCTdbzwwgtYsWIFEhIS0Lp16xode6J7GcMhEdWpnj17mt3jdbN27dohKSkJJSUl1bq0XHFv4XPPPWex/ZdffpHCjCW3XlKMiYmpdE6+mnrwwQel++aeeuopdO7cGS+++CIOHz4sBdaK7Vua7uXKlSsm08P4+vri8uXLFusA3HbKGkt8fX2xe/duCCFMLi1Xd52tWrVCt27dsGbNmrsKhxcvXkS/fv2g1Wqxbds2s7PHFf3w9vY2+6yXl5fJ/YF+fn44ceKEWa2XlxcA4Pr16wBqduyJ7mW855CIrOqpp55CYWEhfvjhh9vW5ufnY9OmTYiMjMSGDRvMXr6+vrcdmBIXF2fyioiIqK1dMeHs7IzZs2cjKSnJZBRsp06dYG9vj0OHDpnUl5SUICkpyeSyeHBwME6fPg29Xm9Se+DAAam9poKDg1FQUGAyMrim6ywsLIROp6vxtitcu3YN/fr1Q3FxMXbs2GHxHsDOnTvDwcHBYjhOT0+Hp6en9L5iXsxbayvuIayorcmxJ7qnWXsuHSKyTZXNc3irnJwc4evrK3x9fcWpU6fM
2jMzM6V5Dv/zn/8IAOLXX3+1uK6xY8cKV1dXUVRUdPc78D+4i3kOS0pKRLNmzURwcLDJ8scff1z4+voKvV4vLfvqq68EALF9+3Zp2f79+83mOSwqKhJBQUEiNDS00v5UNc/hxYsXK53nsGnTptI8h6WlpSInJ8fs8wcOHBByuVwMGzas0u1XNc/hjRs3RM+ePYWLi4s4dOhQpesQQoiBAwcKuVxuMv/lyZMnhVwuF6+++qq07MiRIwKAeOGFF0w+P3ToUGFvby8uX74sLavusSe6l/GyMhFZlZubG3788Uc88cQTCA4ONnlCypEjR7Bu3TqEhYUBMF5SbtKkCXr16mVxXU8//TRWrlyJrVu34plnnqm3faiMg4MD/vWvf2HKlCmIjY3F448/DgCYO3cuevXqhYceegjjxo3DpUuX8PHHH6Nfv35SDQCEhoZiyJAhmDFjBrKyshAUFIRvv/0WFy5cwNdff22yrT///BObN28GAJw9exY6nQ7vv/8+AKBr16546qmnABjvn5w0aRIWLlyI0tJS9OjRAxs3bsRvv/2GNWvWQC6XAzAO8vD390dkZCQ6duwIJycnHDt2DDExMdBqtXj77bdNtv/TTz/h6NGjAIxPNvnzzz+l7T/99NPo0qULAODFF1/EwYMHMXr0aCQnJ5ucwXR2dsagQYOk9x988AHi4+PxyCOP4J///CcA4LPPPoO7uzvefPNNqa5bt24YPXo0vvnmG5SVleGhhx7Cnj17sGHDBsyYMcPkcnF1jz3RPc3a6ZSIbFN1zxxWSE9PF6+//rpo06aNUKlUQq1Wi5CQEDF37lyh0+lEZmamsLe3r/KMVUFBgVCr1WLw4MG1tRt3deZQCOPTO7RarXjooYdMlv/222+iV69eQqVSCU9PTxEVFWVyNqtCYWGheOONN4SPj49QKpWiR48eIjY21qyu4nhbet16Fq+8vFx88MEHonnz5kKhUIiOHTuK1atXm9QUFxeLf/3rX6JLly5Co9EIBwcH0bx5czFmzBiLT6MZMWJEpduPiYmR6po3b15pnaWznYcPHxbh4eHCyclJuLi4iIEDB4rTp0+b1ZWUlIg5c+aI5s2bCwcHBxEUFCQ+/fRTs7qaHHuie5VMCCHqNY0SERERUYPFASlEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpJwEmwrMRgMSE9Ph4uLi8nzTYmIiIhqmxACeXl58PPzk571XhmGQytJT0+Hv7+/tbtBRERE95CLFy+iWbNmVdYwHFqJi4sLAOMPSaPRWLk3REREZMv0ej38/f2l/FEVhkMrqbiUrNFoGA6JiIioXlTnVjYOSCEiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSfj4PBtTZihDZn4mDmYcxDndOXT26IwuHl3g6+xr7a4RERFRI8BwaEMMwoCT107i5Z0vo7CsUFrurnJHTEQMWrq2tGLviIiIqDHgZWUbklWQhdd2vWYSDAEgpygHU3+dipyiHCv1jIiIiBoLhkMbklWQVWkAPHX9FK4XXa/nHhEREVFjw3BoQwpKC6psLykvqaeeEBERUWPFcGhD/Jz9IIPMYpuTgxO0Sm0994iIiIgaG4ZDG+Kucsfz7Z632PZat9fg5ehVzz0iIiKixoajlW2Is8IZ47uMR6AmECuPrUR2YTaauTTDpG6TcL/v/bCX88d9r9EX61FcXgxHe0c4K5yt3R0iImoEZEIIYe1O3Iv0ej20Wi10Oh00Gk2trlsIgauFV1FmKIODnQM81Z61un5q+HTFOiRfS8byo8tx6cYltHVriwnBE9BS2xJqB7W1u0dERPWsJrmDp5JskEwmg5eal5DvVYVlhdh0dhMWHlooLcssyMRvl3/D4r6L8ZD/Q7CT8Y4SIiKyjP9CENmYa4XX8OmRT82WCwi8
k/AOrhZctUKviIiosWA4JLIx6TfSUWYos9h2regacotz67dDRETUqDAcEtkYuZ28ynZeUiYioqrwnkMiG+Pr5Au1vRoFZeaTojdzaQZXpWv9d6qBKyotR5a+CAnnryE7rwShLd3RvIkTPF2U1u4aEVG9YzgksjGejp74oM8HiP4lGgZhkJYr5UrM7zOfo9dvUVRajl9OX0XUmiMoM/w9eUOwvyuWvXQffLWOVuwdEVH941Q2VlKXU9kQFZYV4nLeZXx/+nuczz2PLp5dMChoEPyc/WBvx/8nvNlf1/LxyMe/oNxg/lfh6N6BmN6/HRT2VV+qbwiEELh6oxgGA+CisoeTkj9nIvobp7Ihusc52jsiyC0IU3tMRUl5CZRy5W3vRbxX7T2TbTEYAsC6gxfx8gMt4efasM8eZumLsP14Br7emwpdYSn6BHlgUnhrBDZxgoM97zElopphOCSyYfZ29jxTeBsZ+qJK2wpLy00uNTdE2XnFmPz9Ufx2NltatvXYFfycnImNUb3R3rf6VyaybxTj8vVCHEi9Bje1Aj1buMNLo4KjQ/X+x6K0vBw5+aUQAnB3cmgUZ1yJyBz/1SCie1rvVh5Ysuusxba23i5QKxp2wEnLKTAJhhWKywz4YFsyPn/hPmgdHW67nkx9EaLXJ+H3c9ekZXI7GT57vhv6tvOEWlH1PxeXrxfg24S/8H9HLgEABgY3xajegWjmxifyEDU2vN5ARPe0lp5OaO/rYrFt9lMd4OHcsEcsx53MrLTttzPZuFFUett1lJUbsO5gmkkwBIByg8Br644gs4qzqwBwObcQz63Yjy9/PY/sGyXIvlGCr/em4h/LEnD5uvmoeSJq2BgOieie5qVR4esRPfBCzwAo/3d/XmsvZ6we0xNd/F2t27lqcFJWfmZTaW8HmUx223Vk3yhGzO8XLLYZBLDzROUB1GAQ2H7sCi7nFpq1ZeiLsPnoFRga8KV5IQSy8oqQoStEfrHlyeOJ7jW8rExE9zw/V0fMeqoDovq2QplBwElhD49GMsdhv44++GjnaYttg7s1hbuT4rbrKBeArrDyM4xV3ZepLyrF5qPplbb/dDQdkT2awd2p4R3PTH0Rth67gm/2pkJfWIreQR54/bE2CGyihsJeDl1hCTL1xdiVnIXScgMeaecFX1dHuDspkJ1XjFKDATIYz7Da2cng6ayEvZznXKjxYzgkIgKgcpCjaSO8P85Ho8Ib/dqYBcRmbo6I6hsEVTUGkzg6yNG1mRZHL+kstj/YuvK5MeV2siq3oXKQQ27X8AJTxUCevTfdr7n9eAZ2pWThx1d7w1erwle/pWLpnr/vR/047jQGB/thZO8WeH19EjL0RejUVIvRvVtgd0omvDQqDLu/Obw0qjvrVHEeUJIP2KsAR9e73EOiO9fwfmOJiKjaNI4OGBYWiC2v9cELPf3Rr4M3FkUG4/tXwuDvXr2w6+6kwNtPdoClK9AtPCq/JxMAXFQOGNUrsNL2Ub0DqzUgpr79lVNgEgwrFJcZsDj+NM5k5ZkEwwo/JqUj6WIuAKCgpBwHU3MwYc1h9GjhjvjkLEzZ8Ceu3SiuWWdK8oH0ROCHl4GVjwDrngfO7QIKcu5k14juGsMhEVEjp3V0QKemWswd3BlfvHgfBnVrWuO5GTv4arBmTChaezkDABzkMgzq5of/jOkJn9s8JaZ7oBseaedltvyh1p4IbeFeo37Ul5+rGMjjonJAzL4Llbb/cOQSBnTxld4LAXwadwYv3d8cv5y5igxd1QN4TAgBXNgLrOwLnI4F9JeBtATgP4OBI/82Bsc7ZTAAuReBsz8b13XpMHAjq2bryM8GdOlA/lWgrOTO+0KNCi8rExHZCJlMBnv57QegWKJW2qNXkAfWjb0fN4rLYG8ng7uTAupqPGnF00WFD5/tgjNZeVh3IA0CwNCeAWjt7Qwvlzu8xFrHnKsYyKNV2eNiTuWjrK8XlMDplql9LucWwlVtPEOaeDEXHZtqq9eR
vCvAT/80hsRb7XoP6DgIUDhVb103MxiAjKPGkFl4/e/lft2AyNWAtlnlny3UGc9aluQBkAHCANg7ABcPAgpn42dVroBMDqhda943avAYDomISOLhoryjwTieLkp4uihxf4smAAA7uzsLqfXlsY4+WFjJQJ4mzgo82t4LB1ItX9btGeiOk1f0Zsvl/9vn6gwCkhTkAHkZltsMZcC184BbYPXXVyEv3TwYAsbL1ztnAQOXWA6dN64COeeM9z2e2QG0exL4IwY4/M3fAValBf4RA3h2AK7lABofwKHx3a9LleNlZSIiqjV2drIGHwwBwNNFgTf6tTFb7u/uiKeDm+KJzr5oYiHkOTrIMTC4KWKPmwa6+wLccDJdD6W9HTpX96whANzusZbyO7xfM/useTCskLzRGAJvVVYMZJ0Erp4C9OmAZ3vg/C/Aoa9Nz2wW6YB1kcYzi/rLQO6lO+sjNVgMh3dh6dKlCAwMhEqlQmhoKA4ePGjtLhERUTWczcyHrqAUXw4LwXPd/RHR0Qezn+qAL4d1h7dGhWZuavx3Qi/07+SDiqz7QGsPrBt3Pz7ZeQol5QZpXe5OCvzz0SB890calr8UAm9NDc68OroDTVpZbnNQA27N72wHb1ypvM1QDpSZz0uJG1nGexx9ugBH1wCu/sDBLy2vo7wUSNkKOPsaP2MpbFKjxcvKd2j9+vWIjo7G8uXLERoaikWLFiEiIgKnTp2Cl5f5jdlERNQwZOqLMGl9Ei7nFuI/B/7CA6094eggx+r9afh452nsmPQAmrqp0cLDCQuHdMHMAe0hhHHgj71cho+eC8aPiZdw/mo+7m/ZBF2aaXExpwDfvxIGH42qZs+UdvEGnlkJrBoAlN4U2GQyYNAywNnnznbSq0PlbY5ugNLCCPSyImNolAEQMIZIXVrl67maAtw3AijR/+/+xFumPCrUGQeyFOmM23PyANTVHKBUfAPIzwIuHwFEOdA0BHDyAlTVf1Y43TmGwzv0ySefYOzYsRg1ahQAYPny5di6dSu++eYbTJ8+3cq9IyKiyuTkl0hPdCkqNZg9gjBDXyTNeemsdICz0vTSbpCXM6ZEtIPhf5NfA0C3ALc775BPV2DCPiBpHXDxANCkNdDzZcA1ALCvwf2LN3PxA/zDgIsJ5m19Z/59xq+s2Bjc5A6AgyOg1BgDmVd7QGYHeHcCrhy1vI2A+/8XJu2Mg1Nupk8Htr4BnNr697IWDxoDb1WDYQBjqEz8DxD3tnEwDGAMyw9MAe6f8HfALM43BsiCHEChBtQexvkhS24Y75l0qNmIffobw+EdKCkpweHDhzFjxgxpmZ2dHcLDw5GQYOEXEUBxcTGKi/+e+0qvN7+ZmYiI6p7B0sjgm5RV83F/tXZvpdwecG8JPDzdePZOrjQuuxvOnsCQb4Bd7wPHvjdeBlY3MQbDNv2BSweA3xcbB8MEPgB0HwVo/QFxEsg8AYS+Ypy6Juw14P9eNl+/oxsQEGoMkgonoEnQ321FemD7NNNgCACpvwI/jDWOlnZqUnnfs08DO2eaLhMC+HUB0LwX0Kqv8RL4rx8Z74c0/O+xh14dgKcWA3s/NV6S7/Ua4N4KUFU+TydZxnsO70B2djbKy8vh7e1tstzb2xsZGZZHnc2bNw9arVZ6+fv710dXiYjoFm5qhcXBJoDxedR+t5nXsc7YyY1B626DYQWNHzDgI2DiYSDqIPDKb0Dn54Ck1UBMf+O8ileSgIQlwPI+QPYpoFl3oNOzQOZJ41lL745A/wXGYFnBpwsw7EdAyACvtsZ7JpXOf7fnZwEpP1nuU9o+46XmypQWAvuWVN6+91PjQJvDq4CDK/4OhoBxMM2Gkcbpf47/F/jyIeDkRtPL9VQtDIf1ZMaMGdDpdNLr4sWL1u4SEdE9yUejwrxnOlt8IszMAe3h2Uieq10tFYNaPNsC2qbAjUxgzwfmdaUFwJbXjf/drLvxkrG9I2CvBoL6GcPgyK3A2N3A058BdkrAXmkc
UHProJkiveV5GysUVvHkl7JiIO9y5e156UBhbuUBUn/ZeK9kxaXnbZNrPvE3MRzeCQ8PD8jlcmRmmt6nkpmZCR8fyzcPK5VKaDQakxcREdU/OzsZ+gR5YHNUbzzWwQvN3BzRu1UTrB93PwYFN63W86gbrYsHKg9ul/4wnpWzVwIaX+O9hHs/AS4fAhycjINjHFTGS8oZfxoDpZOH+XpUGlhM3hUcqxiUonAGmj9QeXtAb2O/iqu4NSv3L8D5f1f2yoqBa+aPQaSq8Z7DO6BQKBASEoL4+HgMGjQIAGAwGBAfH4+JEydat3NERHRbaqU9OjdzxaeRwSgoLoejQg4XVcN7BrRVFVwDOjwFbJ9qPPvm0cYYCK+mAA9OMZ6NtMTJE2g7AEjZYt7mf7+xvTJyeyBkOPDHl+aPDpQrgF5RxnCo1FQeEF2bG8+QVhAGy3VUKZ45vEPR0dFYuXIlvv32WyQnJ2PChAnIz8+XRi8TEVHD56x0gJdGde8EQ/+elbf53Wd8LF4FUQ5sHA888hbw2DvGJ7U07wUMXWc8w5hXyVyKKi3wxEKgdYTp8ua9gWe/qnowCmAMd6NijY/6q+DdCRi1zdgHFx/g/lctf9bFF7CzN45gBoyjsD1aV709MsMzh3coMjISV69exaxZs5CRkYHg4GDExsaaDVIhIiJqMJy9gKHrgb9+B5J/Aq6nGpc7OAJPLTINbmp3ADLgv6ONo6l9OgO5acZBH8IA9Bxb+XY0fsAzK4D8bKAo13imz8mzevMc2skB3y7Aiz8ARdeNl8EdXU3POPYYYxzYcmSV8R5DwHgmM+IDYEv033WPvWecH5FqRCbEbcb0U53Q6/XQarXQ6XS8/5CIiOqWEMD1v4Aj3wKntxvP7gW/ZByRfDnxf/MqBpqOlDYYgOTNwIYR5uvrOxMIGWkMm9ZSMVF2QY5xXkN7FfDHSuDcLuPZxwcmG+drdHS1Xh+rIy8DyDlvnE/SrTng3RnQNAXsavfibk1yB8OhlTAcEhFRvck+C3wdbv685U7PAo/PrzzkFeqMU9zsmQdkJRsn5u7zunE6G23Tuu93TZUUGifBdlBZfgpMQ5ObBqx+Bsg+8/cypQYYvhnw7VqrAZHhsBFgOCQionpRfAPYFGWc88+SsbuBpvdVvY78HKBYZzw7p/Gt9S7ek4p0wA8vA2d2mrep3Y3zUt7uaTI1UJPcwQEpREREtqwot/JJqQHgxMbbr8PJHXBvwWBYm/KzgbNxltsKcoy3AVgJwyEREZHNq2LewVq+t42qqayo6snC87Prry+34DeCiIjIljm6AR0GV97ecVC9dYVuotKYTh10KytOwcNwSEREZMsUTsAjb1p+mkm34YA2oP77RICzL9D3TcttrfsZ53O0Es5zSEREZOvcWxoHnhz7r/H+Q5UrEDbROCK2OnMPUu2T2wOd/2F8hvXu941PdXFQG6cI6vVPq/5cOFrZSjhamYiI6p2hHCjSG58conS2dm8IMM4nmZcBlBUAcqVx0m4HZa1vpia5g2cOiYiI7hV2ckDtZu1e0M3s7ACtn7V7YYL3HBIRERGRhOGQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGET0ghIiIiy0rygbwrQPIWQHcJCAoHfLsAmob1RA+qXQyHREREZK60ADizE/jvaEAYjMv+WAm4twSGbwJcA6zbP6ozvKxMRERE5vIygR/G/B0MK+ScB+LfM55VJJvEcEhERETm/toHGMott534PyA/u377Q/WG4ZCIiIjMFeZU3mYoM77IJjEcEhERkbnmfSpv82wHKF3qry9UrxgOiYiIyJy2GdCyr/lymQzovwBw9qr/PlG9YDgkIiIic86ewODlQN+ZgKObcVnTEGBULNAsxLp9ozrFqWyIiIjIMhcfoM9koNtLxlHLDmpA7W7tXlEdYzgkIiKiysnlnPT6HsPLykREREQkYTgk
IiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiKJzYTDCxcuYMyYMWjRogUcHR3RqlUrzJ49GyUlJSZ1f/75Jx544AGoVCr4+/tjwYIFZuvasGED2rVrB5VKhc6dO2Pbtm0m7UIIzJo1C76+vnB0dER4eDjOnDlTp/tHREREVB9sJhympKTAYDBgxYoVOHHiBD799FMsX74cb775plSj1+vRr18/NG/eHIcPH8bChQsxZ84cfPnll1LNvn37MHToUIwZMwaJiYkYNGgQBg0ahOPHj0s1CxYswGeffYbly5fjwIEDcHJyQkREBIqKiup1n4mIiIhqm0wIIazdibqycOFCLFu2DOfPnwcALFu2DDNnzkRGRgYUCgUAYPr06di4cSNSUlIAAJGRkcjPz8eWLVuk9dx///0IDg7G8uXLIYSAn58fJk+ejDfeeAMAoNPp4O3tjVWrVuH555+vVt/0ej20Wi10Oh00Gk1t7jYRERGRiZrkDps5c2iJTqeDu7u79D4hIQEPPvigFAwBICIiAqdOncL169elmvDwcJP1REREICEhAQCQmpqKjIwMkxqtVovQ0FCpxpLi4mLo9XqTFxEREVFDY7Ph8OzZs1iyZAleeeUVaVlGRga8vb1N6ireZ2RkVFlzc/vNn7NUY8m8efOg1Wqll7+//x3uGREREVHdafDhcPr06ZDJZFW+Ki4JV7h8+TIef/xxDBkyBGPHjrVSz03NmDEDOp1Oel28eNHaXSIiIiIyY2/tDtzO5MmTMXLkyCprWrZsKf13eno6+vbti169epkMNAEAHx8fZGZmmiyreO/j41Nlzc3tFct8fX1NaoKDgyvto1KphFKprHI/iIiIiKytwYdDT09PeHp6Vqv28uXL6Nu3L0JCQhATEwM7O9MTo2FhYZg5cyZKS0vh4OAAAIiLi0Pbtm3h5uYm1cTHx2PSpEnS5+Li4hAWFgYAaNGiBXx8fBAfHy+FQb1ejwMHDmDChAl3ubdERERE1tXgLytX1+XLl/Hwww8jICAAH330Ea5evYqMjAyT+wBfeOEFKBQKjBkzBidOnMD69euxePFiREdHSzX/+te/EBsbi48//hgpKSmYM2cODh06hIkTJwIAZDIZJk2ahPfffx+bN2/GsWPHMHz4cPj5+WHQoEH1vdtEREREtarBnzmsrri4OJw9exZnz55Fs2bNTNoqZuvRarXYuXMnoqKiEBISAg8PD8yaNQvjxo2Tanv16oW1a9firbfewptvvonWrVtj48aN6NSpk1QzdepU5OfnY9y4ccjNzUWfPn0QGxsLlUpVPztLREREVEdsep7DhozzHBIREVF94TyHRERERHRHGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpIwHBIRERGRhOGQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpIwHBIRERGRxCbDYXFxMYKDgyGTyZCUlGTS9ueff+KBBx6ASqWCv78/FixYYPb5DRs2oF27dlCpVOjcuTO2bdtm0i6EwKxZs+Dr6wtHR0eE
h4fjzJkzdblLRERERPXCJsPh1KlT4efnZ7Zcr9ejX79+aN68OQ4fPoyFCxdizpw5+PLLL6Waffv2YejQoRgzZgwSExMxaNAgDBo0CMePH5dqFixYgM8++wzLly/HgQMH4OTkhIiICBQVFdXL/hERERHVFZkQQli7E7Vp+/btiI6Oxg8//ICOHTsiMTERwcHBAIBly5Zh5syZyMjIgEKhAABMnz4dGzduREpKCgAgMjIS+fn52LJli7TO+++/H8HBwVi+fDmEEPDz88PkyZPxxhtvAAB0Oh28vb2xatUqPP/889Xqp16vh1arhU6ng0ajqcUjQERERGSqJrnDps4cZmZmYuzYsfjPf/4DtVpt1p6QkIAHH3xQCoYAEBERgVOnTuH69etSTXh4uMnnIiIikJCQAABITU1FRkaGSY1Wq0VoaKhUY0lxcTH0er3Ji4iIiKihsZlwKITAyJEjMX78eHTv3t1iTUZGBry9vU2WVbzPyMiosubm9ps/Z6nGknnz5kGr1Uovf3//GuwdERERUf1o8OFw+vTpkMlkVb5SUlKwZMkS5OXlYcaMGdbuskUzZsyATqeTXhcvXrR2l4iIiIjM2Fu7A7czefJkjBw5ssqali1bYteuXUhISIBSqTRp6969O1588UV8++238PHxQWZmpkl7xXsfHx/pT0s1N7dXLPP19TWpqbi30RKlUmnWNyIiIqKGpsGHQ09PT3h6et627rPPPsP7778vvU9PT0dERATWr1+P0NBQAEBYWBhmzpyJ0tJSODg4AADi4uLQtm1buLm5STXx8fGYNGmStK64uDiEhYUBAFq0aAEfHx/Ex8dLYVCv1+PAgQOYMGFCbewyERERkdU0+HBYXQEBASbvnZ2dAQCtWrVCs2bNAAAvvPAC3nnnHYwZMwbTpk3D8ePHsXjxYnz66afS5/71r3/hoYcewscff4wBAwbgu+++w6FDh6TpbmQyGSZNmoT3338frVu3RosWLfD222/Dz88PgwYNqp+dJSIiIqojNhMOq0Or1WLnzp2IiopCSEgIPDw8MGvWLIwbN06q6dWrF9auXYu33noLb775Jlq3bo2NGzeiU6dOUs3UqVORn5+PcePGITc3F3369EFsbCxUKpU1douIiIio1tjcPIeNBec5JCIiovpyz85zSERERER3h+GQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkNQ6HV65cwerVq7Ft2zaUlJSYtOXn5+Pdd9+ttc4RERERUf2SCSFEdYv/+OMP9OvXDwaDAaWlpWjatCk2btyIjh07AgAyMzPh5+eH8vLyOuuwrdDr9dBqtdDpdNBoNNbuDhEREdmwmuSOGp05fPPNNzF48GBcv34dmZmZeOyxx/DQQw8hMTHxrjpMRERERA2DfU2KDx8+jKVLl8LOzg4uLi744osvEBAQgEcffRQ7duxAQEBAXfWTiIiIiOpBjcIhABQVFZm8nz59Ouzt7dGvXz988803tdYxIiIiIqp/NQqHnTp1wr59+9ClSxeT5W+88QYMBgOGDh1aq50jIiIiovpVo3sOhw8fjr1791psmzp1Kt555x1eWiYiIiJqxGo0WplqD0crExERUX2ps9HKRUVF2Lx5M/Ly8ixudPPmzSguLq5Zb4mIiIiowahROFyxYgUWL14MFxcXszaNRoPPPvsMK1eurLXOEREREVH9qlE4XLNmDSZNmlRp+6RJk/Dvf//7bvtERERERFZSo3B45swZdO3atdL2Ll264MyZM3fdKSIiIiKyjhqFw7KyMly9erXS9qtXr6KsrOyuO0VERERE1lGjcNixY0f8/PPPlbbv3LlTes4yERERETU+NQqHo0ePxnvvvYctW7aYtf30
00+YO3cuRo8eXWudIyIiIqL6VaMnpIwbNw6//vornn76abRr1w5t27YFAKSkpOD06dN47rnnMG7cuDrpKBERERHVvRqdOQSA1atXY/369WjTpg1Onz6NU6dOoW3btli3bh3WrVtXF30kIiIionpSozOH5eXl+Oijj7B582aUlJTgySefxJw5c+Do6FhX/SMiIiKielSjM4cffPAB3nzzTTg7O6Np06b47LPPEBUVVVd9IyIiIqJ6VqNw+O9//xtffPEFduzYgY0bN+Knn37CmjVrYDAY6qp/RERERFSPahQO09LS8MQTT0jvw8PDIZPJkJ6eXusdIyIiIqL6V+NJsFUqlckyBwcHlJaW1mqniIiIiMg6ajQgRQiBkSNHQqlUSsuKioowfvx4ODk5Scv+7//+r/Z6SERERET1pkbhcMSIEWbLXnrppVrrDBERERFZV43CYUxMTF31g4iIiIgagBpPgk1EREREtovhkIiIiIgkDIdEREREJLG5cLh161aEhobC0dERbm5uGDRokEl7WloaBgwYALVaDS8vL0yZMgVlZWUmNXv27MF9990HpVKJoKAgrFq1ymw7S5cuRWBgIFQqFUJDQ3Hw4ME63CsiIiKi+mFT4fCHH37AsGHDMGrUKBw9ehS///47XnjhBam9vLwcAwYMQElJCfbt24dvv/0Wq1atwqxZs6Sa1NRUDBgwAH379kVSUhImTZqEl19+GTt27JBq1q9fj+joaMyePRtHjhxB165dERERgaysrHrdXyIiIqLaJhNCCGt3ojaUlZUhMDAQ77zzDsaMGWOxZvv27XjyySeRnp4Ob29vAMDy5csxbdo0XL16FQqFAtOmTcPWrVtx/Phx6XPPP/88cnNzERsbCwAIDQ1Fjx498PnnnwMADAYD/P398dprr2H69OnV6q9er4dWq4VOp4NGo7mbXSciIiKqUk1yh82cOTxy5AguX74MOzs7dOvWDb6+vujfv79JyEtISEDnzp2lYAgAERER0Ov1OHHihFQTHh5usu6IiAgkJCQAAEpKSnD48GGTGjs7O4SHh0s1lhQXF0Ov15u8iIiIiBoamwmH58+fBwDMmTMHb731FrZs2QI3Nzc8/PDDyMnJAQBkZGSYBEMA0vuMjIwqa/R6PQoLC5GdnY3y8nKLNRXrsGTevHnQarXSy9/f/+52mIiIiKgONPhwOH36dMhksipfKSkpMBgMAICZM2fi2WefRUhICGJiYiCTybBhwwYr7wUwY8YM6HQ66XXx4kVrd4mIiIjITI2ekGINkydPxsiRI6usadmyJa5cuQIA6NChg7RcqVSiZcuWSEtLAwD4+PiYjSrOzMyU2ir+rFh2c41Go4GjoyPkcjnkcrnFmop1WKJUKk2eSU1ERETUEDX4cOjp6QlPT8/b1oWEhECpVOLUqVPo06cPAKC0tBQXLlxA8+bNAQBhYWGYO3cusrKy4OXlBQCIi4uDRqORQmVYWBi2bdtmsu64uDiEhYUBABQKBUJCQhAfHy9Nk2MwGBAfH4+JEyfWyj4TERERWUuDv6xcXRqNBuPHj8fs2bOxc+dOnDp1ChMmTAAADBkyBADQr18/dOjQAcOGDcPRo0exY8cOvPXWW4iKipLO6o0fPx7nz5/H1KlTkZKSgi+++ALff/89Xn/9dWlb0dHRWLlyJb799lskJydjwoQJyM/Px6hRo+p/x4mIiIhqUYM/c1gTCxcuhL29PYYNG4bCwkKEhoZi165dcHNzAwDI5XJs2bIFEyZMQFhYGJycnDBixAi8++670jpatGiBrVu34vXXX8fixYvRrFkzfPXVV4iIiJBqIiMjcfXqVcyaNQsZGRkIDg5GbGys2SAVIiIiosbGZuY5bGw4zyERERHVl3tynkMiIiIiunsMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhERER
EUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpIwHBIRERGRhOGQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEhiU+Hw9OnTGDhwIDw8PKDRaNCnTx/s3r3bpCYtLQ0DBgyAWq2Gl5cXpkyZgrKyMpOaPXv24L777oNSqURQUBBWrVpltq2lS5ciMDAQKpUKoaGhOHjwYF3uGhEREVG9sLd2B2rTk08+idatW2PXrl1wdHTEokWL8OSTT+LcuXPw8fFBeXk5BgwYAB8fH+zbtw9XrlzB8OHD4eDggA8++AAAkJqaigEDBmD8+PFYs2YN4uPj8fLLL8PX1xcREREAgPXr1yM6OhrLly9HaGgoFi1ahIiICJw6dQpeXl61uk/l5eUoLS2t1XU2FA4ODpDL5dbuBhEREd1EJoQQ1u5EbcjOzoanpyd+/fVXPPDAAwCAvLw8aDQaxMXFITw8HNu3b8eTTz6J9PR0eHt7AwCWL1+OadOm4erVq1AoFJg2bRq2bt2K48ePS+t+/vnnkZubi9jYWABAaGgoevTogc8//xwAYDAY4O/vj9deew3Tp0+vVn/1ej20Wi10Oh00Go1ZuxACGRkZyM3NvZvD0uC5urrCx8cHMpnM2l0hIiKyWbfLHTezmTOHTZo0Qdu2bfHvf/9buiS8YsUKeHl5ISQkBACQkJCAzp07S8EQACIiIjBhwgScOHEC3bp1Q0JCAsLDw03WHRERgUmTJgEASkpKcPjwYcyYMUNqt7OzQ3h4OBISEirtX3FxMYqLi6X3er2+yv2pCIZeXl5Qq9U2F56EECgoKEBWVhYAwNfX18o9IiIiIsCGwqFMJsPPP/+MQYMGwcXFBXZ2dvDy8kJsbCzc3NwAGAPXzcEQgPQ+IyOjyhq9Xo/CwkJcv34d5eXlFmtSUlIq7d+8efPwzjvvVGtfysvLpWDYpEmTan2mMXJ0dAQAZGVlwcvLi5eYiYiIGoAGPyBl+vTpkMlkVb5SUlIghEBUVBS8vLzw22+/4eDBgxg0aBCeeuopXLlyxdq7gRkzZkCn00mvixcvVlpbcY+hWq2ur+5ZTcU+2up9lURERI1Ngz9zOHnyZIwcObLKmpYtW2LXrl3YsmULrl+/Ll1L/+KLLxAXF4dvv/0W06dPh4+Pj9mo4szMTACAj4+P9GfFsptrNBoNHB0dIZfLIZfLLdZUrMMSpVIJpVJZrX2uYGuXki25F/aRiIioMWnw4dDT0xOenp63rSsoKABgvP/vZnZ2djAYDACAsLAwzJ07V7qMCQBxcXHQaDTo0KGDVLNt2zaTdcTFxSEsLAwAoFAoEBISgvj4eAwaNAiAcUBKfHw8Jk6ceOc7SkRERNQANPjLytUVFhYGNzc3jBgxAkePHsXp06cxZcoUaWoaAOjXrx86dOiAYcOG4ejRo9ixYwfeeustREVFSWf1xo8fj/Pnz2Pq1KlISUnBF198ge+//x6vv/66tK3o6GisXLkS3377LZKTkzFhwgTk5+dj1KhRVtn3ChcuXIBMJkNSUpJV+0FERESNl82EQw8PD8TGxuLGjRt45JFH0L17d+zduxebNm1C165dAQByuRxbtmyBXC5HWFgYXnrpJQwfPhzvvvuutJ4WLVpg69atiIuLQ9euXfHxxx/jq6++kuY4BIDIyEh89NFHmDVrFoKDg5GUlITY2FizQSqN3apVq+Dq6mrtbhAREVE9avCXlWuie/fu2LFjR5U1zZs3N7tsfKuHH34YiYmJVdZMnDiR
l5Grqby8HDKZzOySPxERETU8/Ne6ETIYDFiwYAGCgoKgVCoREBCAuXPnmtVZOvO3ceNGk0EgR48eRd++feHi4gKNRoOQkBAcOnQIe/bswahRo6DT6aRR4XPmzAFgnLPxjTfeQNOmTeHk5ITQ0FDs2bPHbLubN29Ghw4doFQqkZaWVheHgoiIiGqZTZ05vFfMmDEDK1euxKeffoo+ffrgypUrVc6xWJUXX3wR3bp1w7JlyyCXy5GUlAQHBwf06tULixYtwqxZs3Dq1CkAgLOzMwDjWdOTJ0/iu+++g5+fH3788Uc8/vjjOHbsGFq3bg3AOEDoww8/xFdffYUmTZrU+mMFiYiIqG4wHDYyeXl5WLx4MT7//HOMGDECANCqVSv06dMHFy5cqPH60tLSMGXKFLRr1w4ApHAHAFqtFjKZzGSKnrS0NMTExCAtLQ1+fn4AgDfeeAOxsbGIiYmRnlFdWlqKL774Qrrfk4iIiBoHhsNGJjk5GcXFxXj00UdrZX3R0dF4+eWX8Z///Afh4eEYMmQIWrVqVWn9sWPHUF5ejjZt2pgsLy4uNnmai0KhQJcuXWqlj0RERFR/GA4bmYpHzlWHnZ0dhBAmy259EsmcOXPwwgsvYOvWrdi+fTtmz56N7777DoMHD7a4zhs3bkAul+Pw4cNmj7uruOxc0U9OcE1ERNT4cEBKI9O6dWs4OjoiPj7+trWenp7Iy8tDfn6+tMzSHIht2rTB66+/jp07d+KZZ55BTEwMAOPZv/LycpPabt26oby8HFlZWQgKCjJ5VfWEGCIiImocGA4bGZVKhWnTpmHq1Kn497//jXPnzmH//v34+uuvzWpDQ0OhVqvx5ptv4ty5c1i7di1WrVoltRcWFmLixInYs2cP/vrrL/z+++/4448/0L59ewBAYGAgbty4gfj4eGRnZ6OgoABt2rTBiy++iOHDh+P//u//kJqaioMHD2LevHnYunVrfR0GIiIiqiMMh43Q22+/jcmTJ2PWrFlo3749IiMjkZWVZVbn7u6O1atXY9u2bejcuTPWrVsnTUcDGCcFv3btGoYPH442bdrgueeeQ//+/fHOO+8AAHr16oXx48cjMjISnp6eWLBgAQAgJiYGw4cPx+TJk9G2bVsMGjQIf/zxBwICAupl/4nINhhKS1Fy+TKKz51DyeXLMJSUWLtLRARAJm69KY3qhV6vh1arhU6ng0ajMWkrKipCamoqWrRoAZVKZaUe1o97aV+J6G9l2dm4vn49cr6JgSE/HzJHR7gNHQr3USPh4Olp7e4R2ZyqcseteOaQiIjqlaGwENlffYXsJZ/D8L97okVhIXK++QZZH32M8rw8K/eQ6N7GcEhERPWqLDsb19estdim37wZZdeu1XOPiOhmDIdERFSvynU64JZptSRCoJzhkMiqGA6JiKhe2d1mvlY7J6d66gkRWcJwSERE9Uru7g5V584W2xSBgZDf9LQlIqp/DIdERFSv7N3c0PTjj+Dg72+63MsLzb5YytHKRFbGx+cREVG9UwQEoPma1Si9eBHF51OhCAiAonkAHPikJSKrYzgkIiKrcPDygoOXF9QhIdbuChHdhJeViYiIiEjCcEhEREREEoZDqjXz5s1Djx494OLiAi8vLwwaNAinTp2ydreIiIioBhgObZiuoATnsm4gMe06zl29AV1B3T7U/pdffkFUVBT279+PuLg4lJaWol+/fsj/3+OxiIiIqOHjgBQblZ5biGk//InfzmRLyx5s7YH5z3aBn2vVE9DeqdjYWJP3q1atgpeXFw4fPowHH3ywTrZJREREtYtnDm2QrqDELBgCwK9nsjH9hz/r/Ayi1A+dDgDg7u5eL9sjIiKiu8dwaIOyb5SYBcMKv57JRvaNug+HBoMBkyZNQu/evdGpU6c63x4RERHVDl5WtkH6okoeaP8/ebdprw1RUVE4fvw49u7dW+fbIiIiotrDcGiDNCqHKttdbtN+tyZOnIgtW7bg119/RbNmzep0
W0RERFS7eFnZBnk4K/Bgaw+LbQ+29oCHs6JOtiuEwMSJE/Hjjz9i165daNGiRZ1sh4iIiOoOw6EN0qoVmP9sF7OA+GBrD3z4bBdo1XUTDqOiorB69WqsXbsWLi4uyMjIQEZGBgoLC+tke0RERFT7eFnZRvm5OmLJ0G7IvlGCvKJSuKgc4OGsqLNgCADLli0DADz88MMmy2NiYjBy5Mg62y4RERHVHoZDG6ZV120YvJUQot62RURERHWDl5WJiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQ1tWeB3IPg1cOgRknzG+r2O//vornnrqKfj5+UEmk2Hjxo11vk0iIiKqPXy2sq3SXQY2TQTO7/p7WatHgaeXANqmdbbZ/Px8dO3aFaNHj8YzzzxTZ9shIiKiusFwaIsKr5sHQwA4Fw9sfg34x9eAo1udbLp///7o379/naybiIiI6h4vK9ui/KvmwbDCuXhjOxEREZEFDIe2qEh/d+1ERER0z2I4tEUqzd21ExER0T2r0YTDuXPnolevXlCr1XB1dbVYk5aWhgEDBkCtVsPLywtTpkxBWVmZSc2ePXtw3333QalUIigoCKtWrTJbz9KlSxEYGAiVSoXQ0FAcPHjQpL2oqAhRUVFo0qQJnJ2d8eyzzyIzM7O2dvXuOXkaB59Y0upRYzsRERGRBY0mHJaUlGDIkCGYMGGCxfby8nIMGDAAJSUl2LdvH7799lusWrUKs2bNkmpSU1MxYMAA9O3bF0lJSZg0aRJefvll7NixQ6pZv349oqOjMXv2bBw5cgRdu3ZFREQEsrKypJrXX38dP/30EzZs2IBffvkF6enpDWtkrqObcVTyrQGxYrRyHQ1GISIiIhsgGpmYmBih1WrNlm/btk3Y2dmJjIwMadmyZcuERqMRxcXFQgghpk6dKjp27GjyucjISBERESG979mzp4iKipLel5eXCz8/PzFv3jwhhBC5ubnCwcFBbNiwQapJTk4WAERCQkK190On0wkAQqfTmbUVFhaKkydPisLCwmqvz6KCHCGunhLi4h/GPwty7m591ZCXlycSExNFYmKiACA++eQTkZiYKP766y+L9bW2r0RERFSpqnLHrRrNmcPbSUhIQOfOneHt7S0ti4iIgF6vx4kTJ6Sa8PBwk89FREQgISEBgPHs5OHDh01q7OzsEB4eLtUcPnwYpaWlJjXt2rVDQECAVGNJcXEx9Hq9yavOOboBHm2AZt2Nf9bDGcNDhw6hW7du6NatGwAgOjoa3bp1MzmDS0RERA2XzcxzmJGRYRIMAUjvMzIyqqzR6/UoLCzE9evXUV5ebrEmJSVFWodCoTC779Hb21vajiXz5s3DO++8c0f71pg8/PDDEEJYuxtERER0h6x65nD69OmQyWRVvipCWWM3Y8YM6HQ66XXx4kVrd4mIiIjIjFXPHE6ePBkjR46ssqZly5bVWpePj4/ZqOKKEcQ+Pj7Sn7eOKs7MzIRGo4GjoyPkcjnkcrnFmpvXUVJSgtzcXJOzhzfXWKJUKqFUKqu1L0RERETWYtUzh56enmjXrl2VL4VCUa11hYWF4dixYyajiuPi4qDRaNChQwepJj4+3uRzcXFxCAsLAwAoFAqEhISY1BgMBsTHx0s1ISEhcHBwMKk5deoU0tLSpBoiIiKixqrR3HOYlpaGnJwcpKWloby8HElJSQCAoKAgODs7o1+/fujQoQOGDRuGBQsWICMjA2+99RaioqKkM3bjx4/H559/jqlTp2L06NHYtWsXvv/+e2zdulXaTnR0NEaMGIHu3bujZ8+eWLRoEfLz8zFq1CgAgFarxZgxYxAdHQ13d3doNBq89tprCAsLw/3331/vx4WIiIioVtX94OnaMWLECAHA7LV7926p5sKFC6J///7C0dFReHh4iMmTJ4vS0lKT9ezevVsEBwcLhUIhWrZs
KWJiYsy2tWTJEhEQECAUCoXo2bOn2L9/v0l7YWGhePXVV4Wbm5tQq9Vi8ODB4sqVKzXan3qZyqYRuJf2lYiIyFpqMpWNTAgOLbUGvV4PrVYLnU4Hjcb0cXZFRUVITU1FixYtoFKprNTD+nEv7SsREZG1VJU7bmUz8xwSERER0d1jOCQiIiIiCcMhEREREUkYDomIiIhIwnBow3TFOqTqUvHn1T+RqkuFrlhXL9tdunQpAgMDoVKpEBoaajY5ORERETVcjWaeQ6qZjPwMzN43G/vS90nLevv1xpxec+DjVPmTXO7W+vXrER0djeXLlyM0NBSLFi1CREQETp06BS8vrzrbLhEREdUOnjm0QbpinVkwBIDf03/HnH1z6vQM4ieffIKxY8di1KhR6NChA5YvXw61Wo1vvvmmzrZJREREtYfh0AblFOWYBcMKv6f/jpyinDrZbklJCQ4fPozw8HBpmZ2dHcLDw5GQkFAn2yQiIqLaxXBog/JK8u6q/U5lZ2ejvLwc3t7eJsu9vb2RkZFRJ9skIiKi2sVwaINcFC531U5ERET3LoZDG+Suckdvv94W23r79Ya7yr1Otuvh4QG5XI7MzEyT5ZmZmfDxqbtBMERERFR7GA5tkFapxZxec8wCYsVoZa1SWyfbVSgUCAkJQXx8vLTMYDAgPj4eYWFhdbJNIiIiql2cysZG+Tj54MMHP0ROUQ7ySvLgonCBu8q9zoJhhejoaIwYMQLdu3dHz549sWjRIuTn52PUqFF1ul0iIiKqHQyHNkyr1NZ5GLxVZGQkrl69ilmzZiEjIwPBwcGIjY01G6RCREREDRPDIdW6iRMnYuLEidbuBhEREd0B3nNIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpIwHNqwMp0OxefPo/DoURSfT0WZTlen21u2bBm6dOkCjUYDjUaDsLAwbN++vU63SURERLWLz1a2UaVXMpD+1lso+P13aZm6Tx/4vfceHHx96mSbzZo1w/z589G6dWsIIfDtt99i4MCBSExMRMeOHetkm0RERFS7eObQBpXpdGbBEAAK9u5F+ttv19kZxKeeegpPPPEEWrdujTZt2mDu3LlwdnbG/v3762R7REREVPt45tAGlV+7ZhYMKxTs3Yvya9dgr9XWbR/Ky7Fhwwbk5+cjLCysTrdFRES1ozQrC8VnzyJ/717Ye3rCuW9f2Ht7Q+7oaO2uUT1iOLRBhry827TfqLNtHzt2DGFhYSgqKoKzszN+/PFHdOjQoc62R0REtaP0SgYuvvIKik+flpZlLVgIvwUL4Bz+KAPiPYSXlW2QnYvLbdqd62zbbdu2RVJSEg4cOIAJEyZgxIgROHnyZJ1tj4iI7p6huBjZX64wCYYAACGQPm0ayrKyrNMxsgqGQxskb9IE6j59LLap+/SBvEmTOtu2QqFAUFAQQkJCMG/ePHTt2hWLFy+us+0REdHdK792Dbr/+9Fyo8GA/L2Wb1Ui28RwaIPstVr4vfeeWUBU9+kDv/ffq/P7DW9mMBhQXFxcb9sjIqKaEwYDRBV/V5fnXKvH3pC18Z5DG+Xg64OmH3+E8mvXYMi7ATsXZ8ibNKnTYDhjxgz0798fAQEByMvLw9q1a7Fnzx7s2LGjzrZJRER3z06thrJ9exQnJ1tsd+rVq557RNbEcGjD7LXaej1LmJWVheHDh+PKlSvQarXo0qULduzYgccee6ze+kBERDVn7+4O75lvIm3YcEAIkzZVly5wCAiwUs/IGhgOqdZ8/fXX1u4CERHdIVXHjmi+ZjUyP5iHouPHYeekhuvzQ+E+fBgcPD2t3T2qRwyHREREBLmjI9T33Qf/lV/CUFAAmVwOeZMmsHNwsHbXqJ4xHBIREZHE3s0NcHOzdjfIijhamYiIiIgkDIdEREREJGE4bMDELSPGbNG9sI9ERESNCcNhA+Tw
v5t/CwoKrNyTulexjw684ZmIiKhB4ICUBkgul8PV1RVZ/3uWpVqthkwms3KvapcQAgUFBcjKyoKrqyvkcrm1u0RERERgOGywfHx8AEAKiLbK1dVV2lciIiKyPobDBkomk8HX1xdeXl4oLS21dnfqhIODA88YEhERNTAMhw2cXC5ngCIiIqJ602gGpMydOxe9evWCWq2Gq6urWfvRo0cxdOhQ+Pv7w9HREe3bt8fixYvN6vbs2YP77rsPSqUSQUFBWLVqlVnN0qVLERgYCJVKhdDQUBw8eNCkvaioCFFRUWjSpAmcnZ3x7LPPIjMzs7Z2lYiIiMhqGk04LCkpwZAhQzBhwgSL7YcPH4aXlxdWr16NEydOYObMmZgxYwY+//xzqSY1NRUDBgxA3759kZSUhEmTJuHll1/Gjh07pJr169cjOjoas2fPxpEjR9C1a1dERESY3Pv3+uuv46effsKGDRvwyy+/ID09Hc8880zd7TwRERFRPZGJRjbR3KpVqzBp0iTk5ubetjYqKgrJycnYtWsXAGDatGnYunUrjh8/LtU8//zzyM3NRWxsLAAgNDQUPXr0kEKlwWCAv78/XnvtNUyfPh06nQ6enp5Yu3Yt/vGPfwAAUlJS0L59eyQkJOD++++32Jfi4mIUFxdL7/V6Pfz9/aHT6aDRaO7oWBARERFVh16vh1arrVbusOl7DnU6Hdzd3aX3CQkJCA8PN6mJiIjApEmTABjPTh4+fBgzZsyQ2u3s7BAeHo6EhAQAxjOUpaWlJutp164dAgICqgyH8+bNwzvvvGO2XK/X3/H+EREREVVHRd6ozjlBmw2H+/btw/r167F161ZpWUZGBry9vU3qvL29odfrUVhYiOvXr6O8vNxiTUpKirQOhUJhdt+jt7c3MjIyKu3PjBkzEB0dLb2/fPkyOnToAH9//zvdRSIiIqIaycvLg1arrbLGquFw+vTp+PDDD6usSU5ORrt27Wq03uPHj2PgwIGYPXs2+vXrdzddrDVKpRJKpVJ67+zsjIsXL8LFxQUymUy6zHzx4kVeZr4Jj4s5HhPLeFws43GxjMfFHI+JZbZyXIQQyMvLg5+f321rrRoOJ0+ejJEjR1ZZ07Jlyxqt8+TJk3j00Ucxbtw4vPXWWyZtPj4+ZqOKMzMzodFo4OjoKE0bY6mmYqJmHx8flJSUIDc31+Ts4c011WFnZ4dmzZqZLddoNI36y1dXeFzM8ZhYxuNiGY+LZTwu5nhMLLOF43K7M4YVrBoOPT094enpWWvrO3HiBB555BGMGDECc+fONWsPCwvDtm3bTJbFxcUhLCwMAKBQKBASEoL4+HgMGjQIgHFASnx8PCZOnAgACAkJgYODA+Lj4/Hss88CAE6dOoW0tDRpPURERESNVaO55zAtLQ05OTlIS0tDeXk5kpKSAABBQUFwdnbG8ePH8cgjjyAiIgLR0dHS/X9yuVwKoOPHj8fnn3+OqVOnYvTo0di1axe+//57k/sSo6OjMWLECHTv3h09e/bEokWLkJ+fj1GjRgEwpu4xY8YgOjoa7u7u0Gg0eO211xAWFlbpYBQiIiKiRkM0EiNGjBAAzF67d+8WQggxe/Zsi+3Nmzc3Wc/u3btFcHCwUCgUomXLliImJsZsW0uWLBEBAQFCoVCInj17iv3795u0FxYWildffVW4ubkJtVotBg8eLK5cuXJX+1dUVCRmz54tioqK7mo9tobHxRyPiWU8LpbxuFjG42KOx8Sye/G4NLp5DomIiIio7jSaJ6QQERERUd1jOCQiIiIiCcMhEREREUkYDomIiIhIwnBYy55++mkEBARApVLB19cXw4YNQ3p6uknNn3/+iQceeAAqlQr+/v5YsGCB2Xo2bNiAdu3aQaVSoXPnzmbzMwohMGvWLPj6+sLR0RHh4eE4c+aMSU1OTg5efPFFaDQauLq6YsyYMbhx40bt73QVLly4gDFjxqBFixZwdHREq1atMHv2bJSUlJjUyGQys9f+/ftN
1mUrxwSo3nEB7q3vSoW5c+eiV69eUKvVZo+prGDp+/Ldd9+Z1OzZswf33XcflEolgoKCsGrVKrP1LF26FIGBgVCpVAgNDcXBgwdN2ouKihAVFYUmTZrA2dkZzz77rNkk+fWlOsclLS0NAwYMgFqthpeXF6ZMmYKysjKTGls7LrcKDAw0+27Mnz/fpKa+fq8am9v93BuzOXPmmH0vbn76WnW+07X1+9UoWHOotC365JNPREJCgrhw4YL4/fffRVhYmAgLC5PadTqd8Pb2Fi+++KI4fvy4WLdunXB0dBQrVqyQan7//Xchl8vFggULxMmTJ8Vbb70lHBwcxLFjx6Sa+fPnC61WKzZu3CiOHj0qnn76adGiRQtRWFgo1Tz++OOia9euYv/+/eK3334TQUFBYujQofVzIP5n+/btYuTIkWLHjh3i3LlzYtOmTcLLy0tMnjxZqklNTRUAxM8//yyuXLkivUpKSqQaWzomQlTvuNxr35UKs2bNEp988omIjo4WWq3WYg0AERMTY/J9uXl/zp8/L9RqtYiOjhYnT54US5YsEXK5XMTGxko13333nVAoFOKbb74RJ06cEGPHjhWurq4iMzNTqhk/frzw9/cX8fHx4tChQ+L+++8XvXr1qrN9r8rtjktZWZno1KmTCA8PF4mJiWLbtm3Cw8NDzJgxQ6qxxeNyq+bNm4t3333X5Ltx48YNqb0+f68ak+r83Buz2bNni44dO5p8L65evSq13+47XVu/X40Fw2Ed27Rpk5DJZFLQ+eKLL4Sbm5soLi6WaqZNmybatm0rvX/uuefEgAEDTNYTGhoqXnnlFSGEEAaDQfj4+IiFCxdK7bm5uUKpVIp169YJIYQ4efKkACD++OMPqWb79u1CJpOJy5cv1/6O1sCCBQtEixYtpPcV4TAxMbHSz9j6MRHC/Ljc69+VmJiYKsPhjz/+WOlnp06dKjp27GiyLDIyUkREREjve/bsKaKioqT35eXlws/PT8ybN08IYTxODg4OYsOGDVJNcnKyACASEhLuYI9qR2XHZdu2bcLOzk5kZGRIy5YtWyY0Go30HbLl41KhefPm4tNPP620vb5+rxqb2/3cG7vZs2eLrl27Wmyrzne6tn6/GgteVq5DOTk5WLNmDXr16gUHBwcAQEJCAh588EEoFAqpLiIiAqdOncL169elmvDwcJN1RUREICEhAQCQmpqKjIwMkxqtVovQ0FCpJiEhAa6urujevbtUEx4eDjs7Oxw4cKBudriadDod3N3dzZY//fTT8PLyQp8+fbB582aTNls/JoD5ceF3pWpRUVHw8PBAz5498c0330DcNGXr7Y5LSUkJDh8+bFJjZ2eH8PBwqebw4cMoLS01qWnXrh0CAgKkmoYkISEBnTt3hre3t7QsIiICer0eJ06ckGruheMyf/58NGnSBN26dcPChQtNLv3V1+9VY1Kdn7stOHPmDPz8/NCyZUu8+OKLSEtLA1C973Rt/H41JgyHdWDatGlwcnJCkyZNkJaWhk2bNkltGRkZJl8uANL7ikf+VVZzc/vNn6usxsvLy6Td3t4e7u7uUo01nD17FkuWLMErr7wiLXN2dsbHH3+MDRs2YOvWrejTpw8GDRpkEhBt+ZgAlo/Lvf5dqcq7776L77//HnFxcXj22Wfx6quvYsmSJVJ7ZcdFr9ejsLAQ2dnZKC8vv+1xUSgUZvf33VzTkNzN98WWjss///lPfPfdd9i9ezdeeeUVfPDBB5g6darUXl+/V41JdX7ujV1oaChWrVqF2NhYLFu2DKmpqXjggQeQl5dXre90bfx+NSYMh9Uwffp0izfA3/xKSUmR6qdMmYLExETs3LkTcrkcw4cPNzmrYQtqekwA4PLly3j88ccxZMgQjB07Vlru4eGB6OhohIaGokePHpg/fz5eeuklLFy4sL53667V5nGxJXdyXKry9ttvo3fv3ujWrRumTZuGqVOn3jPfl3tRTY5TdHQ0Hn74YXTp0gXjx4/H
xx9/jCVLlqC4uNjKe0HW1L9/fwwZMgRdunRBREQEtm3bhtzcXHz//ffW7lqDZG/tDjQGkydPxsiRI6usadmypfTfHh4e8PDwQJs2bdC+fXv4+/tj//79CAsLg4+Pj9kIqIr3Pj4+0p+Wam5ur1jm6+trUhMcHCzVZGVlmayjrKwMOTk50ufvRk2PSXp6Ovr27YtevXrhyy+/vO36Q0NDERcXJ71vDMcEqN3jYivfFaDmx6WmQkND8d5776G4uBhKpbLS46LRaODo6Ai5XA65XH7bY1dSUoLc3FyTMwo319yt2jwuPj4+ZqNLq/t9aWjH5VZ3c5xCQ0NRVlaGCxcuoG3btvX2e9WYeHh43PbnbmtcXV3Rpk0bnD17Fo899thtv9O18fvVqFj7pkdb99dffwkAYvfu3UKIv2+Gvnkk7owZM8xuhn7yySdN1hMWFmZ2M/RHH30ktet0OouDDA4dOiTV7NixwyqDDC5duiRat24tnn/+eVFWVlatz7z88suiW7du0ntbOyZC3P643IvflZtVNSDlVu+//75wc3OT3k+dOlV06tTJpGbo0KFmAy8mTpwovS8vLxdNmzY1G3jx3//+V6pJSUmx+sCL2w1IuXl06YoVK4RGoxFFRUVCCNs+LpVZvXq1sLOzEzk5OUKI+vu9amxu93O3NXl5ecLNzU0sXry4Wt/p2vr9aiwYDmvR/v37xZIlS0RiYqK4cOGCiI+PF7169RKtWrWSvjy5ubnC29tbDBs2TBw/flx89913Qq1Wm02jYG9vLz766CORnJwsZs+ebXEaBVdXV7Fp0ybx559/ioEDB1qcnqRbt27iwIEDYu/evaJ169b1Pj3JpUuXRFBQkHj00UfFpUuXTKYRqLBq1Sqxdu1akZycLJKTk8XcuXOFnZ2d+Oabb6QaWzomQlTvuNxr35UKf/31l0hMTBTvvPOOcHZ2FomJiSIxMVHk5eUJIYTYvHmzWLlypTh27Jg4c+aM+OKLL4RarRazZs2S1lExpcSUKVNEcnKyWLp0qcUpW5RKpVi1apU4efKkGDdunHB1dTUZjTh+/HgREBAgdu3aJQ4dOmQ2NVV9ut1xqZhqo1+/fiIpKUnExsYKT09Pi1Nt2NJxudm+ffvEp59+KpKSksS5c+fE6tWrhaenpxg+fLhUU5+/V41JdX7ujdnkyZPFnj17RGpqqvj9999FeHi48PDwEFlZWUKI23+na+v3q7FgOKxFf/75p+jbt69wd3cXSqVSBAYGivHjx4tLly6Z1B09elT06dNHKJVK0bRpUzF//nyzdX3//feiTZs2QqFQiI4dO4qtW7eatBsMBvH2228Lb29voVQqxaOPPipOnTplUnPt2jUxdOhQ4ezsLDQajRg1apT0D0l9iYmJEQAsviqsWrVKtG/fXqjVaqHRaETPnj1NphSoYCvHRIjqHRch7q3vSoURI0ZYPC4VZ9+3b98ugoODhbOzs3BychJdu3YVy5cvF+Xl5Sbr2b17twgODhYKhUK0bNlSxMTEmG1ryZIlIiAgQCgUCtGzZ0+xf/9+k/bCwkLx6quvCjc3N6FWq8XgwYNNAnx9ut1xEUKICxcuiP79+wtHR0fh4eEhJk+eLEpLS03WY2vH5WaHDx8WoaGhQqvVCpVKJdq3by8++OAD6X/OK9TX71Vjc7ufe2MWGRkpfH19hUKhEE2bNhWRkZHi7NmzUnt1vtO19fvVGMiEsLGREkRERER0xzhamYiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpIwHBIRERGRhOGQiIiIiCQMh0REVjZy5EjIZDLIZDIoFAoEBQXh3XffRVlZGQBACIEvv/wSoaGhcHZ2hqurK7p3745FixahoKAAAHDixAk8++yzCAwMhEwmw6JFi6y4R0TUmDEcEhE1AI8//jiuXLmCM2fOYPLkyZgzZw4WLlwIABg2bBgmTZqEgQMHYvfu3UhKSsLb
b7+NTZs2YefOnQCAgoICtGzZEvPnz4ePj481d4WIGjk+W5mIyMpGjhyJ3NxcbNy4UVrWr18/5OXl4fXXX0dkZCQ2btyIgQMHmnxOCAG9Xg+tVmuyPDAwEJMmTcKkSZPqofdEZGt45pCIqAFydHRESUkJ1qxZg7Zt25oFQwCQyWRmwZCI6G4xHBIRNSBCCPz888/YsWMHHnnkEZw5cwZt27a1dreI6B7CcEhE1ABs2bIFzs7OUKlU6N+/PyIjIzFnzhzwzh8iqm/21u4AEREBffv2xbJly6BQKODn5wd7e+Nfz23atEFKSoqVe0dE9xKeOSQiagCcnJwQFBSEgIAAKRgCwAsvvIDTp09j06ZNZp8RQkCn09VnN4noHsBwSETUgD333HOIjIzE0KFD8cEHH+DQoUP466+/sGXLFoSHh2P37t0AgJKSEiQlJSEpKQklJSW4fPkykpKScPbsWSvvARE1NpzKhojIyixNZXMzg8GAL7/8Et988w1OnDgBe3t7tG7dGsOHD8fYsWPh6OiICxcuoEWLFmaffeihh7Bnz5663QEisikMh0REREQk4WVlIiIiIpIwHBIRERGRhOGQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJP8P6iUtA/HMYEcAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Vérifier si les clusters sont visuellement séparés\n", + "# ============================================================\n", + "# 10) Visualisation PCA d'un fonds\n", + "# ============================================================\n", + "\n", + "example_fund = list(best_partitions.keys())[-1] #2 clusters: 7 et 211\n", + "\n", + "df_plot = best_partitions[example_fund][\"data\"].copy()\n", + "feature_cols = fund_results[example_fund][\"features\"]\n", + "X_scaled = prep_matrix(df_plot, feature_cols)\n", + "\n", + "pca = PCA(n_components=2, random_state=RANDOM_STATE)\n", + "X_pca = pca.fit_transform(X_scaled)\n", + "\n", + "plot_df = pd.DataFrame({\n", + " \"PC1\": X_pca[:, 0],\n", + " \"PC2\": X_pca[:, 1],\n", + " \"cluster\": df_plot[\"cluster\"].astype(str)\n", + "})\n", + "\n", + "plt.figure(figsize=(7, 5))\n", + "sns.scatterplot(data=plot_df, x=\"PC1\", y=\"PC2\", hue=\"cluster\")\n", + "plt.title(f\"PCA - {example_fund}\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "c38c38bf-0dad-4baf-9d2d-acd4087b3bc3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cluster0123
fund_weight_mean0.0025330.0252960.1070300.411546
fund_weight_last0.0000000.0162940.0000650.000000
fund_aum_mean862.7643604892.21343417080.303897198436.767999
fund_aum_last0.0000001587.0128881.0605560.000000
held_month_share0.7570530.7657110.9579970.546661
active_month_share0.1406410.2386540.3924540.535638
entry_count1.5000001.5268291.2222222.500000
exit_count1.5000000.9073171.1111112.500000
turnover_mean2.5681311.8521324.8544341282.868308
turnover_6m_mean40.5892030.44940112.46908844.754050
flow_to_aum_mean-2.567005-1.111582-2.290458-586.056551
flow_to_aum_6m_mean-40.5842200.075295-12.401695-5.753449
corr_flow_ret_1m-0.0213900.0308900.0384660.137629
buy_after_good_perf_share0.0992370.0898150.1312060.030287
\n", + "
" + ], + "text/plain": [ + "cluster 0 1 2 \\\n", + "fund_weight_mean 0.002533 0.025296 0.107030 \n", + "fund_weight_last 0.000000 0.016294 0.000065 \n", + "fund_aum_mean 862.764360 4892.213434 17080.303897 \n", + "fund_aum_last 0.000000 1587.012888 1.060556 \n", + "held_month_share 0.757053 0.765711 0.957997 \n", + "active_month_share 0.140641 0.238654 0.392454 \n", + "entry_count 1.500000 1.526829 1.222222 \n", + "exit_count 1.500000 0.907317 1.111111 \n", + "turnover_mean 2.568131 1.852132 4.854434 \n", + "turnover_6m_mean 40.589203 0.449401 12.469088 \n", + "flow_to_aum_mean -2.567005 -1.111582 -2.290458 \n", + "flow_to_aum_6m_mean -40.584220 0.075295 -12.401695 \n", + "corr_flow_ret_1m -0.021390 0.030890 0.038466 \n", + "buy_after_good_perf_share 0.099237 0.089815 0.131206 \n", + "\n", + "cluster 3 \n", + "fund_weight_mean 0.411546 \n", + "fund_weight_last 0.000000 \n", + "fund_aum_mean 198436.767999 \n", + "fund_aum_last 0.000000 \n", + "held_month_share 0.546661 \n", + "active_month_share 0.535638 \n", + "entry_count 2.500000 \n", + "exit_count 2.500000 \n", + "turnover_mean 1282.868308 \n", + "turnover_6m_mean 44.754050 \n", + "flow_to_aum_mean -586.056551 \n", + "flow_to_aum_6m_mean -5.753449 \n", + "corr_flow_ret_1m 0.137629 \n", + "buy_after_good_perf_share 0.030287 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Profil moyen des clusters pour un fonds\n", + "\n", + "# ============================================================\n", + "# 11) Profil moyen des clusters pour un fonds\n", + "# ============================================================\n", + "\n", + "display(\n", + " best_partitions[example_fund][\"data\"]\n", + " .groupby(\"cluster\")[feature_cols]\n", + " .mean()\n", + " .T\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + },
+ "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}