1338 lines
610 KiB
Plaintext
1338 lines
610 KiB
Plaintext
|
|
{
|
|||
|
|
"cells": [
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "f6ea29f1",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"# Global Clustering \n",
|
|||
|
|
"\n",
|
|||
|
|
"**Sections:**\n",
|
|||
|
|
"1. Imports & Data Loading\n",
|
|||
|
|
"2. Monthly Panel Construction\n",
|
|||
|
|
"3. Feature Engineering (base + enriched)\n",
|
|||
|
|
"4. Global Clustering (all active accounts)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "e727f666",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## 0. Imports & Data Loading"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 1,
|
|||
|
|
"id": "9314f229-0b5d-4a4c-846c-869847d32c73",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"import os\n",
|
|||
|
|
"import s3fs\n",
|
|||
|
|
"os.environ[\"AWS_ACCESS_KEY_ID\"] = 'UMMV3Z72A70MCCSRV17O'\n",
|
|||
|
|
"os.environ[\"AWS_SECRET_ACCESS_KEY\"] = 'wBFxaez78UPNW3BtchZOf4f238ZNXKnCexeGufaa'\n",
|
|||
|
|
"os.environ[\"AWS_SESSION_TOKEN\"] = 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJVTU1WM1o3MkE3ME1DQ1NSVjE3TyIsImFjciI6IjAiLCJhbGxvd2VkLW9yaWdpbnMiOlsiKiJdLCJhdWQiOlsibWluaW8iLCJhY2NvdW50Il0sImF1dGhfdGltZSI6MTc3NTEzNTA4NiwiYXpwIjoib255eGlhLW1pbmlvIiwiZW1haWwiOiJzYXJhaC50aG91bXlyZUBlbnNhZS5mciIsImVtYWlsX3ZlcmlmaWVkIjp0cnVlLCJleHAiOjE3NzYzNDQ3NDksImZhbWlseV9uYW1lIjoiVEhPVU1ZUkUiLCJnaXZlbl9uYW1lIjoiU2FyYWgiLCJncm91cHMiOlsiYmRjLWRhdGEiLCJiZGMtY2FybWlnbmFjLWczIl0sImlhdCI6MTc3NTEzNTE0OCwiaXNzIjoiaHR0cHM6Ly9hdXRoLmdyb3VwZS1nZW5lcy5mci9yZWFsbXMvZ2VuZXMiLCJqdGkiOiJlZGY1ZDQ1OC1hYzkxLTQ5NTAtYmI5Ny0zNjMwNWY1MTQwYTIiLCJuYW1lIjoiU2FyYWggVEhPVU1ZUkUiLCJwb2xpY3kiOiJzdHNvbmx5IiwicHJlZmVycmVkX3VzZXJuYW1lIjoic3Rob3VteXJlLWVuc2FlIiwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbIm9mZmxpbmVfYWNjZXNzIiwiZGVmYXVsdC1yb2xlcy1nZW5lcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJvcGVuaWQgcHJvZmlsZSBlbWFpbCIsInNpZCI6IjMzMjg4YjJjLTlhMjAtNDNhOS1iMDlhLTdlMjc1OWQ1NjIxNiIsInN1YiI6ImVhYWVkN2QyLWM4MjYtNGIxNC05MzczLTYwYjNhODhlMWFiNiIsInR5cCI6IkJlYXJlciJ9.rffoTJijRiGK2DCDhXj5y8R31DRH1LWkTwuH_1lvU9qN_xJSTmBIM4uGR_zp7XpMnq_ePwVhlkoWN15cNUgjMA'\n",
|
|||
|
|
"os.environ[\"AWS_DEFAULT_REGION\"] = 'us-east-1'\n",
|
|||
|
|
"fs = s3fs.S3FileSystem(\n",
|
|||
|
|
" client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'},\n",
|
|||
|
|
" key = os.environ[\"AWS_ACCESS_KEY_ID\"], \n",
|
|||
|
|
" secret = os.environ[\"AWS_SECRET_ACCESS_KEY\"], \n",
|
|||
|
|
" token = os.environ[\"AWS_SESSION_TOKEN\"])"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 2,
|
|||
|
|
"id": "61e33897",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"import warnings\n",
|
|||
|
|
"warnings.filterwarnings(\"ignore\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"import numpy as np\n",
|
|||
|
|
"import pandas as pd\n",
|
|||
|
|
"import matplotlib.pyplot as plt\n",
|
|||
|
|
"import seaborn as sns\n",
|
|||
|
|
"\n",
|
|||
|
|
"from sklearn.preprocessing import RobustScaler\n",
|
|||
|
|
"from sklearn.cluster import KMeans\n",
|
|||
|
|
"from sklearn.metrics import silhouette_score, davies_bouldin_score, pairwise_distances\n",
|
|||
|
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
|
"\n",
|
|||
|
|
"sns.set_style(\"whitegrid\")\n",
|
|||
|
|
"pd.set_option(\"display.max_columns\", 100)\n",
|
|||
|
|
"\n",
|
|||
|
|
"EPS = 1e-9\n",
|
|||
|
|
"RANDOM_STATE = 42\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Column names\n",
|
|||
|
|
"ID_COL = \"Registrar Account - ID\"\n",
|
|||
|
|
"ISIN_COL = \"Product - Isin\"\n",
|
|||
|
|
"FUND_COL = \"Product - Fund\"\n",
|
|||
|
|
"ASSET_COL = \"Product - Asset Type\"\n",
|
|||
|
|
"FLOW_DATE_COL = \"Centralisation Date\"\n",
|
|||
|
|
"AUM_DATE_COL = \"Centralisation Date\"\n",
|
|||
|
|
"FLOW_QTY_COL = \"Quantity - NetFlows\"\n",
|
|||
|
|
"FLOW_SUB_COL = \"Quantity - Subscription\"\n",
|
|||
|
|
"FLOW_RED_COL = \"Quantity - Redemption\"\n",
|
|||
|
|
"AUM_QTY_COL = \"Quantity - AUM\"\n",
|
|||
|
|
"AUM_VAL_COL = \"Value - AUM €\"\n",
|
|||
|
|
"REGION_COL = \"Registrar Account - Region\"\n",
|
|||
|
|
"COUNTRY_COL = \"RegistrarAccount - Country\"\n",
|
|||
|
|
"NAV_DATE_COL = \"Dat\"\n",
|
|||
|
|
"NAV_ISIN_COL = \"Isin\"\n",
|
|||
|
|
"NAV_PRICE_COL = \"Price (TF PartPrice)\"\n",
|
|||
|
|
"NAV_BENCH_COL = \"PriceBench\"\n",
|
|||
|
|
"RATE_DATE_COL = \"Date\"\n",
|
|||
|
|
"RATE_VAL_COL = \"Yld to Maturity\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"#external data projet-bdc-data /carmignac /Data Modélisation /Nav\n",
|
|||
|
|
"PATH_NAV = \"s3://projet-bdc-data/carmignac/Data Modélisation/Nav/NAV_Bench_data.csv\" #C’est la table de valorisation / performance du produit.\n",
|
|||
|
|
"PATH_RATES = \"s3://projet-bdc-data/carmignac/Data Modélisation/market data/esterRates.csv\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"# optional competitors\n",
|
|||
|
|
"PATH_COMP_FLOWS = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/daily_estimated_flows.csv\"\n",
|
|||
|
|
"PATH_COMP_PERF = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/weekly_perf_full.csv\"\n",
|
|||
|
|
"PATH_PEERS = \"s3://projet-bdc-carmignac-g3/peers/CAD_peers.csv\""
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 3,
|
|||
|
|
"id": "eb3b2908",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"flows: (2574461, 26)\n",
|
|||
|
|
"aum: (4880297, 19)\n",
|
|||
|
|
"nav: (623914, 6)\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"df_flows = pd.read_csv(\"flows.csv\", low_memory=False)\n",
|
|||
|
|
"df_aum = df_aum = pd.read_csv(\"s3://projet-bdc-carmignac-g3/paco/AUM_repaired.csv\", low_memory=False)\n",
|
|||
|
|
"df_nav = pd.read_csv(PATH_NAV, sep=\";\")\n",
|
|||
|
|
"df_rates = pd.read_csv(PATH_RATES, sep=\";\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Date parsing\n",
|
|||
|
|
"for df, col in [\n",
|
|||
|
|
" (df_flows, FLOW_DATE_COL), (df_aum, AUM_DATE_COL),\n",
|
|||
|
|
" (df_nav, NAV_DATE_COL), (df_rates, RATE_DATE_COL)\n",
|
|||
|
|
"]:\n",
|
|||
|
|
" df[col] = pd.to_datetime(df[col], errors=\"coerce\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Month column\n",
|
|||
|
|
"for df, col in [(df_flows, FLOW_DATE_COL), (df_aum, AUM_DATE_COL),\n",
|
|||
|
|
" (df_nav, NAV_DATE_COL), (df_rates, RATE_DATE_COL)]:\n",
|
|||
|
|
" df[\"month\"] = df[col].dt.to_period(\"M\").dt.to_timestamp()\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Numeric coercion\n",
|
|||
|
|
"for col in [FLOW_QTY_COL, FLOW_SUB_COL, FLOW_RED_COL]:\n",
|
|||
|
|
" df_flows[col] = pd.to_numeric(df_flows[col], errors=\"coerce\")\n",
|
|||
|
|
"for col in [AUM_QTY_COL, AUM_VAL_COL]:\n",
|
|||
|
|
" df_aum[col] = pd.to_numeric(df_aum[col], errors=\"coerce\")\n",
|
|||
|
|
"for col in [NAV_PRICE_COL, NAV_BENCH_COL]:\n",
|
|||
|
|
" df_nav[col] = pd.to_numeric(df_nav[col], errors=\"coerce\")\n",
|
|||
|
|
"df_rates[RATE_VAL_COL] = pd.to_numeric(df_rates[RATE_VAL_COL], errors=\"coerce\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# ISIN as string\n",
|
|||
|
|
"for df in [df_flows, df_aum]:\n",
|
|||
|
|
" df[ISIN_COL] = df[ISIN_COL].astype(str).str.strip()\n",
|
|||
|
|
"df_nav[NAV_ISIN_COL] = df_nav[NAV_ISIN_COL].astype(str).str.strip()\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"flows:\", df_flows.shape)\n",
|
|||
|
|
"print(\"aum: \", df_aum.shape)\n",
|
|||
|
|
"print(\"nav: \", df_nav.shape)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "5929db69",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## 1. Monthly Panel Construction\n",
|
|||
|
|
"\n",
|
|||
|
|
"Build a full outer join of AUM and flows at (account, ISIN, month) granularity,\n",
|
|||
|
|
"then enrich with NAV performance and macro rates."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 73,
|
|||
|
|
"id": "d36d0a70",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Panel shape: (4791501, 20)\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# --- Flows aggregated to monthly ---\n",
|
|||
|
|
"df_flows_m = (\n",
|
|||
|
|
" df_flows\n",
|
|||
|
|
" .dropna(subset=[ID_COL, ISIN_COL, \"month\"])\n",
|
|||
|
|
" .assign(\n",
|
|||
|
|
" gross_flow_qty = lambda x: x[FLOW_QTY_COL].abs(),\n",
|
|||
|
|
" sub_qty = lambda x: x[FLOW_SUB_COL].fillna(0),\n",
|
|||
|
|
" red_qty = lambda x: x[FLOW_RED_COL].fillna(0)\n",
|
|||
|
|
" )\n",
|
|||
|
|
" .groupby([ID_COL, ISIN_COL, \"month\"], as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" net_flow_qty = (FLOW_QTY_COL, \"sum\"),\n",
|
|||
|
|
" gross_flow_qty = (\"gross_flow_qty\", \"sum\"),\n",
|
|||
|
|
" sub_qty = (\"sub_qty\", \"sum\"),\n",
|
|||
|
|
" red_qty = (\"red_qty\", \"sum\"),\n",
|
|||
|
|
" n_tx = (FLOW_QTY_COL, \"size\"),\n",
|
|||
|
|
" region = (REGION_COL, \"last\"),\n",
|
|||
|
|
" country = (COUNTRY_COL, \"last\")\n",
|
|||
|
|
" )\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- AUM aggregated to monthly ---\n",
|
|||
|
|
"df_aum_m = (\n",
|
|||
|
|
" df_aum\n",
|
|||
|
|
" .dropna(subset=[ID_COL, ISIN_COL, \"month\"])\n",
|
|||
|
|
" .groupby([ID_COL, ISIN_COL, \"month\"], as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" aum_qty = (AUM_QTY_COL, \"sum\"),\n",
|
|||
|
|
" aum_val = (AUM_VAL_COL, \"sum\"),\n",
|
|||
|
|
" fund = (FUND_COL, \"last\"),\n",
|
|||
|
|
" asset_type = (ASSET_COL, \"last\"),\n",
|
|||
|
|
" region = (REGION_COL, \"last\"),\n",
|
|||
|
|
" country = (COUNTRY_COL, \"last\")\n",
|
|||
|
|
" )\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Full outer join ---\n",
|
|||
|
|
"keys = pd.concat([\n",
|
|||
|
|
" df_flows_m[[ID_COL, ISIN_COL, \"month\"]],\n",
|
|||
|
|
" df_aum_m[[ID_COL, ISIN_COL, \"month\"]]\n",
|
|||
|
|
"]).drop_duplicates()\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rel_m = (\n",
|
|||
|
|
" keys\n",
|
|||
|
|
" .merge(df_aum_m, on=[ID_COL, ISIN_COL, \"month\"], how=\"left\")\n",
|
|||
|
|
" .merge(df_flows_m, on=[ID_COL, ISIN_COL, \"month\"], how=\"left\", suffixes=(\"\", \"_flow\"))\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"for c in [\"aum_qty\",\"aum_val\",\"net_flow_qty\",\"gross_flow_qty\",\"sub_qty\",\"red_qty\",\"n_tx\"]:\n",
|
|||
|
|
" df_rel_m[c] = df_rel_m[c].fillna(0)\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rel_m[\"region\"] = df_rel_m[\"region\"].fillna(df_rel_m.get(\"region_flow\"))\n",
|
|||
|
|
"df_rel_m[\"country\"] = df_rel_m[\"country\"].fillna(df_rel_m.get(\"country_flow\"))\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Active / holding flags ---\n",
|
|||
|
|
"df_rel_m[\"active_rel_month\"] = (df_rel_m[\"gross_flow_qty\"] > 0).astype(int)\n",
|
|||
|
|
"df_rel_m[\"holding_rel_month\"] = (df_rel_m[\"aum_qty\"] > 0).astype(int)\n",
|
|||
|
|
"df_rel_m[\"flow_to_aum_rel\"] = df_rel_m[\"net_flow_qty\"] / (df_rel_m[\"aum_qty\"].abs() + EPS)\n",
|
|||
|
|
"df_rel_m[\"turnover_rel\"] = df_rel_m[\"gross_flow_qty\"] / (df_rel_m[\"aum_qty\"].abs() + EPS)\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"Panel shape:\", df_rel_m.shape)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 74,
|
|||
|
|
"id": "965d2564",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Enriched panel shape: (4791501, 24)\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# --- NAV & benchmark returns ---\n",
|
|||
|
|
"df_nav_m = (\n",
|
|||
|
|
" df_nav\n",
|
|||
|
|
" .dropna(subset=[NAV_ISIN_COL, \"month\", NAV_PRICE_COL])\n",
|
|||
|
|
" .sort_values([NAV_ISIN_COL, \"month\"])\n",
|
|||
|
|
" .groupby([NAV_ISIN_COL, \"month\"], as_index=False)\n",
|
|||
|
|
" .tail(1).copy()\n",
|
|||
|
|
")\n",
|
|||
|
|
"df_nav_m[\"ret_fund_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_PRICE_COL].pct_change()\n",
|
|||
|
|
"df_nav_m[\"ret_bench_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_BENCH_COL].pct_change()\n",
|
|||
|
|
"df_nav_m[\"active_return_m\"] = df_nav_m[\"ret_fund_m\"] - df_nav_m[\"ret_bench_m\"]\n",
|
|||
|
|
"df_nav_m = df_nav_m.rename(columns={NAV_ISIN_COL: ISIN_COL})[\n",
|
|||
|
|
" [ISIN_COL, \"month\", \"ret_fund_m\", \"ret_bench_m\", \"active_return_m\"]\n",
|
|||
|
|
"]\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Interest rates ---\n",
|
|||
|
|
"df_rates_m = (\n",
|
|||
|
|
" df_rates\n",
|
|||
|
|
" .dropna(subset=[\"month\", RATE_VAL_COL])\n",
|
|||
|
|
" .sort_values(RATE_DATE_COL)\n",
|
|||
|
|
" .groupby(\"month\", as_index=False).tail(1).copy()\n",
|
|||
|
|
")\n",
|
|||
|
|
"df_rates_m[\"delta_rate_m\"] = df_rates_m[RATE_VAL_COL].diff()\n",
|
|||
|
|
"df_rates_m = df_rates_m[[\"month\", RATE_VAL_COL, \"delta_rate_m\"]]\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Merge into panel ---\n",
|
|||
|
|
"df_rel_m = df_rel_m.merge(df_nav_m, on=[ISIN_COL, \"month\"], how=\"left\")\n",
|
|||
|
|
"df_rel_m = df_rel_m.merge(df_rates_m[[\"month\",\"delta_rate_m\"]], on=\"month\", how=\"left\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"for c in [\"ret_fund_m\",\"ret_bench_m\",\"active_return_m\",\"delta_rate_m\"]:\n",
|
|||
|
|
" df_rel_m[c] = df_rel_m[c].fillna(0)\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"Enriched panel shape:\", df_rel_m.shape)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "1ef7bba8",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## 2. Feature Engineering\n",
|
|||
|
|
"\n",
|
|||
|
|
"### 2a. Monthly account-level aggregation\n",
|
|||
|
|
"### 2b. ISIN-level features (where / when investors put their money)\n",
|
|||
|
|
"### 2c. Asset type & fund composition features\n",
|
|||
|
|
"### 2d. Rolling metrics (1M / 3M / 6M)\n",
|
|||
|
|
"### 2e. Behavioural features (entry/exit, momentum, rate sensitivity)\n",
|
|||
|
|
"### 2f. Trend & beta features"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 168,
|
|||
|
|
"id": "db5a297c-78ea-4048-98f8-624612fbb60d",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"df_month shape: (931333, 21)\n",
|
|||
|
|
"ISIN-level client features: (12584, 12)\n",
|
|||
|
|
"Asset shares: (7475, 6)\n",
|
|||
|
|
"Fund shares: (6562, 11)\n",
|
|||
|
|
"Rolling features: (12584, 3)\n",
|
|||
|
|
"df_client shape: (12584, 46)\n",
|
|||
|
|
"After outlier removal: 7179 accounts\n",
|
|||
|
|
"dfc shape: (7179, 60)\n",
|
|||
|
|
"gross_flow_to_aum: min=0.0000, max=270698.4708, nan=336, inf=0\n",
|
|||
|
|
"flow_direction_balance: min=-1.0000, max=1.0000, nan=1069, inf=0\n",
|
|||
|
|
"sub_share_mean: min=-0.0985, max=77.5673, nan=1069, inf=0\n",
|
|||
|
|
"redemption_bias: min=-154.1345, max=0.0828, nan=1069, inf=0\n",
|
|||
|
|
"entry_rate_per_isin: min=0.0217, max=5.0000, nan=0, inf=0\n",
|
|||
|
|
"aum_final_to_peak: min=-2.4841, max=1.0000, nan=0, inf=0\n",
|
|||
|
|
"flow_roll3m_norm: min=-4935000000000.0000, max=1400000000000.0000, nan=0, inf=0\n",
|
|||
|
|
"flow_roll6m_norm: min=-8699999999999.9990, max=322428000000.0000, nan=0, inf=0\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# 2a. Monthly account-level panel\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"tmp = df_rel_m.copy()\n",
|
|||
|
|
"tmp[\"isin_held_flag\"] = (tmp[\"aum_qty\"] > 0).astype(int)\n",
|
|||
|
|
"tmp[\"isin_active_flag\"] = (tmp[\"gross_flow_qty\"] > 0).astype(int)\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_month = (\n",
|
|||
|
|
" tmp.groupby([ID_COL, \"month\"], as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" aum_qty = (\"aum_qty\", \"sum\"),\n",
|
|||
|
|
" aum_val = (\"aum_val\", \"sum\"),\n",
|
|||
|
|
" net_flow_qty = (\"net_flow_qty\", \"sum\"),\n",
|
|||
|
|
" gross_flow_qty = (\"gross_flow_qty\", \"sum\"),\n",
|
|||
|
|
" sub_qty = (\"sub_qty\", \"sum\"),\n",
|
|||
|
|
" red_qty = (\"red_qty\", \"sum\"),\n",
|
|||
|
|
" n_tx = (\"n_tx\", \"sum\"),\n",
|
|||
|
|
" n_isin_held = (\"isin_held_flag\", \"sum\"),\n",
|
|||
|
|
" n_isin_active = (\"isin_active_flag\",\"sum\"),\n",
|
|||
|
|
" delta_rate_m = (\"delta_rate_m\", \"first\"),\n",
|
|||
|
|
" region = (\"region\", \"first\"),\n",
|
|||
|
|
" country = (\"country\", \"first\"),\n",
|
|||
|
|
" )\n",
|
|||
|
|
" .sort_values([ID_COL, \"month\"])\n",
|
|||
|
|
" .reset_index(drop=True)\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_month[\"active_month\"] = (df_month[\"gross_flow_qty\"] > 0).astype(int)\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_month[\"flow_to_aum_m\"] = np.where(\n",
|
|||
|
|
" df_month[\"aum_qty\"].abs() > 0,\n",
|
|||
|
|
" df_month[\"net_flow_qty\"] / df_month[\"aum_qty\"].abs(),\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"df_month[\"turnover_m\"] = np.where(\n",
|
|||
|
|
" df_month[\"aum_qty\"].abs() > 0,\n",
|
|||
|
|
" df_month[\"gross_flow_qty\"] / df_month[\"aum_qty\"].abs(),\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"df_month[\"sub_share_m\"] = np.where(\n",
|
|||
|
|
" df_month[\"gross_flow_qty\"] > 0,\n",
|
|||
|
|
" df_month[\"sub_qty\"] / df_month[\"gross_flow_qty\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"df_month[\"red_share_m\"] = np.where(\n",
|
|||
|
|
" df_month[\"gross_flow_qty\"] > 0,\n",
|
|||
|
|
" df_month[\"red_qty\"] / df_month[\"gross_flow_qty\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"df_month[\"aum_peak_to_date\"] = df_month.groupby(ID_COL)[\"aum_qty\"].cummax()\n",
|
|||
|
|
"df_month[\"aum_drawdown\"] = np.where(\n",
|
|||
|
|
" df_month[\"aum_peak_to_date\"] > 0,\n",
|
|||
|
|
" 1 - df_month[\"aum_qty\"] / df_month[\"aum_peak_to_date\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"df_month shape:\", df_month.shape)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# 2b. ISIN-level features\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"tmp = df_rel_m.sort_values([ID_COL, ISIN_COL, \"month\"]).copy()\n",
|
|||
|
|
"tmp[\"prev_aum\"] = tmp.groupby([ID_COL, ISIN_COL])[\"aum_qty\"].shift(1)\n",
|
|||
|
|
"tmp[\"entry_event\"] = ((tmp[\"prev_aum\"].fillna(0) <= 0) & (tmp[\"aum_qty\"] > 0)).astype(int)\n",
|
|||
|
|
"tmp[\"full_exit_event\"] = ((tmp[\"prev_aum\"] > 0) & (tmp[\"aum_qty\"] <= 0)).astype(int)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Lag de 1 mois pour la réaction à la performance (causalité correcte)\n",
|
|||
|
|
"tmp[\"ret_fund_m_lag1\"] = tmp.groupby([ID_COL, ISIN_COL])[\"ret_fund_m\"].shift(1)\n",
|
|||
|
|
"tmp[\"buy_on_perf\"] = ((tmp[\"net_flow_qty\"] > 0) & (tmp[\"ret_fund_m_lag1\"] > 0)).astype(int)\n",
|
|||
|
|
"tmp[\"sell_on_perf\"] = ((tmp[\"net_flow_qty\"] < 0) & (tmp[\"ret_fund_m_lag1\"] < 0)).astype(int)\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rel_feat = (\n",
|
|||
|
|
" tmp.groupby([ID_COL, ISIN_COL], as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" rel_n_months = (\"month\", \"nunique\"),\n",
|
|||
|
|
" rel_active_months = (\"active_rel_month\", \"sum\"),\n",
|
|||
|
|
" rel_holding_months = (\"holding_rel_month\", \"sum\"),\n",
|
|||
|
|
" rel_aum_mean = (\"aum_qty\", \"mean\"),\n",
|
|||
|
|
" rel_turnover_mean = (\"turnover_rel\", \"mean\"),\n",
|
|||
|
|
" rel_turnover_vol = (\"turnover_rel\", \"std\"),\n",
|
|||
|
|
" rel_flow_to_aum_vol = (\"flow_to_aum_rel\", \"std\"),\n",
|
|||
|
|
" rel_n_tx = (\"n_tx\", \"sum\"),\n",
|
|||
|
|
" rel_full_exit_count = (\"full_exit_event\", \"sum\"),\n",
|
|||
|
|
" rel_entry_count = (\"entry_event\", \"sum\"),\n",
|
|||
|
|
" buy_on_perf_rate = (\"buy_on_perf\", \"mean\"),\n",
|
|||
|
|
" sell_on_perf_rate = (\"sell_on_perf\", \"mean\"),\n",
|
|||
|
|
" )\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"isin_aum = df_rel_feat.groupby(ID_COL)[\"rel_aum_mean\"].transform(\"sum\")\n",
|
|||
|
|
"df_rel_feat[\"isin_weight\"] = np.where(\n",
|
|||
|
|
" isin_aum > 0,\n",
|
|||
|
|
" df_rel_feat[\"rel_aum_mean\"] / isin_aum,\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"hhi_isin = (\n",
|
|||
|
|
" df_rel_feat.groupby(ID_COL)[\"isin_weight\"]\n",
|
|||
|
|
" .apply(lambda w: np.sum(w**2))\n",
|
|||
|
|
" .reset_index(name=\"hhi_isin\")\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rel_client = (\n",
|
|||
|
|
" df_rel_feat.groupby(ID_COL, as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" n_isin_total = (ISIN_COL, \"nunique\"),\n",
|
|||
|
|
" rel_turnover_mean_avg = (\"rel_turnover_mean\", \"mean\"),\n",
|
|||
|
|
" rel_turnover_vol_avg = (\"rel_turnover_vol\", \"mean\"),\n",
|
|||
|
|
" rel_flow_to_aum_vol_avg = (\"rel_flow_to_aum_vol\",\"mean\"),\n",
|
|||
|
|
" full_exit_count = (\"rel_full_exit_count\",\"sum\"),\n",
|
|||
|
|
" entry_count = (\"rel_entry_count\", \"sum\"),\n",
|
|||
|
|
" avg_holding_months_per_isin = (\"rel_holding_months\", \"mean\"),\n",
|
|||
|
|
" max_holding_months_per_isin = (\"rel_holding_months\", \"max\"),\n",
|
|||
|
|
" buy_on_perf_rate_avg = (\"buy_on_perf_rate\", \"mean\"),\n",
|
|||
|
|
" sell_on_perf_rate_avg = (\"sell_on_perf_rate\", \"mean\"),\n",
|
|||
|
|
" )\n",
|
|||
|
|
" .merge(hhi_isin, on=ID_COL, how=\"left\")\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"ISIN-level client features:\", df_rel_client.shape)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# 2c. Asset type & fund composition features\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"aum_by_asset = (\n",
|
|||
|
|
" df_aum.dropna(subset=[ID_COL, ASSET_COL])\n",
|
|||
|
|
" .groupby([ID_COL, ASSET_COL], as_index=False)[AUM_VAL_COL].sum()\n",
|
|||
|
|
")\n",
|
|||
|
|
"total_aum_acc = aum_by_asset.groupby(ID_COL)[AUM_VAL_COL].sum().rename(\"total_aum\")\n",
|
|||
|
|
"aum_by_asset = aum_by_asset.merge(total_aum_acc, on=ID_COL)\n",
|
|||
|
|
"aum_by_asset[\"share\"] = np.where(\n",
|
|||
|
|
" aum_by_asset[\"total_aum\"] > 0,\n",
|
|||
|
|
" aum_by_asset[AUM_VAL_COL] / aum_by_asset[\"total_aum\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"asset_shares = (\n",
|
|||
|
|
" aum_by_asset.pivot_table(index=ID_COL, columns=ASSET_COL, values=\"share\", aggfunc=\"mean\")\n",
|
|||
|
|
" .fillna(0).reset_index()\n",
|
|||
|
|
")\n",
|
|||
|
|
"asset_shares.columns = [ID_COL] + [\n",
|
|||
|
|
" f\"share_asset_{c.lower().replace(' ','_')}\" for c in asset_shares.columns[1:]\n",
|
|||
|
|
"]\n",
|
|||
|
|
"\n",
|
|||
|
|
"aum_by_fund = (\n",
|
|||
|
|
" df_aum.dropna(subset=[ID_COL, FUND_COL])\n",
|
|||
|
|
" .groupby([ID_COL, FUND_COL], as_index=False)[AUM_VAL_COL].sum()\n",
|
|||
|
|
")\n",
|
|||
|
|
"aum_by_fund = aum_by_fund.merge(total_aum_acc, on=ID_COL)\n",
|
|||
|
|
"aum_by_fund[\"share\"] = np.where(\n",
|
|||
|
|
" aum_by_fund[\"total_aum\"] > 0,\n",
|
|||
|
|
" aum_by_fund[AUM_VAL_COL] / aum_by_fund[\"total_aum\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"top_funds = aum_by_fund.groupby(FUND_COL)[AUM_VAL_COL].sum().nlargest(10).index\n",
|
|||
|
|
"fund_shares = (\n",
|
|||
|
|
" aum_by_fund[aum_by_fund[FUND_COL].isin(top_funds)]\n",
|
|||
|
|
" .pivot_table(index=ID_COL, columns=FUND_COL, values=\"share\", aggfunc=\"mean\")\n",
|
|||
|
|
" .fillna(0).reset_index()\n",
|
|||
|
|
")\n",
|
|||
|
|
"fund_shares.columns = [ID_COL] + [\n",
|
|||
|
|
" f\"share_fund_{c.lower().replace(' ','_')[:30]}\" for c in fund_shares.columns[1:]\n",
|
|||
|
|
"]\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"Asset shares:\", asset_shares.shape)\n",
|
|||
|
|
"print(\"Fund shares: \", fund_shares.shape)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# 2d. Rolling metrics — supprimées car trop sparse (80-90% zéros)\n",
|
|||
|
|
"# On garde uniquement flow_roll3m et flow_roll6m comme signaux\n",
|
|||
|
|
"# de tendance récente, normalisés par l'AUM\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"df_month_s = df_month.sort_values([ID_COL, \"month\"]).copy()\n",
|
|||
|
|
"\n",
|
|||
|
|
"for w in [3, 6]:\n",
|
|||
|
|
" df_month_s[f\"flow_roll{w}m_norm\"] = (\n",
|
|||
|
|
" df_month_s.groupby(ID_COL)\n",
|
|||
|
|
" .apply(lambda g: (\n",
|
|||
|
|
" g[\"net_flow_qty\"].rolling(w, min_periods=1).sum() /\n",
|
|||
|
|
" (g[\"aum_qty\"].abs().rolling(w, min_periods=1).mean() + EPS)\n",
|
|||
|
|
" ))\n",
|
|||
|
|
" .reset_index(level=0, drop=True)\n",
|
|||
|
|
" )\n",
|
|||
|
|
"\n",
|
|||
|
|
"rolling_feats = (\n",
|
|||
|
|
" df_month_s.groupby(ID_COL, as_index=False)\n",
|
|||
|
|
" .last()[[ID_COL, \"flow_roll3m_norm\", \"flow_roll6m_norm\"]]\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"Rolling features:\", rolling_feats.shape)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# 2e. Static client features\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"df_client = (\n",
|
|||
|
|
" df_month.groupby(ID_COL, as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" n_months = (\"month\", \"nunique\"),\n",
|
|||
|
|
" n_active_months = (\"active_month\", \"sum\"),\n",
|
|||
|
|
" flow_freq = (\"active_month\", \"mean\"),\n",
|
|||
|
|
" aum_qty_mean = (\"aum_qty\", \"mean\"),\n",
|
|||
|
|
" aum_qty_median = (\"aum_qty\", \"median\"),\n",
|
|||
|
|
" aum_qty_max = (\"aum_qty\", \"max\"),\n",
|
|||
|
|
" aum_qty_last = (\"aum_qty\", \"last\"),\n",
|
|||
|
|
" net_flow_qty_sum = (\"net_flow_qty\", \"sum\"),\n",
|
|||
|
|
" gross_flow_qty_sum = (\"gross_flow_qty\", \"sum\"),\n",
|
|||
|
|
" sub_qty_sum = (\"sub_qty\", \"sum\"),\n",
|
|||
|
|
" red_qty_sum = (\"red_qty\", \"sum\"),\n",
|
|||
|
|
" n_tx_total = (\"n_tx\", \"sum\"),\n",
|
|||
|
|
" avg_n_isin_held = (\"n_isin_held\", \"mean\"),\n",
|
|||
|
|
" max_n_isin_held = (\"n_isin_held\", \"max\"),\n",
|
|||
|
|
" aum_drawdown_last = (\"aum_drawdown\", \"last\"),\n",
|
|||
|
|
" region = (\"region\", \"last\"),\n",
|
|||
|
|
" country = (\"country\", \"last\"),\n",
|
|||
|
|
" )\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_client = (\n",
|
|||
|
|
" df_client\n",
|
|||
|
|
" .merge(df_rel_client, on=ID_COL, how=\"left\")\n",
|
|||
|
|
" .merge(asset_shares, on=ID_COL, how=\"left\")\n",
|
|||
|
|
" .merge(fund_shares, on=ID_COL, how=\"left\")\n",
|
|||
|
|
" .merge(rolling_feats, on=ID_COL, how=\"left\")\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"df_client shape:\", df_client.shape)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# 2f. Engineered ratios + filtres\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"dfc = df_client.copy()\n",
|
|||
|
|
"\n",
|
|||
|
|
"dfc[\"log_aum_qty_mean\"] = np.log1p(dfc[\"aum_qty_mean\"].clip(lower=0))\n",
|
|||
|
|
"\n",
|
|||
|
|
"dfc[\"gross_flow_to_aum\"] = np.where(\n",
|
|||
|
|
" dfc[\"aum_qty_mean\"] > 1,\n",
|
|||
|
|
" dfc[\"gross_flow_qty_sum\"] / dfc[\"aum_qty_mean\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"dfc[\"flow_direction_balance\"] = np.where(\n",
|
|||
|
|
" dfc[\"gross_flow_qty_sum\"] > 0,\n",
|
|||
|
|
" dfc[\"net_flow_qty_sum\"] / dfc[\"gross_flow_qty_sum\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"dfc[\"sub_share_mean\"] = np.where(\n",
|
|||
|
|
" dfc[\"gross_flow_qty_sum\"] > 0,\n",
|
|||
|
|
" dfc[\"sub_qty_sum\"] / dfc[\"gross_flow_qty_sum\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"dfc[\"redemption_bias\"] = np.where(\n",
|
|||
|
|
" dfc[\"gross_flow_qty_sum\"] > 0,\n",
|
|||
|
|
" (dfc[\"red_qty_sum\"] - dfc[\"sub_qty_sum\"]) / dfc[\"gross_flow_qty_sum\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"dfc[\"activity_intensity\"] = np.where(\n",
|
|||
|
|
" dfc[\"n_months\"] > 0,\n",
|
|||
|
|
" dfc[\"n_tx_total\"] / dfc[\"n_months\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"dfc[\"exit_rate_per_isin\"] = np.where(\n",
|
|||
|
|
" dfc[\"n_isin_total\"] > 0,\n",
|
|||
|
|
" dfc[\"full_exit_count\"] / dfc[\"n_isin_total\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"dfc[\"entry_rate_per_isin\"] = np.where(\n",
|
|||
|
|
" dfc[\"n_isin_total\"] > 0,\n",
|
|||
|
|
" dfc[\"entry_count\"] / dfc[\"n_isin_total\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"dfc[\"aum_final_to_peak\"] = np.where(\n",
|
|||
|
|
" dfc[\"aum_qty_max\"] > 0,\n",
|
|||
|
|
" dfc[\"aum_qty_last\"] / dfc[\"aum_qty_max\"],\n",
|
|||
|
|
" np.nan\n",
|
|||
|
|
")\n",
|
|||
|
|
"dfc[\"aum_drawdown_last\"] = dfc[\"aum_drawdown_last\"].clip(0, 1)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Log-transforms sur variables de taille brute (non utilisées en clustering)\n",
|
|||
|
|
"for col in [\"aum_qty_mean\", \"gross_flow_qty_sum\", \"n_tx_total\"]:\n",
|
|||
|
|
" dfc[f\"log_{col}\"] = np.log1p(dfc[col].clip(lower=0))\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Filtres qualité\n",
|
|||
|
|
"dfc = dfc[(dfc[\"n_months\"] >= 6) & (dfc[\"aum_qty_mean\"] > 0)].copy()\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Retrait des outliers sur variables de taille\n",
|
|||
|
|
"for col in [\"aum_qty_mean\", \"gross_flow_qty_sum\", \"n_tx_total\"]:\n",
|
|||
|
|
" cap = dfc[col].quantile(0.99)\n",
|
|||
|
|
" dfc = dfc[dfc[col] <= cap].copy()\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(f\"After outlier removal: {len(dfc)} accounts\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Regroupement géographique\n",
|
|||
|
|
"top_countries = dfc[\"country\"].fillna(\"Unknown\").value_counts().head(10).index\n",
|
|||
|
|
"top_regions = dfc[\"region\"].fillna(\"Unknown\").value_counts().head(10).index\n",
|
|||
|
|
"dfc[\"country_grp\"] = np.where(dfc[\"country\"].isin(top_countries), dfc[\"country\"], \"Other\")\n",
|
|||
|
|
"dfc[\"region_grp\"] = np.where(dfc[\"region\"].isin(top_regions), dfc[\"region\"], \"Other\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_last_active = (\n",
|
|||
|
|
" df_month[df_month[\"active_month\"] == 1]\n",
|
|||
|
|
" .groupby(ID_COL)[\"month\"]\n",
|
|||
|
|
" .max()\n",
|
|||
|
|
" .reset_index(name=\"last_active_month\")\n",
|
|||
|
|
")\n",
|
|||
|
|
"reference_date = df_month[\"month\"].max()\n",
|
|||
|
|
"df_last_active[\"months_since_last_tx\"] = (\n",
|
|||
|
|
" (reference_date.to_period(\"M\") - df_last_active[\"last_active_month\"].dt.to_period(\"M\"))\n",
|
|||
|
|
" .apply(lambda x: x.n)\n",
|
|||
|
|
")\n",
|
|||
|
|
"dfc = dfc.merge(df_last_active[[ID_COL, \"months_since_last_tx\"]], on=ID_COL, how=\"left\")\n",
|
|||
|
|
"max_months = dfc[\"months_since_last_tx\"].max()\n",
|
|||
|
|
"dfc[\"months_since_last_tx\"] = dfc[\"months_since_last_tx\"].fillna(max_months + 1)\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"dfc shape:\", dfc.shape)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "d180b613",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## 3. Global Clustering (all active accounts)\n",
|
|||
|
|
"\n",
|
|||
|
|
"Baseline clustering on all accounts with sufficient history."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "55ab41d3-20f6-4559-8e38-68420b4230b1",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"**Feature set final retenu**\n",
|
|||
|
|
"\n",
|
|||
|
|
"- flow_freq : proportion de mois avec au moins une transaction — mesure la fréquence globale d'activité du compte\n",
|
|||
|
|
"- gross_flow_to_aum : volume brut de flux rapporté à l'AUM moyen — mesure l'intensité des transactions indépendamment de leur direction, après clip p90 et log-transform\n",
|
|||
|
|
"- n_isin_total : nombre total d'ISINs distincts détenus sur toute la période — capte l'étendue du portefeuille exploré\n",
|
|||
|
|
"- avg_holding_months_per_isin : durée moyenne de détention par ISIN — capte la fidélité aux produits\n",
|
|||
|
|
"- exit_rate_per_isin : nombre moyen de sorties complètes par ISIN — mesure la propension à liquider ses positions\n",
|
|||
|
|
"- flow_direction_balance : ratio flux nets sur flux bruts — distingue les acheteurs nets des vendeurs nets sur l'ensemble de la période\n",
|
|||
|
|
"- log_aum_qty_mean : logarithme de l'AUM moyen — seule variable de taille retenue, incluse pour distinguer des comportements identiques sur des niveaux d'engagement très différents\n",
|
|||
|
|
"- months_since_last_tx : nombre de mois écoulés depuis la dernière transaction — variable de récence, la plus discriminante du set (ratio inter/total de 0.89)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 197,
|
|||
|
|
"id": "353c7d48-4644-4427-ac4b-02e3f3e31690",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"flow_freq — min=0.0000, max=1.0000, valeurs > 1 : 0\n",
|
|||
|
|
"Accounts: 7179 | Features: 8\n",
|
|||
|
|
"Points > 5 std after scaling: 0 (0.0%)\n",
|
|||
|
|
"\n",
|
|||
|
|
"Features with most extreme values (>5 std):\n",
|
|||
|
|
"Series([], )\n",
|
|||
|
|
"\n",
|
|||
|
|
"K=4 | sil=0.2312 | db=1.5109\n",
|
|||
|
|
"\n",
|
|||
|
|
"=== Tailles des clusters ===\n",
|
|||
|
|
" n_comptes pct\n",
|
|||
|
|
"cluster_k4 \n",
|
|||
|
|
"0 2708 37.7\n",
|
|||
|
|
"1 1174 16.4\n",
|
|||
|
|
"2 1476 20.6\n",
|
|||
|
|
"3 1821 25.4\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMkAAAGGCAYAAABhZtaKAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XdYU9cbwPFvWCJ7ioKiAgIOUHDi1lptHa3aumfduOpqHXXUXeve2rr3qIp1a9Vq69aqdaC1jloXMmXLur8/KNEIaEAg/PT9PE8ezbnnnvsektwkb845V6UoioIQQgghhBBCCCGEEO8xPV0HIIQQQgghhBBCCCGErkmSTAghhBBCCCGEEEK89yRJJoQQQgghhBBCCCHee5IkE0IIIYQQQgghhBDvPUmSCSGEEEIIIYQQQoj3niTJhBBCCCGEEEIIIcR7T5JkQgghhBBCCCGEEOK9J0kyIYQQQgghhBBCCPHekySZEEIIIYQQQgghhHjvSZJMCCF0pH79+owYMULXYeSp7du34+HhwYMHD3QdihBCS3/++SflypXj4cOHug5F59LOYVeuXNF1KOI/GzdupG7duiQkJOg6FCGEEO8ASZIJIUQOu3//PmPHjuWDDz7Ay8sLX19f2rZty+rVq4mPj8+TGOLi4pg/fz5nzpzJk+P9v9q1axerVq3SdRj50vPnz1m1ahWtWrWiYsWKeHl50ahRIyZMmMDdu3d1HV6ue1efG0uWLOGXX37J0j6zZ8+mSZMmODk5qcs6depE06ZN09U9deoU5cuXp0WLFkRERLxtuGr379/Hy8vrvU1QHTt2jPnz5+s6jHypZcuWJCYmsmnTJl2HIoQQ4h0gSTIhhMhBv/76K82aNWPfvn3Uq1ePMWPGMHToUBwdHZk+fTqTJ0/Okzji4uJYsGABZ8+ezZPjaevTTz/lzz//1PiyrUu7d+9mzZo1ug4j3wkLC6Ndu3ZMnToVW1tbBg4cqE78HjlyhGbNmuk6xFz3rj43li5dmqUkWWBgICdPnqRt27ZvrHvq1Cn69OlDyZIlWblyJVZWVm8RqaYpU6ZgYGCQY+39vzl27BgLFizQdRj5UoECBWjevDmrVq1CURRdhyOEEOL/3Pv7aUMIIXLYv//+y+DBg3F0dGT16tUUKlRIva1Dhw78888//Prrr7oLMAfExsZiYmKS7f319fXR19fPwYjyp7i4OAoWLKjrMLJt5MiRBAYGMm/ePBo1aqSxbdCgQcyePVtHkeW+t32Ov2u2bduGo6MjFSpUeG29s2fP4u/vT4kSJXI8Qfbbb7/x+++/06NHDxYvXpxj7QKkpKSQmJhIgQIFcrRdkX3Pnz/H0NAQPT3tf8v/+OOPWbZsGadPn8bPzy8XoxNCCPGuk5FkQgiRQ5YtW0ZsbCyTJ0/WSJClKV68OF26dMl0//nz5+Ph4ZGuPKN1vK5cuUL37t2pWrUq3t7e1K9fn5EjRwLw4MED9ZeEBQsW4OHhgYeHh8ZUndu3bzNw4ECqVKmCl5cXLVu25PDhwxke9+zZs3z77bf4+flRp06d1/4N1q5dS5MmTShfvjyVK1emZcuW7Nq167V9SUlJYf78+dSsWZPy5cvTqVMn/v7773RrtqXte+HCBaZOnUq1atWoUKEC/fr1IywsTCOOX375hV69elGzZk3KlStHgwYNWLhwIcnJyeo6nTp14tdff+Xhw4fqv1H9+vUzjRPgzJkzeHh4aExjTZt2dvXqVTp06ED58uWZNWsWAAkJCcybN48PP/yQcuXKUadOHb7//vt0a+ecOHGCdu3aUalSJXx8fGjUqJG6jbx2+fJlfv31Vz7//PN0CTIAIyMjhg8frlF26tQp2rdvT4UKFahUqRL+/v7cvn1bo07a8/vu3bsMGzaMihUrUq1aNebMmYOiKDx+/Bh/f398fX2pUaMGK1as0Ng/7W+/d+9eZs2aRY0aNahQoQJ9+vTh8ePH6eLct28fLVu2xNvbm6pVqzJs2DCCgoI06owYMQIfHx/u379Pz5498fHxYdiwYa99boD2j6uHhwcTJkxg3759NG7cGG9vb9q0acPNmzcB2LRpEx9++CFeXl506tQpw7X6Ll++TPfu3alYsSLly5enY8eOXLhwIcO/7T///MOIESOoVKkSFStWZOTIkcTFxWnEExsby44dO9T9etO6iIcPH6ZatWqoVKpM65w/f57evXvj7OzMypUrsba2fm2bWZGYmMjkyZPp3Lkzzs7Ob91e2mPy888/06RJE7y8vPjtt98AuH79Oj169MDX1xcfHx+6dOnCpUuXMmwnPj6esWPHUrVqVXx9ffn666959uxZumNlNEXy1XNbYmIiCxYsoGHDhnh5eVG1alXatWvHiRMngNTn6fr169Vtpt0yk/Z8yOimzTqYbzqPAwQFBTFq1Cj1ObZ+/fqMGzdO4zXw77//qt9nypcvT+vWrdP9UJT2ut6zZw+zZ8+mVq1alC9fnujoaEC75z9AuXLlsLKySvc+JoQQQmSVjCQTQogccvToUYoVK4avr2+uHic0NJTu3btjbW1Nr169sLCw4MGDBxw6dAgAGxsbvv32W7799ls+/PBDPvzwQwD1l6pbt27Rrl07HBwc6NmzJyYmJuzbt49+/foxf/58df0048ePx8bGhn79+hEbG5tpXFu2bGHSpEk0atSIzp078/z5c27evMnly5dfOz1v5syZLFu2jHr16lGrVi1u3LhB9+7def78eYb1J02ahIWFBf379+fhw4esXr2aCRMmMGfOHHWdHTt2YGJiwhdffIGJiQmnT59m3rx5REdHqxM8ffr0ISoqiidPnqgTjKampm/462csIiKCnj170qRJEz755BNsbW1JSUnB39+fCxcu0Lp1a1xdXfnrr79YvXo19+7dY9GiRUDq49G7d288PDwYOHAgRkZG/PPPP/zxxx/ZiuVtHTlyBEidGquNkydP0rNnT4oWLUr//v2Jj49n3bp1tGvXju3bt1O0aFGN+oMHD8bV1ZWhQ4dy7NgxFi9ejJWVFZs2baJatWoMGzaMXbt2MW3aNLy8vKhcubLG/osXL0alUtGzZ09CQ0NZvXo1Xbt2ZefOnRgbGwOpSc6RI0fi5eXFkCFDCA0NZc2aNfzxxx8EBARgYWGhbi8pKUn9JXz48OEYGxtjb2+f6XND28c1zfnz5zly5Ajt27cH4IcffqBPnz706NGDDRs20L59e549e8ayZcsYNWqUxhTPU6dO0bNnT8qVK0f//v1RqVRs376dLl26sGHDBry9vTWONWjQIIoWLcqQIUO4fv06W7duxcbGhq+++gqA77//ntGjR+Pt7U3r1q0BXpt4CgoK4tGjR5QpUybTOhcuXFA//qtWrcLGxiZdnaioKBITEzNtI02BAgXSvQZXr15NZGQkffv25eDBg29sQxunT59m3759dOjQAWtra5ycnLh16xYdOnTA1NSUHj16YGBgwObNm+nUqRPr1q2jfPnyGm1MmDBBfR66e/cuGzdu5NGjR6xdu/a1CcWMLFiwgKVLl9KqVSu8vb2Jjo7m6tWrXLt2jRo1atCmTRuePn3KiRMn+P7779/Y3ocffpjucb127RqrV6/O8PF5mTbn8aCgID7//HOioqJo3bo1Li4uBAUFceDAAeLj4zEyMiIkJIS2bdsSFxdHp06dsLa2ZseOHfj7+6sTzC9btGgRhoaGdO/enYSEBAwNDbP8/C9TpozOzptCCCHeIYoQQoi3FhUVpbi7uyv+/v5a71OvXj1l+PDh6vvz5s1T3N3d09Xbtm2b4u7urvz777+KoijKoUOHFHd3d+XPP//MtO3Q0FDF3d1dmTdvXrptXbp0UZo2bao8f/5cXZaSkqK0adNGadiwYbrjtmvXTklKSnpjf/z9/ZUmTZq8ts6rfQkODlbKlCmj9O3bV6Pe/PnzFXd3d42/T9q+Xbt2VVJSUtTlU6ZMUUqXLq1ERkaqy+Li4tIde8yYMUr58uU1+t2rVy+lXr16b4wzzenTpxV3d3fl9OnT6rKOHTsq7u7uysaNGzXqBgQEKJ6ensq5c+c0yjdu3Ki4u7srFy5cUBR
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1400x400 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"\n",
|
|||
|
|
"=== Médianes comportement ===\n",
|
|||
|
|
" gross_flow_to_aum flow_freq flow_direction_balance n_isin_total avg_holding_months_per_isin exit_rate_per_isin log_aum_qty_mean months_since_last_tx\n",
|
|||
|
|
"cluster_k4 \n",
|
|||
|
|
"0 7.884 0.071 0.000 1.0 11.333 1.000 5.280 69.0\n",
|
|||
|
|
"1 5.348 0.617 -0.006 12.0 28.924 0.667 8.768 3.0\n",
|
|||
|
|
"2 1.159 0.043 -1.000 3.0 60.000 0.400 5.167 27.0\n",
|
|||
|
|
"3 1.477 0.012 -1.000 3.0 12.000 0.714 3.407 127.0\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABB8AAAGGCAYAAAAzaSmEAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XdUFFcbwOHf0sQCSFdRUEHAAoq9K2rs3dh7793YNXaNGiu22HuLPbGbGBNrNLYo9o6KNCkC0ub7g4/VFRZRgZXwPufMgZ25c/edvTuzu3duUSmKoiCEEEIIIYQQQgiRRvR0HYAQQgghhBBCCCH+26TyQQghhBBCCCGEEGlKKh+EEEIIIYQQQgiRpqTyQQghhBBCCCGEEGlKKh+EEEIIIYQQQgiRpqTyQQghhBBCCCGEEGlKKh+EEEIIIYQQQgiRpqTyQQghhBBCCCGEEGlKKh+EEEIIIYQQQgiRpqTyQYivVI0aNRg9erSuw0hXu3fvxsXFhWfPnuk6FJFKRo8eTY0aNTTWubi4sHjxYh1F9HGLFy/GxcVF12HozJeUT3pdt+Li4mjYsCHLli1L8+fSlaTOna9JWpzH2j4DVq1aRc2aNSlcuDBNmjRJ1ef8WnXs2JGOHTvqOozPcv78eVxcXDh//nyaPs/QoUMZPHhwmj6HECJ1SeWDEOnsyZMnTJw4kZo1a+Lm5kbJkiVp06YN69evJzIyMl1iiIiIYPHixWn+xSCjO3DgAOvWrdN1GF+tkJAQ3NzccHFx4f79+7oO55PIOZA+7t27x+LFi1O9QvGXX37hxYsXdOjQQb0u4Yfr9evXNdKGhoby7bff4ubmxqlTp1I1jq5du+Li4sKUKVNSNd//uuXLl3P8+PEUpf3rr7+YM2cOJUuWZObMmQwbNiyNo/s0afUeT2ufUgZfq549e3L06FFu3bql61CEECkklQ9CpKOTJ0/SqFEjDh06hKenJxMmTGD48OHkyZOHOXPmMH369HSJIyIiAi8vLy5cuJAuz5dSTZo04dq1a9jZ2ek6FCD+B86GDRt0HcZX6/Dhw6hUKqytrdm/f7+uw/kkyZ0Dffv25dq1azqIKuM7fPgwU6dOVT++d+8eXl5e+Pj4pOrzrF69mgYNGmBiYpJsurCwMLp168bt27fx8vKiatWqqRbD0aNHuXLlSqrll9Fcu3aNvn37fta+K1asSPKHb1KfAefOnUNPT4/p06fTtGlTqlWr9tkxp4W0eo+nNW1lkBrKlCnDtWvXKFOmTJrkn6BIkSIUK1aMNWvWpOnzCCFSj4GuAxAis3j69ClDhw4lT548rF+/HhsbG/W29u3b8/jxY06ePKm7AFNBeHg42bJl++z99fX10dfXT8WIvk4RERFkzZpV12F8sf3791OtWjXy5MnDL7/8wtChQ3UdUqowMDDAwCDjfTx+6fmXGoyMjNL8OW7evMmtW7c+2r0jLCyM7t274+3tjZeXV6r+aH379i2zZs2iR48eLFq0KNXy/drFxcURHR1NlixZyJIlS6rnn9RnQEBAAMbGxuny3hJf5u3btxgaGqKnp5cm74+k1KtXj8WLF/PmzRuyZ8+eLs8phPh80vJBiHSyatUqwsPDmT59ukbFQwIHBwc6d+6sdX9t/dCT6iN7/fp1unfvTrly5XB3d6dGjRqMGTMGgGfPnlGhQgUAvLy8cHFxSdR39/79+wwaNIiyZcvi5uZG8+bNOXHiRJLPe+HCBSZNmkSFChU++uV+48aNNGjQgOLFi1OmTBmaN2/OgQMHkj2WuLg4Fi9eTOXKlSlevDgdO3bk3r17ifqWJ+x76dIlZs6cSfny5SlRogT9+/cnMDBQI47jx4/Tq1cvKleuTLFixahVqxZLliwhNjZWnaZjx46cPHkSHx8f9WuU0P9aW7/kpPq5duzYkYYNG/Lvv//Svn17ihcvzrx58wCIiopi0aJFfPPNNxQrVoxq1aoxe/ZsoqKiNPI9ffo0bdu2pXTp0nh4eFCnTh11Hrry/PlzLl68SP369WnQoAHPnj3jn3/++ez8bt68SY8ePShZsiQeHh507tw5ybvKISEhzJgxgxo1alCsWDGqVq3KyJEj1WUcFRXFwoULad68OaVKlaJEiRK0a9eOc+fOqfP42DmQ1LkWExPDkiVLqFWrFsWKFaNGjRrMmzcvUVnVqFGD3r17c/HiRXVT/5o1a7J3796PvgbPnj3DxcWF1atXs27dOjw9PXF3d6dDhw7cuXNHI+3o0aPx8PDgyZMn9OzZEw8PD0aMGAHEV0LMmjWLatWqUaxYMerUqcPq1atRFEUjj6ioKGbMmEH58uXx8PCgT58+vHz5MlFc2sYeSOp1ev+83L17t7o/dqdOndSvc8L5kdx1KjnHjx/H0NCQ0qVLa03z5s0bevTowY0bN1i8eDHVq1f/aL6fYuXKlSiKQvfu3VMlv+PHj9OwYUPc3Nxo2LAhx44dSzJdXFwc69ato0GDBri5uVGxYkUmTpxIcHCwRrqUvLZxcXGsX7+eRo0a4ebmRvny5enevbtGt5WELiX79+9XP+eff/6p3vb+50bC++H+/fsMHjyYkiVLUq5cOaZNm8bbt2818gwPD2fPnj3q98T775n3r60uLi7s3r2b8PBwddrdu3drfR3fv962adNGfexbt25NlDYgIICxY8dSsWJF3NzcaNy4MXv27EmU7tdff6V58+Z4eHhQsmRJGjVqxPr169XxJvceT86+ffv49ttv1Z+H7du356+//tKa/lM+dx49esTAgQOpVKkSbm5uVK1alaFDhxIaGgokXwYAvr6+jBkzhooVK1KsWDEaNGjAzz//nOTz/vrrr8yfP58qVapQvHhxwsLCkv0svHfvHh07dqR48eJUqVKFlStXJjpWHx8f+vTpQ4kSJahQoQIzZszgzz//TPK1rVixIuHh4Zw5c+ajr7kQQvcy3q0dITKo33//nXz58lGyZMk0fZ6AgAC6d++Oubk5vXr1wtTUlGfPnqm/zFpYWDBp0iQmTZrEN998wzfffAOg/hFx9+5d2rZti62tLT179iRbtmwcOnSI/v37s3jxYnX6BJMnT8bCwoL+/fsTHh6uNa4dO3Ywbdo06tSpQ6dOnXj79i23b9/m6tWrNGrUSOt+P/74I6tWrcLT05MqVapw69YtunfvrvFl9n3Tpk3D1NSUAQMG4OPjw/r165kyZQoLFixQp9mzZw/ZsmWja9euZMuWjXPnzrFo0SLCwsIYNWoUAH369CE0NJSXL1+qv7R/7l2V169f07NnTxo0aEDjxo2xtLQkLi6Ovn37cunSJVq1aoWjoyN37txh/fr1PHr0iKVLlwLx5dG7d29cXFwYNGgQRkZGPH78+It+6KeGX375haxZs+Lp6YmxsTH29vYcOHDgs97fd+/epX379mTPnp0ePXpgYGDA9u3b6dixI5s2baJ48eJA/A/K9u3bc//+fVq0aEGRIkUICgrit99+w9fXFwsLC8LCwti5cycNGzakZcuWvHnzhp9//pkePXqwc+dOChcu/NFzICnjx49nz5491KlTh65du3Lt2jVWrFjB/fv3WbJkiUbax48fM3jwYL799luaNWvGrl27GD16NEWLFqVQoUIffT327t3LmzdvaNeuHW/fvmXjxo107tyZAwcOYGVlpU4XExND9+7dKVWqFKNGjcLY2BhFUejbty/nz5/n22+/pXDhwvz555/Mnj0bX19fxo4dq95/3Lhx7N+/n4YNG1KyZEnOnTtHr169PqnsklOmTBk6duzIxo0b6dOnDwULFgTA0dHxo9ep5Fy+fBlnZ2cMDQ2T3B4REUHPnj35999/WbhwIZ6enonSREVFERYWlqLjsLCw0Hj8/PlzVq5cyYwZMzA2Nk5RHsn566+/GDhwIE5OTgwfPpygoCDGjBlDrly5EqWdOHEie/bsoXnz5nTs2JFnz56xefNmbt68ydatWzE0NEzxaztu3Dh2795N1apV+fbbb4mNjeXixYtcvXoVNzc3dbpz585x6NAh2rdvj7m5+Ue7xQ0ZMgQ7OzuGDx/OlStX2LhxIyEhIcyePRuA2bNnM378eNz
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1200x400 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"\n",
|
|||
|
|
"=== Médianes allocation ===\n",
|
|||
|
|
" share_asset_fixed_income share_asset_diversified share_asset_equity share_fund_carmignac_patrimoine share_fund_carmignac_investissement share_fund_carmignac_sécurité share_fund_carmignac_emergents\n",
|
|||
|
|
"cluster_k4 \n",
|
|||
|
|
"0 0.767 0.000 0.000 0.000 0.000 0.000 0.000\n",
|
|||
|
|
"1 0.284 0.207 0.155 0.152 0.011 0.018 0.002\n",
|
|||
|
|
"2 0.000 0.372 0.227 0.255 0.000 0.000 0.000\n",
|
|||
|
|
"3 0.000 0.326 0.099 0.169 0.000 0.000 0.000\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# Feature selection & preprocessing \n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"base_features = [\n",
|
|||
|
|
" \"flow_freq\",\n",
|
|||
|
|
" \"gross_flow_to_aum\",\n",
|
|||
|
|
" \"n_isin_total\",\n",
|
|||
|
|
" \"avg_holding_months_per_isin\",\n",
|
|||
|
|
" \"exit_rate_per_isin\",\n",
|
|||
|
|
" \"flow_direction_balance\",\n",
|
|||
|
|
" \"log_aum_qty_mean\",\n",
|
|||
|
|
" \"months_since_last_tx\",\n",
|
|||
|
|
"]\n",
|
|||
|
|
"all_features = [c for c in base_features if c in dfc.columns]\n",
|
|||
|
|
"\n",
|
|||
|
|
"dfc_clean = dfc.copy()\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Diagnostic préalable ---\n",
|
|||
|
|
"vals_ff = dfc_clean[\"flow_freq\"].to_numpy(dtype=float)\n",
|
|||
|
|
"print(f\"flow_freq — min={vals_ff.min():.4f}, max={vals_ff.max():.4f}, \"\n",
|
|||
|
|
" f\"valeurs > 1 : {(vals_ff > 1).sum()}\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Imputation des NaN par 0 ---\n",
|
|||
|
|
"for col in [\"flow_direction_balance\", \"months_since_last_tx\"]:\n",
|
|||
|
|
" if col in dfc_clean.columns:\n",
|
|||
|
|
" dfc_clean[col] = dfc_clean[col].fillna(0)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Groupe 1 : clip MAD 3 sigma ---\n",
|
|||
|
|
"for col in [\n",
|
|||
|
|
" \"n_isin_total\",\n",
|
|||
|
|
" \"exit_rate_per_isin\",\n",
|
|||
|
|
" \"avg_holding_months_per_isin\",\n",
|
|||
|
|
" \"months_since_last_tx\",\n",
|
|||
|
|
"]:\n",
|
|||
|
|
" if col not in dfc_clean.columns:\n",
|
|||
|
|
" continue\n",
|
|||
|
|
" vals = dfc_clean[col].to_numpy(dtype=float)\n",
|
|||
|
|
" med = np.nanmedian(vals)\n",
|
|||
|
|
" mad = np.nanmedian(np.abs(vals - med)) * 1.4826\n",
|
|||
|
|
" if mad > 0:\n",
|
|||
|
|
" dfc_clean[col] = np.clip(vals, med - 3*mad, med + 3*mad)\n",
|
|||
|
|
" else:\n",
|
|||
|
|
" dfc_clean[col] = np.clip(vals, 0, np.nanpercentile(vals, 95))\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Groupe 2 : clip p90 puis log-transform ---\n",
|
|||
|
|
"col = \"gross_flow_to_aum\"\n",
|
|||
|
|
"if col in dfc_clean.columns:\n",
|
|||
|
|
" vals = dfc_clean[col].to_numpy(dtype=float)\n",
|
|||
|
|
" vals = np.clip(vals, 0, np.nanpercentile(vals, 90))\n",
|
|||
|
|
" dfc_clean[col] = np.log1p(vals)\n",
|
|||
|
|
"\n",
|
|||
|
|
"col = \"flow_freq\"\n",
|
|||
|
|
"if col in dfc_clean.columns:\n",
|
|||
|
|
" vals = dfc_clean[col].to_numpy(dtype=float)\n",
|
|||
|
|
" dfc_clean[col] = np.log1p(np.clip(vals, 0, None))\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Groupe 3 : log_aum_qty_mean — clip MAD 3 sigma ---\n",
|
|||
|
|
"col = \"log_aum_qty_mean\"\n",
|
|||
|
|
"if col in dfc_clean.columns:\n",
|
|||
|
|
" vals = dfc_clean[col].to_numpy(dtype=float)\n",
|
|||
|
|
" med = np.nanmedian(vals)\n",
|
|||
|
|
" mad = np.nanmedian(np.abs(vals - med)) * 1.4826\n",
|
|||
|
|
" dfc_clean[col] = np.clip(vals, med - 3*mad, med + 3*mad)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Groupe 4 : hhi_isin — clip p90 ---\n",
|
|||
|
|
"col = \"hhi_isin\"\n",
|
|||
|
|
"if col in dfc_clean.columns:\n",
|
|||
|
|
" vals = dfc_clean[col].to_numpy(dtype=float)\n",
|
|||
|
|
" dfc_clean[col] = np.clip(vals, 0, np.nanpercentile(vals, 90))\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Construction de X ---\n",
|
|||
|
|
"X_num = dfc_clean[all_features].copy()\n",
|
|||
|
|
"X_num = X_num.loc[:, ~X_num.columns.duplicated()]\n",
|
|||
|
|
"X_num = X_num.fillna(X_num.median())\n",
|
|||
|
|
"\n",
|
|||
|
|
"X_cat = pd.get_dummies(\n",
|
|||
|
|
" dfc_clean[[\"country_grp\", \"region_grp\"]].fillna(\"Unknown\"), drop_first=True\n",
|
|||
|
|
")\n",
|
|||
|
|
"X = X_num.reset_index(drop=True)\n",
|
|||
|
|
"\n",
|
|||
|
|
"scaler = RobustScaler()\n",
|
|||
|
|
"X_scaled = scaler.fit_transform(X)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Diagnostic ---\n",
|
|||
|
|
"X_df = pd.DataFrame(X_scaled, columns=X.columns)\n",
|
|||
|
|
"extreme = (X_df.abs() > 5).any(axis=1).sum()\n",
|
|||
|
|
"print(f\"Accounts: {X.shape[0]} | Features: {X.shape[1]}\")\n",
|
|||
|
|
"print(f\"Points > 5 std after scaling: {extreme} ({extreme/len(X_df):.1%})\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"extreme_by_feat = (X_df.abs() > 5).sum().sort_values(ascending=False)\n",
|
|||
|
|
"print(\"\\nFeatures with most extreme values (>5 std):\")\n",
|
|||
|
|
"print(extreme_by_feat[extreme_by_feat > 0].to_string())\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Clustering K=4 ---\n",
|
|||
|
|
"RESULTS = {}\n",
|
|||
|
|
"for k in [4]:\n",
|
|||
|
|
" km = KMeans(n_clusters=k, n_init=50, random_state=RANDOM_STATE)\n",
|
|||
|
|
" dfc[f\"cluster_k{k}\"] = km.fit_predict(X_scaled)\n",
|
|||
|
|
" RESULTS[k] = {\n",
|
|||
|
|
" \"model\": km,\n",
|
|||
|
|
" \"silhouette\": silhouette_score(X_scaled, dfc[f\"cluster_k{k}\"]),\n",
|
|||
|
|
" \"davies_bouldin\": davies_bouldin_score(X_scaled, dfc[f\"cluster_k{k}\"]),\n",
|
|||
|
|
" }\n",
|
|||
|
|
" print(f\"\\nK={k} | sil={RESULTS[k]['silhouette']:.4f} | db={RESULTS[k]['davies_bouldin']:.4f}\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"\\n=== Tailles des clusters ===\")\n",
|
|||
|
|
"counts = dfc[\"cluster_k4\"].value_counts().sort_index()\n",
|
|||
|
|
"props = counts / counts.sum() * 100\n",
|
|||
|
|
"print(pd.DataFrame({\"n_comptes\": counts, \"pct\": props.round(1)}))\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Heatmap comportement ---\n",
|
|||
|
|
"profile_vars_behavior = [\n",
|
|||
|
|
" \"gross_flow_to_aum\",\n",
|
|||
|
|
" \"flow_freq\",\n",
|
|||
|
|
" \"flow_direction_balance\",\n",
|
|||
|
|
" \"n_isin_total\",\n",
|
|||
|
|
" \"avg_holding_months_per_isin\",\n",
|
|||
|
|
" \"exit_rate_per_isin\",\n",
|
|||
|
|
" \"log_aum_qty_mean\",\n",
|
|||
|
|
" \"months_since_last_tx\",\n",
|
|||
|
|
"]\n",
|
|||
|
|
"profile_vars_behavior = [c for c in profile_vars_behavior if c in dfc.columns]\n",
|
|||
|
|
"\n",
|
|||
|
|
"prof_behavior = plot_heatmap(\n",
|
|||
|
|
" dfc, profile_vars_behavior, \"cluster_k4\",\n",
|
|||
|
|
" title=\"Cluster signatures — Comportement (K=4, robust z-score)\",\n",
|
|||
|
|
" figsize=(14, 4)\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"\\n=== Médianes comportement ===\")\n",
|
|||
|
|
"print(prof_behavior.round(3).to_string())\n",
|
|||
|
|
"\n",
|
|||
|
|
"# --- Heatmap allocation (post-clustering, descriptive uniquement) ---\n",
|
|||
|
|
"profile_vars_allocation = [\n",
|
|||
|
|
" c for c in [\n",
|
|||
|
|
" \"share_asset_fixed_income\",\n",
|
|||
|
|
" \"share_asset_diversified\",\n",
|
|||
|
|
" \"share_asset_equity\",\n",
|
|||
|
|
" \"share_fund_carmignac_patrimoine\",\n",
|
|||
|
|
" \"share_fund_carmignac_investissement\",\n",
|
|||
|
|
" \"share_fund_carmignac_sécurité\",\n",
|
|||
|
|
" \"share_fund_carmignac_emergents\",\n",
|
|||
|
|
" ]\n",
|
|||
|
|
" if c in dfc.columns\n",
|
|||
|
|
"]\n",
|
|||
|
|
"\n",
|
|||
|
|
"prof_allocation = plot_heatmap(\n",
|
|||
|
|
" dfc, profile_vars_allocation, \"cluster_k4\",\n",
|
|||
|
|
" title=\"Cluster signatures — Allocation produits (K=4, descriptif post-clustering)\",\n",
|
|||
|
|
" figsize=(12, 4)\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"\\n=== Médianes allocation ===\")\n",
|
|||
|
|
"print(prof_allocation.round(3).to_string())"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 199,
|
|||
|
|
"id": "85747735-d0b4-4aa7-9fc2-adf030f92286",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
" n_comptes pct\n",
|
|||
|
|
"cluster_k4 \n",
|
|||
|
|
"0 2708 37.7\n",
|
|||
|
|
"1 1174 16.4\n",
|
|||
|
|
"2 1476 20.6\n",
|
|||
|
|
"3 1821 25.4\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"counts = dfc[\"cluster_k4\"].value_counts().sort_index()\n",
|
|||
|
|
"props = counts / counts.sum() * 100\n",
|
|||
|
|
"print(pd.DataFrame({\"n_comptes\": counts, \"pct\": props.round(1)}))"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 204,
|
|||
|
|
"id": "dc171be2-e066-4352-a0ea-32d7b7b046b0",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
" k inertia silhouette davies_bouldin\n",
|
|||
|
|
" 2 20206.150896 0.422448 0.971224\n",
|
|||
|
|
" 3 16684.870723 0.241253 1.542920\n",
|
|||
|
|
" 4 14655.878131 0.231172 1.510868\n",
|
|||
|
|
" 5 13189.616061 0.228827 1.408857\n",
|
|||
|
|
" 6 11997.575028 0.223735 1.416454\n",
|
|||
|
|
" 7 11089.241350 0.229848 1.419999\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABdEAAAGGCAYAAACUkchWAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA3K9JREFUeJzs3XdYU+fbB/BvEvaWrYADkA3iRBG1r4p1z7q1rbWt2jpq9WetHWq11dZRq7bOVuuoo1WpVutq6wYHgoLgABdLZMjeSd4/kGgEBGLgML6f6+KqOXlycucm5Zzcec79iORyuRxERERERERERERERFSKWOgAiIiIiIiIiIiIiIhqKxbRiYiIiIiIiIiIiIjKwSI6EREREREREREREVE5WEQnIiIiIiIiIiIiIioHi+hEREREREREREREROVgEZ2IiIiIiIiIiIiIqBwsohMRERERERERERERlYNFdCIiIiIiIiIiIiKicrCITkRERERERERERERUDhbRiYiIiEgQ3bt3x9y5cxW3L168CGdnZ1y8eFGxbfz48ejfv78Q4dV6MpkM/fv3x7p16xTb1qxZA2dnZ6Smplb4+BfzXxXdu3fHpEmTVHpsTdm/fz+cnZ0RFhYmdCivbNeuXXjttddQUFAgdChEREpKjjsNydy5c9G9e/cKx8XGxsLZ2Rn79+9XbKuufF2/fh0eHh6Ii4tTbBs/fjzGjx+v9ueqjapy/lPbLV++HMOHDxc6DCoDi+hEtZSzszPWrFmjuF2fDgq12Yt5JyKiqrt16xamT5+O//u//4Onpye6dOmCCRMmYPv27UKHVi2ioqKwZs0axMbGlrpv586dSh+e1emvv/5CQkICxo0bVy37p9JOnz5d5nlCbm4u1qxZo/QFkDoNHToUhYWF2L17d7Xsn4jqppIvC0t+PD094efnh4kTJ2Lbtm3IysoSOsRqMXfuXKXX7ebmhm7dumHmzJmIiooSOjxBfP/99+jXrx9sbGyEDqXeOHToELZu3Vpqe2JiItasWYPIyMhqed633noLN2/exD///FMt+yfVsYhOVINePMl58Sc0NFToEF/Z9u3b0bZtWxQWFpY7xtnZGV999VUNRqWsvA/ARET06q5evYphw4bh5s2bGD58OL788ksMHz4cYrEY27ZtUxp79OhRLFq0SKBI1ScqKgpr165Vmv1VYteuXThw4EC1PO/PP/+Mfv36wdDQUKXH15f816TTp09j7dq1pbbn5uZi7dq1uHTpUrU8r7a2NgYPHoytW7dCLpdXy3MQUd01ffp0fPfdd1iwYIFi5vE333yDgQMH4ubNm9X63FOmTMH169er9TnKoqWlhe+++w7fffcdFi9ejCFDhiAwMBCjRo1CYmJijcdTWdWRr8jISFy4cAGjRo1S634bur/++qvUuSsAPH78GGvXrq22IrqFhQV69OiBX375pVr2T6rTEDoAooZo+vTpsLW1LbW9adOmAkSjXqdOnULnzp2hqakpdCjlOn36NHbu3Ilp06aVuu/69euQSCQCREVEVD+sX78ehoaG+OOPP2BkZKR0X0pKitJtLS2tmgytXomIiMDNmzdVbscC1P78FxUVQSaT1fo4q1NOTg709PQAAH369MHmzZsRFBSETp06CRwZEdUmXbt2haenp+L2pEmTEBgYiMmTJ+ODDz7AkSNHoKOjUy3PraGhAQ2Nmi8taWhoYNCgQUrbvL29MWnSJJw+fRojRoyo8ZgqozrytW/fPjRp0gTe3t5q3W9Nys/Ph6amJsTihjvX98Vj/owZMxATEwM7OzuBI6MSDffdSSSgrl27YtCgQaV+TE1NhQ7tleTm5uLy5ct47bXXhA6lTDk5ORWO0dbWFuQkkIiovnj48CEcHR1LFdABwMzMTOl2VXpyR0VFYfz48WjVqhW6dOmCTZs2lRqTkpKCefPmwdfXF56enhg4cGCpWeBl9V0Hyu5bCgDR0dGYPn06OnToAE9PTwwdOlTp8tr9+/djxowZAIA333xTcXXZxYsX0b17d9y5cweXLl1SbH++N2lGRga+/vprdOvWDR4eHvD398fGjRshk8kqzMfJkyehqamJdu3alXl/ZmYm5s6di3bt2qFt27b49NNPkZubqzSmrPzfvHkT48aNg5eXF7p27YqffvoJ+/btg7Ozc5ntaq5cuYI33ngDnp6e6NGjBwICAkqNqczrLMn/zz//jK1bt6Jnz57w9PREdHR0hbmoSF5eHr788kv4+PigTZs2mDNnDtLT00uNO336NMaMGQNvb2+0bt0a77//Pu7cuaO4f+7cudi5cycAKF1JGBsbqyhqr127VrH9+aveKnofAc+uWLx06RIWLFiATp06oVu3bor7PTw8YGJiwsu7iahSOnXqhA8++ABxcXE4ePCgYnvJF7A9evSAp6cnOnfujE8//RRPnjxRjDl69Kji79GLdu/eDWdnZ9y+fRtA+T2+//zzTwwdOhReXl7o0KEDZs6ciYSEBKUx9+/fx7Rp09C5c2d4enqia9eumDlzJjIzM1V6zebm5gBQalJUTEyM4m9wq1atMGLECJw6dUppTMnf4BePdeWdN7woIyMDc+fORdu2bdGuXTt88sknZb6OsvJVcqX2yZMn0b9/f3h4eKBfv344c+ZMpV73P//8g44dO0IkElU4tjLnSkOGDMHUqVOVtg0YMADOzs5KVzYcOXIEzs7OSsfqxMREfPrpp/D19VW8jj/++ENpXyU5PXz4ML7//nt06dIFrVq1Ukv7oSdPnmDGjBlo06YNfHx8sHjxYuTn55caV9H7c/z48Th16hTi4uIUx/Xu3bvj4sWLeOONNwAAn376qeK+588fr127hokTJ6Jt27Zo1aoVxo0bh+DgYKXnL3kfREVFYdasWWjfvj3GjBmjuN/X1xcAeMyvZVgpIqpjnjx5goULF+Ls2bPQ1NTEgAED8L///Q/a2tqKMUVFRdiwYQMOHDiAR48ewdLSEv3798fUqVMVs7mWLFmCgIAABAUFKQ62ixYtwo4dO/DZZ5/hzTffBAAkJyejc+fOmD9/vtIf9bIEBgaioKAAXbt2rdJrunjxIt588018//33ePDgAXbt2oUnT56gTZs2+Oqrr9CsWTOl8deuXcPq1asRGhqKoqIieHp6YubMmWjbtq1izJo1a7B27VocPnwY69atw5kzZ2BjYwMXFxfFScLzJy+3bt1SbJs6dapilnpcXBw2bdqEwMBAJCQkQFdXFz4+PpgzZ06ZVxMQETV0NjY2CAkJwe3bt+Hk5KSWfaanp+Pdd9+Fv78/+vTpg2PHjmH58uVwcnJSFBnz8vIwfvx4PHz4EGPHjoWtrS2OHj2KuXPnIiMjA2+99VaVn/fOnTsYPXo0rKys8N5770FPTw9///03PvzwQ6xZswb+/v5o3749xo8fj+3bt2Py5Mmwt7cHADg4OGDevHlYtGgR9PT0MHnyZADPPuDn5uZi3LhxSExMxKhRo9C4cWOEhIRg5cqVSEpKwmefffbS2EJCQuDk5FTulV8fffQRbG1t8fHHHyMiIgK///47TE1N8b///a/cfSYmJiry9P7770NPTw+///57uTPBHzx4gBkzZuCNN97AkCFDsG/fPsydOxfu7u5o2bKlSq9z//79yM/Px4gRI6ClpQVjY2MAqPSaMAYGBqXi/eqrr2BkZISpU6fi3r172LVrF+Lj47F9+3bFOVBAQADmzp0LPz8/zJ49G7m5udi1axfGjBmDAwcOwNbWFiNHjsTjx49x/vx5fPfdd4r9m5qaYsGCBViwYAH8/f3h7+8P4Nl5RmXeR89buHAhTE1N8eGHH5aaAODm5oarV69WKhdERIMGDcLKlStx7tw5xczsCxcuICYmBkOHDoWFhQXu3LmDvXv3IioqCnv37oVIJMJrr72m+FvVoUMHpX0eOXIELVu2fOkxft26dfjhhx/Qp08fvPHGG0hNTcWOHTswduxYBAQEwMjICAUFBZg4cSIKCgowbtw4mJubIzExEadOnUJGRkalWpWVHBtkMhliYmKwfPlymJiY4P/+7/8UY5KTkzFq1Cjk5uZi/PjxaNSoEQ4
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1500x400 with 3 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"K=4 | sil=0.2312 | db=1.5109\n",
|
|||
|
|
" n_comptes pct\n",
|
|||
|
|
"cluster_k4 \n",
|
|||
|
|
"0 2708 37.7\n",
|
|||
|
|
"1 1174 16.4\n",
|
|||
|
|
"2 1476 20.6\n",
|
|||
|
|
"3 1821 25.4\n",
|
|||
|
|
"\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMkAAAGGCAYAAABhZtaKAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XdYU9cbwPFvWCJ7ioKiAgIOUHDi1lptHa3aumfduOpqHXXUXeve2rr3qIp1a9Vq69aqdaC1jloXMmXLur8/KNEIaEAg/PT9PE8ezbnnnvsektwkb845V6UoioIQQgghhBBCCCGEEO8xPV0HIIQQQgghhBBCCCGErkmSTAghhBBCCCGEEEK89yRJJoQQQgghhBBCCCHee5IkE0IIIYQQQgghhBDvPUmSCSGEEEIIIYQQQoj3niTJhBBCCCGEEEIIIcR7T5JkQgghhBBCCCGEEOK9J0kyIYQQQgghhBBCCPHekySZEEIIIYQQQgghhHjvSZJMCCF0pH79+owYMULXYeSp7du34+HhwYMHD3QdihBCS3/++SflypXj4cOHug5F59LOYVeuXNF1KOI/GzdupG7duiQkJOg6FCGEEO8ASZIJIUQOu3//PmPHjuWDDz7Ay8sLX19f2rZty+rVq4mPj8+TGOLi4pg/fz5nzpzJk+P9v9q1axerVq3SdRj50vPnz1m1ahWtWrWiYsWKeHl50ahRIyZMmMDdu3d1HV6ue1efG0uWLOGXX37J0j6zZ8+mSZMmODk5qcs6depE06ZN09U9deoU5cuXp0WLFkRERLxtuGr379/Hy8vrvU1QHTt2jPnz5+s6jHypZcuWJCYmsmnTJl2HIoQQ4h0gSTIhhMhBv/76K82aNWPfvn3Uq1ePMWPGMHToUBwdHZk+fTqTJ0/Okzji4uJYsGABZ8+ezZPjaevTTz/lzz//1PiyrUu7d+9mzZo1ug4j3wkLC6Ndu3ZMnToVW1tbBg4cqE78HjlyhGbNmuk6xFz3rj43li5dmqUkWWBgICdPnqRt27ZvrHvq1Cn69OlDyZIlWblyJVZWVm8RqaYpU6ZgYGCQY+39vzl27BgLFizQdRj5UoECBWjevDmrVq1CURRdhyOEEOL/3Pv7aUMIIXLYv//+y+DBg3F0dGT16tUUKlRIva1Dhw78888//Prrr7oLMAfExsZiYmKS7f319fXR19fPwYjyp7i4OAoWLKjrMLJt5MiRBAYGMm/ePBo1aqSxbdCgQcyePVtHkeW+t32Ov2u2bduGo6MjFSpUeG29s2fP4u/vT4kSJXI8Qfbbb7/x+++/06NHDxYvXpxj7QKkpKSQmJhIgQIFcrRdkX3Pnz/H0NAQPT3tf8v/+OOPWbZsGadPn8bPzy8XoxNCCPGuk5FkQgiRQ5YtW0ZsbCyTJ0/WSJClKV68OF26dMl0//nz5+Ph4ZGuPKN1vK5cuUL37t2pWrUq3t7e1K9fn5EjRwLw4MED9ZeEBQsW4OHhgYeHh8ZUndu3bzNw4ECqVKmCl5cXLVu25PDhwxke9+zZs3z77bf4+flRp06d1/4N1q5dS5MmTShfvjyVK1emZcuW7Nq167V9SUlJYf78+dSsWZPy5cvTqVMn/v7773RrtqXte+HCBaZOnUq1atWoUKEC/fr1IywsTCOOX375hV69elGzZk3KlStHgwYNWLhwIcnJyeo6nTp14tdff+Xhw4fqv1H9+vUzjRPgzJkzeHh4aExjTZt2dvXqVTp06ED58uWZNWsWAAkJCcybN48PP/yQcuXKUadOHb7//vt0a+ecOHGCdu3aUalSJXx8fGjUqJG6jbx2+fJlfv31Vz7//PN0CTIAIyMjhg8frlF26tQp2rdvT4UKFahUqRL+/v7cvn1bo07a8/vu3bsMGzaMihUrUq1aNebMmYOiKDx+/Bh/f398fX2pUaMGK1as0Ng/7W+/d+9eZs2aRY0aNahQoQJ9+vTh8ePH6eLct28fLVu2xNvbm6pVqzJs2DCCgoI06owYMQIfHx/u379Pz5498fHxYdiwYa99boD2j6uHhwcTJkxg3759NG7cGG9vb9q0acPNmzcB2LRpEx9++CFeXl506tQpw7X6Ll++TPfu3alYsSLly5enY8eOXLhwIcO/7T///MOIESOoVKkSFStWZOTIkcTFxWnEExsby44dO9T9etO6iIcPH6ZatWqoVKpM65w/f57evXvj7OzMypUrsba2fm2bWZGYmMjkyZPp3Lkzzs7Ob91e2mPy888/06RJE7y8vPjtt98AuH79Oj169MDX1xcfHx+6dOnCpUuXMmwnPj6esWPHUrVqVXx9ffn666959uxZumNlNEXy1XNbYmIiCxYsoGHDhnh5eVG1alXatWvHiRMngNTn6fr169Vtpt0yk/Z8yOimzTqYbzqPAwQFBTFq1Cj1ObZ+/fqMGzdO4zXw77//qt9nypcvT+vWrdP9UJT2ut6zZw+zZ8+mVq1alC9fnujoaEC75z9AuXLlsLKySvc+JoQQQmSVjCQTQogccvToUYoVK4avr2+uHic0NJTu3btjbW1Nr169sLCw4MGDBxw6dAgAGxsbvv32W7799ls+/PBDPvzwQwD1l6pbt27Rrl07HBwc6NmzJyYmJuzbt49+/foxf/58df0048ePx8bGhn79+hEbG5tpXFu2bGHSpEk0atSIzp078/z5c27evMnly5dfOz1v5syZLFu2jHr16lGrVi1u3LhB9+7def78eYb1J02ahIWFBf379+fhw4esXr2aCRMmMGfOHHWdHTt2YGJiwhdffIGJiQmnT59m3rx5REdHqxM8ffr0ISoqiidPnqgTjKampm/462csIiKCnj170qRJEz755BNsbW1JSUnB39+fCxcu0Lp1a1xdXfnrr79YvXo19+7dY9GiRUDq49G7d288PDwYOHAgRkZG/PPPP/zxxx/ZiuVtHTlyBEidGquNkydP0rNnT4oWLUr//v2Jj49n3bp1tGvXju3bt1O0aFGN+oMHD8bV1ZWhQ4dy7NgxFi9ejJWVFZs2baJatWoMGzaMXbt2MW3aNLy8vKhcubLG/osXL0alUtGzZ09CQ0NZvXo1Xbt2ZefOnRgbGwOpSc6RI0fi5eXFkCFDCA0NZc2aNfzxxx8EBARgYWGhbi8pKUn9JXz48OEYGxtjb2+f6XND28c1zfnz5zly5Ajt27cH4IcffqBPnz706NGDDRs20L59e549e8ayZcsYNWqUxhTPU6dO0bNnT8qVK0f//v1RqVRs376dLl26sGHDBry9vTWONWjQIIoWLcqQIUO4fv06W7duxcbGhq+++gqA77//ntGjR+Pt7U3r1q0BXpt4CgoK4tGjR5QpUybTOhcuXFA//qtWrcLGxiZdnaioKBITEzNtI02BAgXSvQZXr15NZGQkffv25eDBg29sQxunT59m3759dOjQAWtra5ycnLh16xYdOnTA1NSUHj16YGBgwObNm+nUqRPr1q2jfPnyGm1MmDBBfR66e/cuGzdu5NGjR6xdu/a1CcWMLFiwgKVLl9KqVSu8vb2Jjo7m6tWrXLt2jRo1atCmTRuePn3KiRMn+P7779/Y3ocffpjucb127RqrV6/O8PF5mTbn8aCgID7//HOioqJo3bo1Li4uBAUFceDAAeLj4zEyMiIkJIS2bdsSFxdHp06dsLa2ZseOHfj7+6sTzC9btGgRhoaGdO/enYSEBAwNDbP8/C9TpozOzptCCCHeIYoQQoi3FhUVpbi7uyv+/v5a71OvXj1l+PDh6vvz5s1T3N3d09Xbtm2b4u7urvz777+KoijKoUOHFHd3d+XPP//MtO3Q0FDF3d1dmTdvXrptXbp0UZo2bao8f/5cXZaSkqK0adNGadiwYbrjtmvXTklKSnpjf/z9/ZUmTZq8ts6rfQkODlbKlCmj9O3bV6Pe/PnzFXd3d42/T9q+Xbt2VVJSUtTlU6ZMUUqXLq1ERkaqy+Li4tIde8yYMUr58uU1+t2rVy+lXr16b4wzzenTpxV3d3fl9OnT6rKOHTsq7u7uysaNGzXqBgQEKJ6ensq5c+c0yjdu3Ki4u7srFy5cUBR
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1400x400 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"\n",
|
|||
|
|
"=== Médianes comportement K=4 ===\n",
|
|||
|
|
" gross_flow_to_aum flow_freq flow_direction_balance n_isin_total avg_holding_months_per_isin exit_rate_per_isin log_aum_qty_mean months_since_last_tx\n",
|
|||
|
|
"cluster_k4 \n",
|
|||
|
|
"0 7.884 0.071 0.000 1.0 11.333 1.000 5.280 69.0\n",
|
|||
|
|
"1 5.348 0.617 -0.006 12.0 28.924 0.667 8.768 3.0\n",
|
|||
|
|
"2 1.159 0.043 -1.000 3.0 60.000 0.400 5.167 27.0\n",
|
|||
|
|
"3 1.477 0.012 -1.000 3.0 12.000 0.714 3.407 127.0\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABB8AAAGGCAYAAAAzaSmEAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XdUFFcbwOHf0sQCSFdRUEHAAoq9K2rs3dh7793YNXaNGiu22HuLPbGbGBNrNLYo9o6KNCkC0ub7g4/VFRZRgZXwPufMgZ25c/edvTuzu3duUSmKoiCEEEIIIYQQQgiRRvR0HYAQQgghhBBCCCH+26TyQQghhBBCCCGEEGlKKh+EEEIIIYQQQgiRpqTyQQghhBBCCCGEEGlKKh+EEEIIIYQQQgiRpqTyQQghhBBCCCGEEGlKKh+EEEIIIYQQQgiRpqTyQQghhBBCCCGEEGlKKh+EEEIIIYQQQgiRpqTyQYivVI0aNRg9erSuw0hXu3fvxsXFhWfPnuk6FJFKRo8eTY0aNTTWubi4sHjxYh1F9HGLFy/GxcVF12HozJeUT3pdt+Li4mjYsCHLli1L8+fSlaTOna9JWpzH2j4DVq1aRc2aNSlcuDBNmjRJ1ef8WnXs2JGOHTvqOozPcv78eVxcXDh//nyaPs/QoUMZPHhwmj6HECJ1SeWDEOnsyZMnTJw4kZo1a+Lm5kbJkiVp06YN69evJzIyMl1iiIiIYPHixWn+xSCjO3DgAOvWrdN1GF+tkJAQ3NzccHFx4f79+7oO55PIOZA+7t27x+LFi1O9QvGXX37hxYsXdOjQQb0u4Yfr9evXNdKGhoby7bff4ubmxqlTp1I1jq5du+Li4sKUKVNSNd//uuXLl3P8+PEUpf3rr7+YM2cOJUuWZObMmQwbNiyNo/s0afUeT2ufUgZfq549e3L06FFu3bql61CEECkklQ9CpKOTJ0/SqFEjDh06hKenJxMmTGD48OHkyZOHOXPmMH369HSJIyIiAi8vLy5cuJAuz5dSTZo04dq1a9jZ2ek6FCD+B86GDRt0HcZX6/Dhw6hUKqytrdm/f7+uw/kkyZ0Dffv25dq1azqIKuM7fPgwU6dOVT++d+8eXl5e+Pj4pOrzrF69mgYNGmBiYpJsurCwMLp168bt27fx8vKiatWqqRbD0aNHuXLlSqrll9Fcu3aNvn37fta+K1asSPKHb1KfAefOnUNPT4/p06fTtGlTqlWr9tkxp4W0eo+nNW1lkBrKlCnDtWvXKFOmTJrkn6BIkSIUK1aMNWvWpOnzCCFSj4GuAxAis3j69ClDhw4lT548rF+/HhsbG/W29u3b8/jxY06ePKm7AFNBeHg42bJl++z99fX10dfXT8WIvk4RERFkzZpV12F8sf3791OtWjXy5MnDL7/8wtChQ3UdUqowMDDAwCDjfTx+6fmXGoyMjNL8OW7evMmtW7c+2r0jLCyM7t274+3tjZeXV6r+aH379i2zZs2iR48eLFq0KNXy/drFxcURHR1NlixZyJIlS6rnn9RnQEBAAMbGxuny3hJf5u3btxgaGqKnp5cm74+k1KtXj8WLF/PmzRuyZ8+eLs8phPh80vJBiHSyatUqwsPDmT59ukbFQwIHBwc6d+6sdX9t/dCT6iN7/fp1unfvTrly5XB3d6dGjRqMGTMGgGfPnlGhQgUAvLy8cHFxSdR39/79+wwaNIiyZcvi5uZG8+bNOXHiRJLPe+HCBSZNmkSFChU++uV+48aNNGjQgOLFi1OmTBmaN2/OgQMHkj2WuLg4Fi9eTOXKlSlevDgdO3bk3r17ifqWJ+x76dIlZs6cSfny5SlRogT9+/cnMDBQI47jx4/Tq1cvKleuTLFixahVqxZLliwhNjZWnaZjx46cPHkSHx8f9WuU0P9aW7/kpPq5duzYkYYNG/Lvv//Svn17ihcvzrx58wCIiopi0aJFfPPNNxQrVoxq1aoxe/ZsoqKiNPI9ffo0bdu2pXTp0nh4eFCnTh11Hrry/PlzLl68SP369WnQoAHPnj3jn3/++ez8bt68SY8ePShZsiQeHh507tw5ybvKISEhzJgxgxo1alCsWDGqVq3KyJEj1WUcFRXFwoULad68OaVKlaJEiRK0a9eOc+fOqfP42DmQ1LkWExPDkiVLqFWrFsWKFaNGjRrMmzcvUVnVqFGD3r17c/HiRXVT/5o1a7J3796PvgbPnj3DxcWF1atXs27dOjw9PXF3d6dDhw7cuXNHI+3o0aPx8PDgyZMn9OzZEw8PD0aMGAHEV0LMmjWLatWqUaxYMerUqcPq1atRFEUjj6ioKGbMmEH58uXx8PCgT58+vHz5MlFc2sYeSOp1ev+83L17t7o/dqdOndSvc8L5kdx1KjnHjx/H0NCQ0qVLa03z5s0bevTowY0bN1i8eDHVq1f/aL6fYuXKlSiKQvfu3VMlv+PHj9OwYUPc3Nxo2LAhx44dSzJdXFwc69ato0GDBri5uVGxYkUmTpxIcHCwRrqUvLZxcXGsX7+eRo0a4ebmRvny5enevbtGt5WELiX79+9XP+eff/6p3vb+50bC++H+/fsMHjyYkiVLUq5cOaZNm8bbt2818gwPD2fPnj3q98T775n3r60uLi7s3r2b8PBwddrdu3drfR3fv962adNGfexbt25NlDYgIICxY8dSsWJF3NzcaNy4MXv27EmU7tdff6V58+Z4eHhQsmRJGjVqxPr169XxJvceT86+ffv49ttv1Z+H7du356+//tKa/lM+dx49esTAgQOpVKkSbm5uVK1alaFDhxIaGgokXwYAvr6+jBkzhooVK1KsWDEaNGjAzz//nOTz/vrrr8yfP58qVapQvHhxwsLCkv0svHfvHh07dqR48eJUqVKFlStXJjpWHx8f+vTpQ4kSJahQoQIzZszgzz//TPK1rVixIuHh4Zw5c+ajr7kQQvcy3q0dITKo33//nXz58lGyZMk0fZ6AgAC6d++Oubk5vXr1wtTUlGfPnqm/zFpYWDBp0iQmTZrEN998wzfffAOg/hFx9+5d2rZti62tLT179iRbtmwcOnSI/v37s3jxYnX6BJMnT8bCwoL+/fsTHh6uNa4dO3Ywbdo06tSpQ6dOnXj79i23b9/m6tWrNGrUSOt+P/74I6tWrcLT05MqVapw69YtunfvrvFl9n3Tpk3D1NSUAQMG4OPjw/r165kyZQoLFixQp9mzZw/ZsmWja9euZMuWjXPnzrFo0SLCwsIYNWoUAH369CE0NJSXL1+qv7R/7l2V169f07NnTxo0aEDjxo2xtLQkLi6Ovn37cunSJVq1aoWjoyN37txh/fr1PHr0iKVLlwLx5dG7d29cXFwYNGgQRkZGPH78+It+6KeGX375haxZs+Lp6YmxsTH29vYcOHDgs97fd+/epX379mTPnp0ePXpgYGDA9u3b6dixI5s2baJ48eJA/A/K9u3bc//+fVq0aEGRIkUICgrit99+w9fXFwsLC8LCwti5cycNGzakZcuWvHnzhp9//pkePXqwc+dOChcu/NFzICnjx49nz5491KlTh65du3Lt2jVWrFjB/fv3WbJkiUbax48fM3jwYL799luaNWvGrl27GD16NEWLFqVQoUIffT327t3LmzdvaNeuHW/fvmXjxo107tyZAwcOYGVlpU4XExND9+7dKVWqFKNGjcLY2BhFUejbty/nz5/n22+/pXDhwvz555/Mnj0bX19fxo4dq95/3Lhx7N+/n4YNG1KyZEnOnTtHr169PqnsklOmTBk6duzIxo0b6dOnDwULFgTA0dHxo9ep5Fy+fBlnZ2cMDQ2T3B4REUHPnj35999/WbhwIZ6enonSREVFERYWlqLjsLCw0Hj8/PlzVq5cyYwZMzA2Nk5RHsn566+/GDhwIE5OTgwfPpygoCDGjBlDrly5EqWdOHEie/bsoXnz5nTs2JFnz56xefNmbt68ydatWzE0NEzxaztu3Dh2795N1apV+fbbb4mNjeXixYtcvXoVNzc3dbpz585x6NAh2rdvj7m5+Ue7xQ0ZMgQ7OzuGDx/OlStX2LhxIyEhIcyePRuA2bNnM378eNz
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1200x400 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"\n",
|
|||
|
|
"=== Médianes allocation K=4 ===\n",
|
|||
|
|
" share_asset_fixed_income share_asset_diversified share_asset_equity share_fund_carmignac_patrimoine share_fund_carmignac_investissement share_fund_carmignac_sécurité share_fund_carmignac_emergents\n",
|
|||
|
|
"cluster_k4 \n",
|
|||
|
|
"0 0.767 0.000 0.000 0.000 0.000 0.000 0.000\n",
|
|||
|
|
"1 0.284 0.207 0.155 0.152 0.011 0.018 0.002\n",
|
|||
|
|
"2 0.000 0.372 0.227 0.255 0.000 0.000 0.000\n",
|
|||
|
|
"3 0.000 0.326 0.099 0.169 0.000 0.000 0.000\n",
|
|||
|
|
"\n",
|
|||
|
|
"=== Distribution par pays (top 10) ===\n",
|
|||
|
|
"country_grp Belgium FRANCE France Italy Latam Luxembourg Other Spain Switzerland United Kingdom Us Offshore\n",
|
|||
|
|
"cluster_k4 \n",
|
|||
|
|
"0 1.5 0.1 17.1 5.0 12.5 6.1 3.0 5.0 3.6 6.6 39.4\n",
|
|||
|
|
"1 3.9 8.3 32.1 12.7 0.8 4.6 17.7 6.7 4.5 8.3 0.3\n",
|
|||
|
|
"2 0.9 0.5 69.6 2.8 10.0 1.4 3.9 1.6 2.2 6.7 0.5\n",
|
|||
|
|
"3 2.0 0.0 53.0 4.2 2.9 5.1 7.2 2.1 7.3 10.8 5.3\n",
|
|||
|
|
"\n",
|
|||
|
|
"=== Distribution par région ===\n",
|
|||
|
|
"region_grp Belgium FRANCE France Germany Italy LATAM Luxembourg Other Spain Switzerland United Kingdom\n",
|
|||
|
|
"cluster_k4 \n",
|
|||
|
|
"0 1.5 0.1 17.2 0.6 5.0 52.0 6.1 1.9 5.2 3.6 6.7\n",
|
|||
|
|
"1 3.9 8.4 32.5 1.2 12.7 1.2 4.6 15.5 7.2 4.5 8.3\n",
|
|||
|
|
"2 0.9 0.5 69.8 0.9 2.8 10.6 1.4 2.5 1.7 2.2 6.8\n",
|
|||
|
|
"3 2.0 0.0 53.0 4.3 4.2 8.2 5.1 2.6 2.3 7.3 10.9\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABrcAAAGGCAYAAADRitpgAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XVUFOsfBvCHNghJAxQsQEHEFuy49lWueW2v3d36EwMVW7EDUezu7rp2B7ZSIi0NC+z+/uC6uBIuyLK77PM5h3Pc2ZnZ7zjBPLzvvqMmEolEICIiIiIiIiIiIiIiIlIC6vIugIiIiIiIiIiIiIiIiEhabNwiIiIiIiIiIiIiIiIipcHGLSIiIiIiIiIiIiIiIlIabNwiIiIiIiIiIiIiIiIipcHGLSIiIiIiIiIiIiIiIlIabNwiIiIiIiIiIiIiIiIipcHGLSIiIiIiIiIiIiIiIlIabNwiIiIiIiIiIiIiIiIipcHGLSIiIiIiIiIiIiIiIlIabNwiIiIiIiIiIiJSEK9fv8bq1asRFBQk71KIiIgUFhu3iFTY6tWrYWNjky+f1bt3b/Tu3Vv8+u7du7CxscHZs2fz5fOnTp2Kpk2b5stnUd6Ii4uDk5MTjh8/Lu9SFMLP55AyuX79OqpVq4aIiAh5l0JERESkcJjL6EcxMTEYOXIkoqKiULJkyVytIyAgADY2Njh8+HAeVye9QYMGYebMmXL7fEWSn+d4XouMjISjoyOuXbsm71KIiDJg4xZRAXH48GHY2NiIf6pUqYL69etjwIAB8Pb2RmxsbJ58TnBwMFavXg0fH588WV9eUuTaKOe8vb1RtGhRtG3bVjzt/fv36NGjB6pVq4aOHTvi8ePHGZbz8vJC27ZtkZKSkp/lKr0NGzbg4sWLMll3w4YNUaZMGWzcuFEm6yciIiJSFMxlil2bMpg2bRoqV66M6dOny7uUXHv48CFu3bqFQYMGiadFR0djwoQJqFWrFpo1a4YDBw5kWO758+eoWrUq/P3987NcpXfixAls27ZNJus2NDRE586dsWrVKpmsn4jod7Bxi6iAGT16NBYvXozZs2eLe+QtWLAA7du3x+vXryXmHTZsGJ49e5aj9YeEhGDNmjU5Diqenp7w9PTM0TI5lV1t8+bNy7feiPT7kpOT4e3tjS5dukBDQwMAkJqaipEjR0IoFGLy5MkwNjbG8OHDJf5AEB4ejrVr12LatGnQ1NSUV/lKaePGjTJr3AKAbt26Yd++fXn2Bx0iIiIiRcZcxlyWGwEBAbC3t8eSJUugrp77P9mZm5vj2bNn6NChQx5WJz1PT084OTnB0tJSPG3RokW4d+8eRo0ahSZNmuB///sfHj16JH5fJBJh/vz56Nu3L0qXLi2PspXWyZMn4e3tLbP1d+/eHS9fvsTt27dl9hlERLnBxi2iAqZhw4bo0KEDOnXqhCFDhsDT0xNeXl4IDw/H8OHDkZiYKJ5XU1MTOjo6Mq0nISEBAKCtrQ1tbW2ZflZ2tLS05Pr5shYfHy/vEvLU1atXERERgdatW4unff78GZ8+fcLy5cvRvXt3eHh4ID4+XuLbW8uXL0fNmjVRv359eZSdpZSUFAgEAnmXke+SkpIgFAoBAC1btoRAIOAfM4iIiEglMJdljrks+/ktLCwwdOjQ3z4e1NTUoKOjI+4omJ/Cw8Nx7do1tGrVSmL61atXMWHCBPTp0wczZ85ErVq1cOXKFfH7x48fR2BgIIYMGZLfJWdLJBJJnK+qQigUIikpCQBQvnx5WFtb48iRI3KuiohIEhu3iFSAk5MThg8fjsDAQInnF2U27vOtW7fQvXt31KxZE9WqVUPLli2xfPlyAGnjsXfu3BlA2lAJ34fa+D6Od+/evdGuXTu8ePECPXv2RNWqVcXLZvW8IKFQiOXLl6NevXpwdHTE0KFDMzw0t2nTppg6dWqGZX9c569qy2xs9/j4eLi7u6NRo0awt7dHy5Yt4enpCZFIJDGfjY0N5s6di4sXL6Jdu3awt7dH27Ztcf369ez+28V12djY4PTp07/czgcPHmD06NFo3Lgx7O3t0ahRIyxYsCDDjfTUqVNRrVo1+Pn5YdCgQahWrRomTpyYZQ3f9/OHDx8wZswYVK9eHXXq1IGbm5v4ZvW7Q4cOoU+fPnBycoK9vT3atGmD3bt3S8wzZcoU1KlTB8nJyRk+q3///mjZsqX4dXbHU3YuXrwIc3NzlClTRjzte636+voAgMKFC6NQoULi/5+XL1/ixIkTmDZt2i/X/6Mfj9u///4bDg4OaNq0Kfbs2SMxn0AgwKpVq9CxY0fUqFEDjo6O6NGjB+7cuSMx3/fx7T09PbFt2zY0b94cVapUwYcPH7Kt49ixY+jcuTOqVq2KWrVqoWfPnrh582aW838f8iYgIEBi+vdj7u7du+Jpnz9/xqhRo1CvXj1UqVIFDRs2xLhx4xATEwMg7RiPj4/HkSNHxOfOj+dccHAwpk2bBmdnZ/Hxf/DgwUw/99SpU1ixYgUaNGiAqlWrir+pZWxsDBsbG1y6dCnb/wciIiKigoq5jLnMxsYG79+/Fw/P16NHD/H7x44dQ8eOHeHg4IDatWtj3LhxGWoDgF27dqFZs2ZwcHBA586d8eDBgwz7Natnbt2+fRs9evSAo6MjatasiWHDhmXIKd/r9PX1xdSpU1GzZk3UqFED06ZNEzeSZufq1atISUmBs7OzxPTExERxlgMAAwMD8fri4+OxbNkyTJgwAUWLFv3lZ/xca15lXSDtOB8yZAhu3Lgh3h979+7Nto6nT59i0KBBqFWrFhwdHfHnn39i+/btWc6f3TPRbGxssHr1avHr2NhYzJ8/H02bNoW9vT2cnJzwzz//4OXLlwDSzr+rV68iMDBQfL79eI4JBAJ4eHjgjz/+EB/PixcvztD58vv5dfz4cbRt2xZVqlTBjRs3xO87OzvjypUrGc5LIiJ54phNRCqiQ4cOWL58OW7evImuXbtmOs+7d+8wZMgQ2NjYYPTo0dDW1oavr694qIDy5ctj9OjR8PDwQLdu3VCjRg0AQPXq1cXr+PbtGwYNGoS2bduiffv2MDY2zrau9evXQ01NDYMGDUJ4eDi2b9+Ofv364dixYyhUqJDU2ydNbT8SiUQYNmyYOHxVqlQJN27cwOLFixEcHJxhfPOHDx/i/Pnz6NGjB4oWLYodO3Zg9OjRuHLlCgwNDX9ZnzTbefbsWSQmJqJ79+4oVqwYnj17hp07d+Lr16/w8PCQWF9KSgoGDBiAGjVqYMqUKVL9X40dOxbm5uaYMGECnjx5gh07diA6OhqLFy8Wz7Nnzx5UrFgRTZs2haamJq5cuYI5c+ZAJBKhZ8+eANKOpaNHj+LmzZto0qSJeNnQ0FDcuXMHI0aMAPDr4yk7jx8/hp2dncQ0Kysr6OnpYc2aNejduzfOnDmD2NhYVK5cGQDg5uaGnj17Sgx9Ia2oqCgMHjwYrVu3Rtu2bXHmzBnMnj0bWlpa4nAeGxuLAwcOoF27dujSpQvi4uJw8OBBDBw4EAcOHEClSpUk1nn48GEkJSWha9eu0NbWhoGBQZafv2bNGqxevRrVqlXD6NGjoaWlhadPn+LOnTu//S00gUCAAQMGQCAQoFevXjAxMUFwcDCuXr2K6Oho6OnpYfHixZg5cyYcHBzE14fvDYthYWHo2rUr1NTU0LNnTxgZGeH69euYMWMGYmNj0a9fP4nPW7duHbS0tMSfqaWlJX7Pzs5OpkMfEhERESk65jJJqpjLxowZA0tLS4wbN07cULB+/XqsWrUKrVu3RufOnREREYGdO3eiZ8+eOHr0qLhRaPfu3Zg7dy5q1qyJfv36ITAwECNGjIC+vj5KlCiR7ef++++/GDRoECwsLDBy5EgkJiZi586d6N69Ow4fPgwLCwuJ+ceOHQsLCwuMHz8er169woEDB2BkZIRJkyZl+zmPHz9GsWLFYG5uLjG9SpUq8PLyQrly5eDv748bN25g3rx5ANKGSC9evHiuh1HMq6z73adPnzBhwgR069YNXbt2RdmyZbP87Fu3bmHIkCEwMzNDnz59YGJigg8fPuDq1av
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1800x400 with 4 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# K-selection diagnostics\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"rows = []\n",
|
|||
|
|
"for k in range(2, 8):\n",
|
|||
|
|
" km = KMeans(n_clusters=k, n_init=50, random_state=RANDOM_STATE)\n",
|
|||
|
|
" labels = km.fit_predict(X_scaled)\n",
|
|||
|
|
" rows.append({\n",
|
|||
|
|
" \"k\": k,\n",
|
|||
|
|
" \"inertia\": km.inertia_,\n",
|
|||
|
|
" \"silhouette\": silhouette_score(X_scaled, labels),\n",
|
|||
|
|
" \"davies_bouldin\": davies_bouldin_score(X_scaled, labels),\n",
|
|||
|
|
" })\n",
|
|||
|
|
"df_kdiag = pd.DataFrame(rows)\n",
|
|||
|
|
"print(df_kdiag.to_string(index=False))\n",
|
|||
|
|
"\n",
|
|||
|
|
"fig, axes = plt.subplots(1, 3, figsize=(15, 4))\n",
|
|||
|
|
"for ax, col, title in zip(axes,\n",
|
|||
|
|
" [\"inertia\", \"silhouette\", \"davies_bouldin\"],\n",
|
|||
|
|
" [\"Elbow / Inertia\", \"Silhouette (higher=better)\", \"Davies-Bouldin (lower=better)\"]):\n",
|
|||
|
|
" ax.plot(df_kdiag[\"k\"], df_kdiag[col], marker=\"o\")\n",
|
|||
|
|
" ax.set_title(title)\n",
|
|||
|
|
" ax.set_xlabel(\"K\")\n",
|
|||
|
|
"plt.tight_layout()\n",
|
|||
|
|
"plt.show()\n",
|
|||
|
|
"\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# Clustering K=4\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"RESULTS = {}\n",
|
|||
|
|
"for k in [4]:\n",
|
|||
|
|
" km = KMeans(n_clusters=k, n_init=50, random_state=RANDOM_STATE)\n",
|
|||
|
|
" dfc[f\"cluster_k{k}\"] = km.fit_predict(X_scaled)\n",
|
|||
|
|
" RESULTS[k] = {\n",
|
|||
|
|
" \"model\": km,\n",
|
|||
|
|
" \"silhouette\": silhouette_score(X_scaled, dfc[f\"cluster_k{k}\"]),\n",
|
|||
|
|
" \"davies_bouldin\": davies_bouldin_score(X_scaled, dfc[f\"cluster_k{k}\"]),\n",
|
|||
|
|
" }\n",
|
|||
|
|
" print(f\"K={k} | sil={RESULTS[k]['silhouette']:.4f} | db={RESULTS[k]['davies_bouldin']:.4f}\")\n",
|
|||
|
|
" counts = dfc[f\"cluster_k{k}\"].value_counts().sort_index()\n",
|
|||
|
|
" props = counts / counts.sum() * 100\n",
|
|||
|
|
" print(pd.DataFrame({\"n_comptes\": counts, \"pct\": props.round(1)}))\n",
|
|||
|
|
" print()\n",
|
|||
|
|
"\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# Heatmap comportement\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"prof_behavior = plot_heatmap(\n",
|
|||
|
|
" dfc, profile_vars_behavior, \"cluster_k4\",\n",
|
|||
|
|
" title=\"Cluster signatures — Comportement (K=4, robust z-score)\",\n",
|
|||
|
|
" figsize=(14, 4)\n",
|
|||
|
|
")\n",
|
|||
|
|
"print(\"\\n=== Médianes comportement K=4 ===\")\n",
|
|||
|
|
"print(prof_behavior.round(3).to_string())\n",
|
|||
|
|
"\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# Heatmap allocation (descriptif post-clustering)\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"prof_allocation = plot_heatmap(\n",
|
|||
|
|
" dfc, profile_vars_allocation, \"cluster_k4\",\n",
|
|||
|
|
" title=\"Cluster signatures — Allocation produits (K=4, descriptif post-clustering)\",\n",
|
|||
|
|
" figsize=(12, 4)\n",
|
|||
|
|
")\n",
|
|||
|
|
"print(\"\\n=== Médianes allocation K=4 ===\")\n",
|
|||
|
|
"print(prof_allocation.round(3).to_string())\n",
|
|||
|
|
"\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"# Description géographique post-clustering\n",
|
|||
|
|
"# ============================================================\n",
|
|||
|
|
"print(\"\\n=== Distribution par pays (top 10) ===\")\n",
|
|||
|
|
"geo_country = pd.crosstab(\n",
|
|||
|
|
" dfc[\"cluster_k4\"],\n",
|
|||
|
|
" dfc[\"country_grp\"].fillna(\"Unknown\"),\n",
|
|||
|
|
" normalize=\"index\"\n",
|
|||
|
|
").round(3) * 100\n",
|
|||
|
|
"print(geo_country.to_string())\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"\\n=== Distribution par région ===\")\n",
|
|||
|
|
"geo_region = pd.crosstab(\n",
|
|||
|
|
" dfc[\"cluster_k4\"],\n",
|
|||
|
|
" dfc[\"region_grp\"].fillna(\"Unknown\"),\n",
|
|||
|
|
" normalize=\"index\"\n",
|
|||
|
|
").round(3) * 100\n",
|
|||
|
|
"print(geo_region.to_string())\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Visualisation heatmap géographique\n",
|
|||
|
|
"fig, axes = plt.subplots(1, 2, figsize=(18, 4))\n",
|
|||
|
|
"\n",
|
|||
|
|
"sns.heatmap(\n",
|
|||
|
|
" geo_country,\n",
|
|||
|
|
" cmap=\"Blues\", annot=True, fmt=\".1f\",\n",
|
|||
|
|
" ax=axes[0],\n",
|
|||
|
|
" cbar_kws={\"label\": \"%\"}\n",
|
|||
|
|
")\n",
|
|||
|
|
"axes[0].set_title(\"Distribution par pays (% par cluster)\")\n",
|
|||
|
|
"axes[0].set_xlabel(\"Pays\")\n",
|
|||
|
|
"axes[0].set_ylabel(\"Cluster\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"sns.heatmap(\n",
|
|||
|
|
" geo_region,\n",
|
|||
|
|
" cmap=\"Blues\", annot=True, fmt=\".1f\",\n",
|
|||
|
|
" ax=axes[1],\n",
|
|||
|
|
" cbar_kws={\"label\": \"%\"}\n",
|
|||
|
|
")\n",
|
|||
|
|
"axes[1].set_title(\"Distribution par région (% par cluster)\")\n",
|
|||
|
|
"axes[1].set_xlabel(\"Région\")\n",
|
|||
|
|
"axes[1].set_ylabel(\"Cluster\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"plt.tight_layout()\n",
|
|||
|
|
"plt.show()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 201,
|
|||
|
|
"id": "50ecf35e-de7e-44ae-afee-404186c4d42c",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
" flow_freq gross_flow_to_aum n_isin_total \\\n",
|
|||
|
|
"flow_freq 1.000 0.043 0.484 \n",
|
|||
|
|
"gross_flow_to_aum 0.043 1.000 0.087 \n",
|
|||
|
|
"n_isin_total 0.484 0.087 1.000 \n",
|
|||
|
|
"avg_holding_months_per_isin 0.074 -0.019 0.027 \n",
|
|||
|
|
"exit_rate_per_isin -0.066 -0.029 -0.103 \n",
|
|||
|
|
"flow_direction_balance 0.182 0.007 0.038 \n",
|
|||
|
|
"log_aum_qty_mean 0.522 -0.047 0.381 \n",
|
|||
|
|
"months_since_last_tx -0.513 -0.012 -0.229 \n",
|
|||
|
|
"\n",
|
|||
|
|
" avg_holding_months_per_isin exit_rate_per_isin \\\n",
|
|||
|
|
"flow_freq 0.074 -0.066 \n",
|
|||
|
|
"gross_flow_to_aum -0.019 -0.029 \n",
|
|||
|
|
"n_isin_total 0.027 -0.103 \n",
|
|||
|
|
"avg_holding_months_per_isin 1.000 -0.257 \n",
|
|||
|
|
"exit_rate_per_isin -0.257 1.000 \n",
|
|||
|
|
"flow_direction_balance -0.163 0.093 \n",
|
|||
|
|
"log_aum_qty_mean 0.140 0.024 \n",
|
|||
|
|
"months_since_last_tx -0.306 0.159 \n",
|
|||
|
|
"\n",
|
|||
|
|
" flow_direction_balance log_aum_qty_mean \\\n",
|
|||
|
|
"flow_freq 0.182 0.522 \n",
|
|||
|
|
"gross_flow_to_aum 0.007 -0.047 \n",
|
|||
|
|
"n_isin_total 0.038 0.381 \n",
|
|||
|
|
"avg_holding_months_per_isin -0.163 0.140 \n",
|
|||
|
|
"exit_rate_per_isin 0.093 0.024 \n",
|
|||
|
|
"flow_direction_balance 1.000 0.298 \n",
|
|||
|
|
"log_aum_qty_mean 0.298 1.000 \n",
|
|||
|
|
"months_since_last_tx -0.119 -0.389 \n",
|
|||
|
|
"\n",
|
|||
|
|
" months_since_last_tx \n",
|
|||
|
|
"flow_freq -0.513 \n",
|
|||
|
|
"gross_flow_to_aum -0.012 \n",
|
|||
|
|
"n_isin_total -0.229 \n",
|
|||
|
|
"avg_holding_months_per_isin -0.306 \n",
|
|||
|
|
"exit_rate_per_isin 0.159 \n",
|
|||
|
|
"flow_direction_balance -0.119 \n",
|
|||
|
|
"log_aum_qty_mean -0.389 \n",
|
|||
|
|
"months_since_last_tx 1.000 \n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# Test : corrélations entre features\n",
|
|||
|
|
"corr_matrix = dfc[base_features].corr().round(3)\n",
|
|||
|
|
"print(corr_matrix)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 202,
|
|||
|
|
"id": "273392b8-c60c-4b19-ab4e-760616d3c246",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"base_features: 8\n",
|
|||
|
|
"share_asset: []\n",
|
|||
|
|
"share_fund: []\n",
|
|||
|
|
"X columns: 8\n",
|
|||
|
|
"X_num columns: 8\n",
|
|||
|
|
"X_cat columns: 20\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"print(\"base_features:\", len(base_features))\n",
|
|||
|
|
"print(\"share_asset:\", [c for c in all_features if c.startswith(\"share_asset_\")])\n",
|
|||
|
|
"print(\"share_fund:\", [c for c in all_features if c.startswith(\"share_fund_\")])\n",
|
|||
|
|
"print(\"X columns:\", X.shape[1])\n",
|
|||
|
|
"print(\"X_num columns:\", X_num.shape[1])\n",
|
|||
|
|
"print(\"X_cat columns:\", X_cat.shape[1])"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 203,
|
|||
|
|
"id": "d42b5319-c66c-4a7f-aeac-d3044d07f499",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"=== share_fund_ disponibles ===\n",
|
|||
|
|
"['share_fund_carmignac_court_terme', 'share_fund_carmignac_emergents', 'share_fund_carmignac_investissement', 'share_fund_carmignac_patrimoine', 'share_fund_carmignac_portfolio_credit', 'share_fund_carmignac_portfolio_flexible_b', 'share_fund_carmignac_portfolio_global_bon', 'share_fund_carmignac_portfolio_patrimoine', 'share_fund_carmignac_portfolio_sécurité', 'share_fund_carmignac_sécurité']\n",
|
|||
|
|
"\n",
|
|||
|
|
"=== share_asset_ disponibles ===\n",
|
|||
|
|
"['share_asset_alternative', 'share_asset_diversified', 'share_asset_equity', 'share_asset_fixed_income', 'share_asset_private_assets']\n",
|
|||
|
|
"share_fund_carmignac_court_terme: 0.000000\n",
|
|||
|
|
"share_fund_carmignac_emergents: 0.000001\n",
|
|||
|
|
"share_fund_carmignac_investissement: 0.000032\n",
|
|||
|
|
"share_fund_carmignac_patrimoine: 0.011248\n",
|
|||
|
|
"share_fund_carmignac_portfolio_credit: 0.000000\n",
|
|||
|
|
"share_fund_carmignac_portfolio_flexible_b: 0.000000\n",
|
|||
|
|
"share_fund_carmignac_portfolio_global_bon: 0.000000\n",
|
|||
|
|
"share_fund_carmignac_portfolio_patrimoine: 0.000000\n",
|
|||
|
|
"share_fund_carmignac_portfolio_sécurité: 0.000000\n",
|
|||
|
|
"share_fund_carmignac_sécurité: 0.000080\n",
|
|||
|
|
"share_asset_alternative: 0.000000\n",
|
|||
|
|
"share_asset_diversified: 0.027594\n",
|
|||
|
|
"share_asset_equity: 0.009158\n",
|
|||
|
|
"share_asset_fixed_income: 0.130769\n",
|
|||
|
|
"share_asset_private_assets: 0.000000\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# Identifier les share_fund_ et share_asset_ les plus discriminantes\n",
|
|||
|
|
"top_share_funds = [c for c in dfc.columns if c.startswith(\"share_fund_\")]\n",
|
|||
|
|
"top_share_assets = [c for c in dfc.columns if c.startswith(\"share_asset_\")]\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Regarder lesquelles varient le plus entre clusters\n",
|
|||
|
|
"print(\"=== share_fund_ disponibles ===\")\n",
|
|||
|
|
"print(top_share_funds)\n",
|
|||
|
|
"print(\"\\n=== share_asset_ disponibles ===\")\n",
|
|||
|
|
"print(top_share_assets)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Variance inter-cluster pour chaque share variable\n",
|
|||
|
|
"for col in top_share_funds + top_share_assets:\n",
|
|||
|
|
" var_inter = dfc.groupby(\"cluster_k4\")[col].median().var()\n",
|
|||
|
|
" print(f\"{col}: {var_inter:.6f}\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": null,
|
|||
|
|
"id": "7de1a58e-37ec-4d13-9807-5b047ec6ff42",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"import subprocess\n",
|
|||
|
|
"subprocess.run([\"jupyter\", \"nbconvert\", \"--to\", \"html\", \"clustering_clean.ipynb\"])"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"metadata": {
|
|||
|
|
"kernelspec": {
|
|||
|
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
|
"language": "python",
|
|||
|
|
"name": "python3"
|
|||
|
|
},
|
|||
|
|
"language_info": {
|
|||
|
|
"codemirror_mode": {
|
|||
|
|
"name": "ipython",
|
|||
|
|
"version": 3
|
|||
|
|
},
|
|||
|
|
"file_extension": ".py",
|
|||
|
|
"mimetype": "text/x-python",
|
|||
|
|
"name": "python",
|
|||
|
|
"nbconvert_exporter": "python",
|
|||
|
|
"pygments_lexer": "ipython3",
|
|||
|
|
"version": "3.13.11"
|
|||
|
|
}
|
|||
|
|
},
|
|||
|
|
"nbformat": 4,
|
|||
|
|
"nbformat_minor": 5
|
|||
|
|
}
|