5001 lines
2.3 MiB
Plaintext
5001 lines
2.3 MiB
Plaintext
|
|
{
|
|||
|
|
"cells": [
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 1,
|
|||
|
|
"id": "beb29e50-e6ef-4cf9-a355-45fbd2f4f08d",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Fichiers Flows : ['projet-bdc-data/carmignac/Flows ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv']\n",
|
|||
|
|
"Fichiers AUM : ['projet-bdc-data/carmignac/AUM ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv']\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Agreement - Code</th>\n",
|
|||
|
|
" <th>Company - Id</th>\n",
|
|||
|
|
" <th>Company - Ultimate Parent Id</th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>Registrar Account - Region</th>\n",
|
|||
|
|
" <th>RegistrarAccount - Country</th>\n",
|
|||
|
|
" <th>Product - Asset Type</th>\n",
|
|||
|
|
" <th>Product - Strategy</th>\n",
|
|||
|
|
" <th>Product - Legal Status</th>\n",
|
|||
|
|
" <th>Product - Is Dedie ?</th>\n",
|
|||
|
|
" <th>...</th>\n",
|
|||
|
|
" <th>Centralisation Date</th>\n",
|
|||
|
|
" <th>Quantity - Subscription</th>\n",
|
|||
|
|
" <th>Quantity - Redemption</th>\n",
|
|||
|
|
" <th>Quantity - NetFlows</th>\n",
|
|||
|
|
" <th>Value Ccy - Subscription</th>\n",
|
|||
|
|
" <th>Value Ccy - Redemption</th>\n",
|
|||
|
|
" <th>Value Ccy - NetFlows</th>\n",
|
|||
|
|
" <th>Value € - Subscription</th>\n",
|
|||
|
|
" <th>Value € - Redemption</th>\n",
|
|||
|
|
" <th>Value € - NetFlows</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>003</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>200127202</td>\n",
|
|||
|
|
" <td>France</td>\n",
|
|||
|
|
" <td>France</td>\n",
|
|||
|
|
" <td>Equity</td>\n",
|
|||
|
|
" <td>Investissement</td>\n",
|
|||
|
|
" <td>SICAV</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2020-11-05</td>\n",
|
|||
|
|
" <td>1636.00</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>1636.000</td>\n",
|
|||
|
|
" <td>280983.00</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>280983.00</td>\n",
|
|||
|
|
" <td>280983.00</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>280983.00</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>003</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>406533</td>\n",
|
|||
|
|
" <td>France</td>\n",
|
|||
|
|
" <td>France</td>\n",
|
|||
|
|
" <td>Diversified</td>\n",
|
|||
|
|
" <td>Patrimoine</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2015-03-09</td>\n",
|
|||
|
|
" <td>144.69</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>144.690</td>\n",
|
|||
|
|
" <td>99985.13</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>99985.13</td>\n",
|
|||
|
|
" <td>99985.13</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>99985.13</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>003</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>406533</td>\n",
|
|||
|
|
" <td>France</td>\n",
|
|||
|
|
" <td>France</td>\n",
|
|||
|
|
" <td>Equity</td>\n",
|
|||
|
|
" <td>Investissement</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2016-10-26</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-8.321</td>\n",
|
|||
|
|
" <td>-8.321</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-9384.76</td>\n",
|
|||
|
|
" <td>-9384.76</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-9384.76</td>\n",
|
|||
|
|
" <td>-9384.76</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>003</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>406533</td>\n",
|
|||
|
|
" <td>France</td>\n",
|
|||
|
|
" <td>France</td>\n",
|
|||
|
|
" <td>Equity</td>\n",
|
|||
|
|
" <td>Investissement</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2018-10-18</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-22.083</td>\n",
|
|||
|
|
" <td>-22.083</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-25227.40</td>\n",
|
|||
|
|
" <td>-25227.40</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-25227.40</td>\n",
|
|||
|
|
" <td>-25227.40</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>003</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>406533</td>\n",
|
|||
|
|
" <td>France</td>\n",
|
|||
|
|
" <td>France</td>\n",
|
|||
|
|
" <td>Equity</td>\n",
|
|||
|
|
" <td>Investissement</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2019-04-08</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-465.992</td>\n",
|
|||
|
|
" <td>-465.992</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-563775.76</td>\n",
|
|||
|
|
" <td>-563775.76</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-563775.76</td>\n",
|
|||
|
|
" <td>-563775.76</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"<p>5 rows × 24 columns</p>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
|
|||
|
|
"0 003 166 166 \n",
|
|||
|
|
"1 003 166 166 \n",
|
|||
|
|
"2 003 166 166 \n",
|
|||
|
|
"3 003 166 166 \n",
|
|||
|
|
"4 003 166 166 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Registrar Account - ID Registrar Account - Region \\\n",
|
|||
|
|
"0 200127202 France \n",
|
|||
|
|
"1 406533 France \n",
|
|||
|
|
"2 406533 France \n",
|
|||
|
|
"3 406533 France \n",
|
|||
|
|
"4 406533 France \n",
|
|||
|
|
"\n",
|
|||
|
|
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
|
|||
|
|
"0 France Equity Investissement \n",
|
|||
|
|
"1 France Diversified Patrimoine \n",
|
|||
|
|
"2 France Equity Investissement \n",
|
|||
|
|
"3 France Equity Investissement \n",
|
|||
|
|
"4 France Equity Investissement \n",
|
|||
|
|
"\n",
|
|||
|
|
" Product - Legal Status Product - Is Dedie ? ... Centralisation Date \\\n",
|
|||
|
|
"0 SICAV NO ... 2020-11-05 \n",
|
|||
|
|
"1 FCP NO ... 2015-03-09 \n",
|
|||
|
|
"2 FCP NO ... 2016-10-26 \n",
|
|||
|
|
"3 FCP NO ... 2018-10-18 \n",
|
|||
|
|
"4 FCP NO ... 2019-04-08 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n",
|
|||
|
|
"0 1636.00 0.000 1636.000 \n",
|
|||
|
|
"1 144.69 0.000 144.690 \n",
|
|||
|
|
"2 0.00 -8.321 -8.321 \n",
|
|||
|
|
"3 0.00 -22.083 -22.083 \n",
|
|||
|
|
"4 0.00 -465.992 -465.992 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Value Ccy - Subscription Value Ccy - Redemption Value Ccy - NetFlows \\\n",
|
|||
|
|
"0 280983.00 0.00 280983.00 \n",
|
|||
|
|
"1 99985.13 0.00 99985.13 \n",
|
|||
|
|
"2 0.00 -9384.76 -9384.76 \n",
|
|||
|
|
"3 0.00 -25227.40 -25227.40 \n",
|
|||
|
|
"4 0.00 -563775.76 -563775.76 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Value € - Subscription Value € - Redemption Value € - NetFlows \n",
|
|||
|
|
"0 280983.00 0.00 280983.00 \n",
|
|||
|
|
"1 99985.13 0.00 99985.13 \n",
|
|||
|
|
"2 0.00 -9384.76 -9384.76 \n",
|
|||
|
|
"3 0.00 -25227.40 -25227.40 \n",
|
|||
|
|
"4 0.00 -563775.76 -563775.76 \n",
|
|||
|
|
"\n",
|
|||
|
|
"[5 rows x 24 columns]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 1,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# Import des données\n",
|
|||
|
|
"\n",
|
|||
|
|
"import os\n",
|
|||
|
|
"import s3fs\n",
|
|||
|
|
"import pandas as pd\n",
|
|||
|
|
"\n",
|
|||
|
|
"s3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
|||
|
|
"\n",
|
|||
|
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': s3_ENDPOINT_URL})\n",
|
|||
|
|
"\n",
|
|||
|
|
"BUCKET = \"projet-bdc-data\"\n",
|
|||
|
|
"carmignac_path = \"projet-bdc-data/carmignac\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Liste des fichiers FLOWS\n",
|
|||
|
|
"all_files = fs.ls(carmignac_path)\n",
|
|||
|
|
"flows_files = [f for f in all_files if \"Flows\" in f and f.endswith(\".csv\")]\n",
|
|||
|
|
"print(\"Fichiers Flows :\", flows_files)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Lire tous les fichiers dans un dictionnaire\n",
|
|||
|
|
"flows_data = {}\n",
|
|||
|
|
"for file_path in flows_files:\n",
|
|||
|
|
" with fs.open(file_path, 'r') as f:\n",
|
|||
|
|
" df = pd.read_csv(f, sep=';',low_memory=False)\n",
|
|||
|
|
" flows_data[os.path.basename(file_path)] = df\n",
|
|||
|
|
"\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Liste des fichiers AUM\n",
|
|||
|
|
"all_files = fs.ls(carmignac_path)\n",
|
|||
|
|
"aum_files = [f for f in all_files if \"AUM\" in f and f.endswith(\".csv\")]\n",
|
|||
|
|
"print(\"Fichiers AUM :\", aum_files)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Lire tous les fichiers dans un dictionnaire\n",
|
|||
|
|
"aum_data = {}\n",
|
|||
|
|
"for file_path in aum_files:\n",
|
|||
|
|
" with fs.open(file_path, 'r') as f:\n",
|
|||
|
|
" df = pd.read_csv(f, sep=';',low_memory=False)\n",
|
|||
|
|
" aum_data[os.path.basename(file_path)] = df\n",
|
|||
|
|
"\n",
|
|||
|
|
"df = aum_data['AUM ENSAE V2 -20251105.csv']\n",
|
|||
|
|
"dg = flows_data['Flows ENSAE V2 -20251105.csv']\n",
|
|||
|
|
"\n",
|
|||
|
|
"df.head()\n",
|
|||
|
|
"dg.head()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 26,
|
|||
|
|
"id": "d0ab80b2-bd82-4ee3-be81-1b3105a0ed25",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"import warnings\n",
|
|||
|
|
"warnings.filterwarnings(\"ignore\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"import numpy as np\n",
|
|||
|
|
"import pandas as pd\n",
|
|||
|
|
"import matplotlib.pyplot as plt\n",
|
|||
|
|
"import seaborn as sns\n",
|
|||
|
|
"\n",
|
|||
|
|
"from sklearn.preprocessing import StandardScaler, RobustScaler\n",
|
|||
|
|
"from sklearn.cluster import KMeans\n",
|
|||
|
|
"from sklearn.mixture import GaussianMixture\n",
|
|||
|
|
"from sklearn.metrics import silhouette_score, davies_bouldin_score, pairwise_distances\n",
|
|||
|
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
|
"from sklearn.neighbors import kneighbors_graph\n",
|
|||
|
|
"from sklearn.manifold import MDS\n",
|
|||
|
|
"\n",
|
|||
|
|
"sns.set_style(\"whitegrid\")\n",
|
|||
|
|
"pd.set_option(\"display.max_columns\", 200)\n",
|
|||
|
|
"pd.set_option(\"display.max_rows\", 200)\n",
|
|||
|
|
"\n",
|
|||
|
|
"EPS = 1e-9\n",
|
|||
|
|
"RANDOM_STATE = 42"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 4,
|
|||
|
|
"id": "87096e14-33f6-4619-969e-c4d865c5318c",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"# Filtrer les comptes techniques\n",
|
|||
|
|
"\n",
|
|||
|
|
"import pandas as pd\n",
|
|||
|
|
"import numpy as np\n",
|
|||
|
|
"\n",
|
|||
|
|
"df['Centralisation Date'] = pd.to_datetime(df['Centralisation Date'])\n",
|
|||
|
|
"dg['Centralisation Date'] = pd.to_datetime(dg['Centralisation Date'])\n",
|
|||
|
|
"df = df[~df['Registrar Account - ID'].isin(['Off Distribution','Private Clients', 'Private Client'])]\n",
|
|||
|
|
"dg = dg[~dg['Registrar Account - ID'].isin(['Off Distribution','Private Clients','Private Client'])]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 5,
|
|||
|
|
"id": "b58e0058-11f8-40f8-b23c-0d5cd599d8e7",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"# Date de référence et sélection des 400+ principaux codes\n",
|
|||
|
|
"\n",
|
|||
|
|
"ref_date = pd.Timestamp('2025-10-31')\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_ref = df[df['Centralisation Date'] == ref_date]\n",
|
|||
|
|
"\n",
|
|||
|
|
"aum_account = (\n",
|
|||
|
|
" df_ref\n",
|
|||
|
|
" .groupby('Registrar Account - ID')['Value - AUM €']\n",
|
|||
|
|
" .sum()\n",
|
|||
|
|
" .reset_index()\n",
|
|||
|
|
" .sort_values(by='Value - AUM €', ascending=False)\n",
|
|||
|
|
")\n",
|
|||
|
|
"aum_account = aum_account[aum_account['Value - AUM €'] > 5_000_000]\n",
|
|||
|
|
"selected_accounts = aum_account['Registrar Account - ID']\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_aum = df[df['Registrar Account - ID'].isin(selected_accounts)].copy()\n",
|
|||
|
|
"df_flows = dg[dg['Registrar Account - ID'].isin(selected_accounts)].copy()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 30,
|
|||
|
|
"id": "5e55a442-14ca-454c-bcf7-432d21dc2e23",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"(33972, 6)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>month</th>\n",
|
|||
|
|
" <th>aum_qty</th>\n",
|
|||
|
|
" <th>net_flow_qty</th>\n",
|
|||
|
|
" <th>gross_flow_qty</th>\n",
|
|||
|
|
" <th>n_tx</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>2015-01-31</td>\n",
|
|||
|
|
" <td>179864.637</td>\n",
|
|||
|
|
" <td>-1524.010</td>\n",
|
|||
|
|
" <td>15230.010</td>\n",
|
|||
|
|
" <td>32</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>2015-02-28</td>\n",
|
|||
|
|
" <td>186761.736</td>\n",
|
|||
|
|
" <td>7247.100</td>\n",
|
|||
|
|
" <td>18571.880</td>\n",
|
|||
|
|
" <td>38</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>2015-03-31</td>\n",
|
|||
|
|
" <td>190357.718</td>\n",
|
|||
|
|
" <td>3655.380</td>\n",
|
|||
|
|
" <td>9754.040</td>\n",
|
|||
|
|
" <td>47</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>2015-04-30</td>\n",
|
|||
|
|
" <td>191429.324</td>\n",
|
|||
|
|
" <td>-218.394</td>\n",
|
|||
|
|
" <td>12840.950</td>\n",
|
|||
|
|
" <td>39</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>2015-05-31</td>\n",
|
|||
|
|
" <td>189056.475</td>\n",
|
|||
|
|
" <td>-4782.849</td>\n",
|
|||
|
|
" <td>6332.849</td>\n",
|
|||
|
|
" <td>24</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID month aum_qty net_flow_qty gross_flow_qty \\\n",
|
|||
|
|
"0 18872 2015-01-31 179864.637 -1524.010 15230.010 \n",
|
|||
|
|
"1 18872 2015-02-28 186761.736 7247.100 18571.880 \n",
|
|||
|
|
"2 18872 2015-03-31 190357.718 3655.380 9754.040 \n",
|
|||
|
|
"3 18872 2015-04-30 191429.324 -218.394 12840.950 \n",
|
|||
|
|
"4 18872 2015-05-31 189056.475 -4782.849 6332.849 \n",
|
|||
|
|
"\n",
|
|||
|
|
" n_tx \n",
|
|||
|
|
"0 32 \n",
|
|||
|
|
"1 38 \n",
|
|||
|
|
"2 47 \n",
|
|||
|
|
"3 39 \n",
|
|||
|
|
"4 24 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 30,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# Variables par mois\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Parse dates\n",
|
|||
|
|
"df_flows[\"Centralisation Date\"] = pd.to_datetime(df_flows[\"Centralisation Date\"], errors=\"coerce\")\n",
|
|||
|
|
"df_aum[\"Centralisation Date\"] = pd.to_datetime(df_aum[\"Centralisation Date\"], errors=\"coerce\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"ID_COL = \"Registrar Account - ID\"\n",
|
|||
|
|
"FLOW_COL = \"Quantity - NetFlows\"\n",
|
|||
|
|
"AUM_COL = \"Quantity - AUM\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Month key\n",
|
|||
|
|
"df_flows[\"month\"] = df_flows[\"Centralisation Date\"].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
|
|||
|
|
"df_aum[\"month\"] = df_aum[\"Centralisation Date\"].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
|
|||
|
|
"# Flows sont journaliers, AUM est mensuel → il faut une granularité commune.\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 1) Monthly aggregation for FLOWS : je fais mon etude mensuel parce que aum valeur mensuel \n",
|
|||
|
|
"\n",
|
|||
|
|
"ID_COL = \"Registrar Account - ID\"\n",
|
|||
|
|
"FLOW_COL = \"Quantity - NetFlows\"\n",
|
|||
|
|
"AUM_COL = \"Quantity - AUM\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_flows_m = (\n",
|
|||
|
|
" df_flows\n",
|
|||
|
|
" .dropna(subset=[ID_COL, \"month\", FLOW_COL])\n",
|
|||
|
|
" .assign(gross_flow_qty=lambda x: x[FLOW_COL].abs()) # absolute quantity moved\n",
|
|||
|
|
" .groupby([ID_COL, \"month\"], as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" net_flow_qty=(FLOW_COL, \"sum\"), # net quantity change over the month\n",
|
|||
|
|
" gross_flow_qty=(\"gross_flow_qty\", \"sum\"), # total traded quantity (activity intensity)\n",
|
|||
|
|
" n_tx=(FLOW_COL, \"size\"), # number of transactions\n",
|
|||
|
|
" )\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 2) Monthly aggregation for AUM (client-month holdings) ---\n",
|
|||
|
|
"df_aum_m = (\n",
|
|||
|
|
" df_aum\n",
|
|||
|
|
" .dropna(subset=[ID_COL, \"month\", AUM_COL])\n",
|
|||
|
|
" .groupby([ID_COL, \"month\"], as_index=False)\n",
|
|||
|
|
" .agg(aum_qty=(AUM_COL, \"sum\")) # total held quantity across ISINs\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_month0 = df_aum_m.merge(df_flows_m, on=[ID_COL, \"month\"], how=\"left\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 4) Months without transactions => flows are 0 ---\n",
|
|||
|
|
"df_month0[\"net_flow_qty\"] = df_month0[\"net_flow_qty\"].fillna(0.0)\n",
|
|||
|
|
"df_month0[\"gross_flow_qty\"] = df_month0[\"gross_flow_qty\"].fillna(0.0)\n",
|
|||
|
|
"df_month0[\"n_tx\"] = df_month0[\"n_tx\"].fillna(0).astype(int)\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(df_month0.shape)\n",
|
|||
|
|
"df_month0.head()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 63,
|
|||
|
|
"id": "cada672f-2944-4563-82c0-6117569a2f4c",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"eps = 1e-9 \n",
|
|||
|
|
"\n",
|
|||
|
|
"# 1) Active month indicator: did the client trade this month?\n",
|
|||
|
|
"df_month0[\"active_month\"] = (df_month0[\"gross_flow_qty\"] > 0).astype(int)\n",
|
|||
|
|
"\n",
|
|||
|
|
"#client avec beaucoup de mois à 0 → “stable / dormant”\n",
|
|||
|
|
"#client actif presque tous les mois → “rebalancer / institutionnel actif”\n",
|
|||
|
|
"\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 2) Monthly relative intensity (turnover proxy in quantity terms) : Mesurer l’intensité de trading relativement à la taille et pouvoir ocmparer client petit avec client plus gros\n",
|
|||
|
|
"df_month0[\"rel_intensity_m\"] = df_month0[\"gross_flow_qty\"] / (df_month0[\"aum_qty\"].abs() + eps)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 3) Monthly net flow ratio (directional change): sert a Capturer la direction de la dynamique\n",
|
|||
|
|
"df_month0[\"netflow_to_aum_m\"] = df_month0[\"net_flow_qty\"] / (df_month0[\"aum_qty\"].abs() + eps)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 4) Aggregate to client-level features (1 row per client)\n",
|
|||
|
|
"df_client_feat0 = (\n",
|
|||
|
|
" df_month0.groupby(ID_COL, as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" # Coverage / activity\n",
|
|||
|
|
" n_months=(\"month\", \"nunique\"),\n",
|
|||
|
|
" n_active_months=(\"active_month\", \"sum\"),\n",
|
|||
|
|
" flow_freq=(\"active_month\", \"mean\"),\n",
|
|||
|
|
"\n",
|
|||
|
|
" # Size in quantity terms\n",
|
|||
|
|
" aum_qty_mean=(\"aum_qty\", \"mean\"),\n",
|
|||
|
|
" aum_qty_median=(\"aum_qty\", \"median\"),\n",
|
|||
|
|
"\n",
|
|||
|
|
" # Flows in quantity terms\n",
|
|||
|
|
" net_flow_qty_sum=(\"net_flow_qty\", \"sum\"),\n",
|
|||
|
|
" gross_flow_qty_sum=(\"gross_flow_qty\", \"sum\"),\n",
|
|||
|
|
" gross_flow_qty_mean=(\"gross_flow_qty\", \"mean\"),\n",
|
|||
|
|
"\n",
|
|||
|
|
" # Dispersion / volatility proxy\n",
|
|||
|
|
" net_flow_qty_vol=(\"net_flow_qty\", \"std\"),\n",
|
|||
|
|
" rel_intensity=(\"rel_intensity_m\", \"mean\"),\n",
|
|||
|
|
" netflow_to_aum=(\"netflow_to_aum_m\", \"mean\"),\n",
|
|||
|
|
"\n",
|
|||
|
|
" # Trading frequency proxy\n",
|
|||
|
|
" n_tx_total=(\"n_tx\", \"sum\"),\n",
|
|||
|
|
" )\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 5) Clean NaNs due to std on constant series\n",
|
|||
|
|
"df_client_feat0[\"net_flow_qty_vol\"] = df_client_feat0[\"net_flow_qty_vol\"].fillna(0.0)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 6) Log transforms (useful because distributions are heavy-tailed)\n",
|
|||
|
|
"df_client_feat0[\"log_aum_qty_mean\"] = np.log1p(df_client_feat0[\"aum_qty_mean\"].clip(lower=0))\n",
|
|||
|
|
"df_client_feat0[\"log_gross_flow_qty_mean\"] = np.log1p(df_client_feat0[\"gross_flow_qty_mean\"].clip(lower=0))\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 7) Global turnover proxy\n",
|
|||
|
|
"df_client_feat0[\"gross_flow_to_aum\"] = df_client_feat0[\"gross_flow_qty_sum\"] / (df_client_feat0[\"aum_qty_mean\"].abs() + eps)\n",
|
|||
|
|
"\n",
|
|||
|
|
"dfc0 = df_client_feat0.copy()\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Minimal filters (adjust if needed)\n",
|
|||
|
|
"dfc0 = dfc0[(dfc0[\"n_months\"] >= 6)] # at least 6 observed months\n",
|
|||
|
|
"dfc0 = dfc0[(dfc0[\"aum_qty_mean\"].abs() > 0)] # avoid zero holdings\n",
|
|||
|
|
"\n",
|
|||
|
|
"dfc0[\"frequency\"] = dfc0[\"flow_freq\"]\n",
|
|||
|
|
"dfc0[\"rel_intensity_total\"] = dfc0[\"gross_flow_to_aum\"]\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Choose a compact, interpretable feature set (quantity-based)\n",
|
|||
|
|
"features0 = [\n",
|
|||
|
|
" \"log_aum_qty_mean\", # size (log)\n",
|
|||
|
|
" \"log_gross_flow_qty_mean\", # activity intensity (log)\n",
|
|||
|
|
" \"frequency\", # activity frequency\n",
|
|||
|
|
" \"rel_intensity_total\", # turnover proxy\n",
|
|||
|
|
" \"net_flow_qty_vol\", # volatility of net flows\n",
|
|||
|
|
" \"n_tx_total\", # total number of transactions\n",
|
|||
|
|
"]\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Build X (drop NaNs/Infs)\n",
|
|||
|
|
"X0 = (dfc0[features0]\n",
|
|||
|
|
" .replace([np.inf, -np.inf], np.nan)\n",
|
|||
|
|
" .dropna()\n",
|
|||
|
|
" .copy())\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Keep IDs aligned\n",
|
|||
|
|
"ids = dfc0.loc[X0.index, ID_COL].copy()\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Standardize (critical for distance-based clustering)\n",
|
|||
|
|
"scaler = StandardScaler()\n",
|
|||
|
|
"X_scaled0 = scaler.fit_transform(X0)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 74,
|
|||
|
|
"id": "2ed912bd-040c-4436-9a9d-ae6bf7d9248f",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAezFJREFUeJzt3XlcVNX7B/DPzDDs+765oMgmKJsLhJHmlku529ctjSw109JyrdxKrNTMJTUy3NNKKPfSFtMUcUNBwV0RUFbZRLZhfn8Q83Nim9GBGZjP+/XiJffec+88DyPwcO655wikUqkURERERFpMqO4AiIiIiNSNBRERERFpPRZEREREpPVYEBEREZHWY0FEREREWo8FEREREWk9FkRERESk9VgQERERkdZjQURERERajwURkQZzd3fHmjVrZNtr1qyBu7s7cnJy1BiV9nj06BHmz5+P5557Du7u7vj000/VHRIRNRAWRESNLCoqCu7u7rV+xMXFqTtE+tfGjRsRHR2N//3vf/j888/xyiuvNOjr9ejRA2+99Va1/T///DM8PT0RFhaGkpKSBo2BSFvpqDsAIm01bdo0ODs7V9vfsmVLNURDNYmJiUHHjh0xdepUtcWwd+9ezJ07F8HBwfj666+hp6entliImjMWRERq8vzzz8PHx0fdYTQpRUVFMDQ0bLTXy87Ohqurq8quV15ejoqKCujq6irU/sCBA5gzZw66du3KYoiogfGWGVET9PDhQ0yfPh3+/v7o0qULPvnkk2q3UsrLy7Fu3Tr07NkT3t7e6NGjB1auXInS0lJZm/DwcHTp0gVSqVS2b8mSJXB3d8fWrVtl+7KysuDu7o6dO3fWG9svv/yCYcOGoWPHjujUqRNGjx6NEydOyI7/d1xUlR49emDOnDmy7apbi7GxsVi4cCGCgoIQGhqKw4cPy/b/165du+Du7o5r167J9t28eRPTpk1D586d4ePjgyFDhuD333+vM4fTp0/D3d0dKSkp+Ouvv2S3M1NSUgBUFkrz5s1DcHAwfHx88PLLLyM6OlruGikpKXB3d8emTZuwefNm9OzZEz4+Prh582a9X0MAOHjwID744AN07twZ69evr7cYcnd3x+LFi3Ho0CH069cPHTp0wMiRI3H16lXZ16ZXr17w8fHB2LFjZbk86eLFiwgLC0NAQAA6duyIMWPG4Ny5c3JtUlNTsXDhQvTp0wcdOnRAly5dMG3atGrXq3r/zp07h/DwcHTt2hW+vr54++23q42Bi4+PR1hYGLp06YIOHTqgR48emDt3rkJfJyJVYQ8RkZoUFhZW+8UgEAhgYWFR77nvvvsunJycMHPmTMTFxWHbtm3Iz8/H559/Lmvz4YcfIjo6Gn369MGECRNw6dIlbNy4ETdv3sS6desAAIGBgdi8eTOuX78ONzc3AMDZs2chFApx9uxZjBs3TrYPADp16lRnXGvXrsWaNWvg5+eHadOmQSwW4+LFi4iJiUFISIjiX5wnLFq0CJaWlnj77bdRVFSEF154AYaGhjh06BA6d+4s1/bgwYNo166dLJfr16/jf//7H+zs7DBx4kTZeW+//TbWrFmDXr161fiabdu2xeeff47w8HDY29tjwoQJAABLS0sUFxdj7NixSE5OxujRo+Hs7IzDhw9jzpw5yM/Px2uvvSZ3raioKJSUlGDEiBHQ1dWFmZlZvTn/+uuv+OCDDxAYGIgNGzZAX19foa/V2bNn8ccff2DUqFEAgG+++QaTJk3CG2+8gZ07d2LUqFHIy8vDt99+i3nz5skVvadOncLEiRPh7e2NqVOnQiAQICoqCq+99hp27tyJDh06AKgsXi5cuID+/fvD3t4eqamp+P777zFu3DgcOHAABgYGcjF98sknMDU1xdSpU5GamootW7Zg8eLFWLVqFYDK4jIsLAwWFhZ48803YWpqipSUFBw5ckShnIlURkpEjWrPnj1SNze3Gj+8vb3l2rq5uUlXr1
4t2169erXUzc1NOmnSJLl2CxculLq5uUkTExOlUqlUmpiYKHVzc5POnz9frt2yZcukbm5u0lOnTkmlUqk0Oztb6ubmJt2xY4dUKpVK8/PzpR4eHtJp06ZJg4ODZectWbJE2rlzZ2lFRUWted25c0fq4eEhffvtt6USiUTu2JPn/TenKt27d5fOnj272tfpf//7n7S8vFyu7YwZM6RBQUFy+zMyMqQeHh7StWvXyva99tpr0gEDBkhLSkrkYhk5cqS0d+/etebyZExvvvmm3L7NmzdL3dzcpL/88otsX2lpqXTkyJFSX19faUFBgVQqlUrv3bsndXNzk/r7+0uzs7Prfa2q1wsJCZF6eXlJx4wZIy0qKlLoPKlUKvv/c+/ePdm+Xbt2Sd3c3KTPPfecLC6pVCpdsWKF1M3NTda2oqJC2rt3b+nrr78u9149fvxY2qNHD+mECRPk9v3XhQsXpG5ubtLo6GjZvqr3b/z48XLXXLp0qdTT01Oan58vlUql0iNHjkjd3Nykly5dUjhXoobAW2ZEavLxxx8jMjJS7iMiIkKhc0ePHi23PWbMGADA33//DQA4duwYAMh6Nqq8/vrrcsctLS3Rpk0bWQ/Q+fPnIRKJEBYWhqysLNy5cwcAcO7cOfj7+0MgENQa09GjR1FRUYG3334bQqH8j5a6zqvPiBEjIBKJ5Pa99NJLyM7Olrtt9uuvv6KiogL9+vUDAOTm5iImJgYvvfSSrDcuJycHDx8+REhICO7cuYP09HSl4/n7779hY2ODAQMGyPaJxWKMHTsWRUVFOHPmjFz73r17w9LSUuHr5+Xloby8HPb29gr3DFUJCgqSG6jfsWNHWQzGxsay/VW9Pffu3QMAJCYm4s6dOxg4cCAePnwo+1oVFRUhKCgIZ86cQUVFBQDIxVRWVoaHDx+iZcuWMDU1xZUrV6rFNGLECLn3PzAwEBKJBKmpqQAAExMTAMBff/2FsrIypfIlUiXeMiNSkw4dOjz1oOpWrVrJbbds2RJCoVA2jiM1NRVCobDaE2s2NjYwNTWV/TICKn9BVRVIZ8+ehbe3N3x8fGBubo6zZ8/C2toaSUlJcgVATZKTkyEUCtG2bdunyqk2NT2J9/zzz8PExAQHDx5EUFAQgMrbZZ6ennBxcZHFI5VK8dVXX+Grr76q8drZ2dmws7NTKp7U1FS0atWqWtFXlXdaWlq98dclKCgIDg4O+P7772FmZoYPP/xQdqygoADFxcWybbFYDHNzc9m2g4OD3LWqiiB7e3u5/VVFSH5+PgDICt/Zs2fXGldBQQHMzMxQXFyMjRs3IioqCunp6XLjzwoKCqqd5+joKLdtamoq99qdO3dGnz59sHbtWmzevBmdO3dGz549MXDgQIUHnxOpAgsiomagth4YRXpmAgIC8MMPP+DevXs4e/YsAgICIBAI4O/vj3PnzsHW1hYVFRUIDAxUddhyJBJJjftrGkysq6uLnj174siRI1iwYAGys7Nx/vx5zJgxQ9amqkfj9ddfR7du3Wq8dmNMcaBsLw9Q2XuYn5+Pbdu2wczMDO+88w4A4NNPP5UbvN25c2ds27ZNtv3fnrT69lcVM1X/zpo1C56enjW2rXq6b8mSJbKxRb6+vjAxMYFAIMB7770nVxxV+W/h+N/XFggEWL16NeLi4vDnn3/i+PHjmDdvHiIjI7F7924YGRnVeD6RqrEgImqC7t69ixYtWshtV1RUyHojnJycUFFRgbt378r12GRlZSE/Px9OTk6yfQEBAQCAf/75B/Hx8XjzzTcBVA6g/v7772FrawtDQ0O0b9++zphatmyJiooK3Lx5s9ZfqgBgZmYm6x2oUlpaiszMTAWzr/TSSy8hOjoap06dws2bNyGVSvHSSy/Jjld9fcRiMYKDg5W6dl2cnJxw9epVVFRUyP2yv3XrFoDqPSJPQygU4rPPPkNBQQHWrl0LMzMzjBs3Dm+88QZefvllWbuq3pZnVfW1MjY2rvdr9euvv2
LQoEFyTwSWlJTU2DukDF9fX/j6+uK9997Dvn378P777+PgwYMYPnz4M12XSFEcQ0TUBO3YsUNue/v27QAqbyUBQGh
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkAAAAHHCAYAAABXx+fLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAjalJREFUeJzt3Xd809X6wPFPki46KJ1QymyhLdBC2VKWIAiCiIKoV0HBunCjiOv6wyoK94J6Rb1chiLgAJUhKKKigqBlyJBRZlt2SxfdO/n+/kgTKB00JavN8369eGm++eab5yRp8/Sc55yjUhRFQQghhBDCgahtHYAQQgghhLVJAiSEEEIIhyMJkBBCCCEcjiRAQgghhHA4kgAJIYQQwuFIAiSEEEIIhyMJkBBCCCEcjiRAQgghhHA4kgAJIYQQwuFIAiSEmQwdOpSXXnrJeHvnzp2Eh4ezc+dO47FJkyZx66232iI8YSVLlizhpptuolOnTowdO9bW4QghaiAJkBDXcOzYMZ5++mmGDBlCVFQUAwcOZMqUKaxYscLWoVnEyZMn+eCDDzh37lyV+z7//HPWrFljg6gahu3btzN37lx69OjB7Nmzee655yz6fC+99BLdu3evcvzo0aP07duXoUOHVvs+CiHAydYBCGHP9u7dy/3330/Lli2ZMGECAQEBpKSk8Pfff7N8+XImTZpkPHfTpk2oVCobRmseJ0+e5MMPP6RPnz60atWq0n1ffvklPj4+jBs3zkbR2bcdO3agVqt56623cHFxsUkMx48fZ/Lkybi7u7Ns2bIq76EQQk8SICFq8b///Q8vLy+++eYbmjZtWum+zMzMSrdt9YUnrp+iKJSUlODm5nZd18nMzMTNzc1snwVT4zpx4gQPPPAAbm5uLF++nNatW5slDiEaIxkCE6IWZ86coUOHDlWSHwA/P79Kt6+uAarNyZMnmTRpEt26dWPgwIEsXry4yjmZmZm88sorxMTEEBUVxW233cbatWsrnVNdnRHAuXPnCA8PrzJclZiYyNNPP02fPn2Iiopi3Lhx/PLLL8b716xZwzPPPAPA/fffT3h4uPH6Q4cO5cSJE+zatct4/MoesNzcXN566y0GDx5MZGQkw4cPZ9GiReh0umu+HgcPHiQ2Npa+ffvStWtXhg4dyssvv1zpHJ1Ox7JlyxgzZgxRUVHccMMNxMbGcvDgQeM55eXlfPTRRwwbNozIyEiGDh3Ku+++S2lpaaVrDR06lEcffZRt27Yxbtw4unbtysqVK6+rHYbXu7Cw0Pj6GF5/c8R1LYmJiUyePBkXF5c6JT+GerSjR48yceJEunXrxvDhw9m0aRMAu3btYsKECXTt2pURI0bw559/VrnGxYsXefnll4mJiSEyMpLRo0fzzTffVDqntLSU999/n3HjxtGzZ0+io6O599572bFjR6XzDJ/Zjz/+mFWrVhlfq/Hjx3PgwIFK56anp/Pyyy8zaNAgIiMjGTBgAFOnTpXhPmES6QESohbBwcHs27eP48ePExYWZpZr5uTk8NBDDzF8+HBuueUWfvzxR+bNm0dYWBiDBw8GoLi4mEmTJnHmzBnuu+8+WrVqxaZNm3jppZfIzc3lgQceMPl5T5w4wT/+8Q+aN2/Oww8/jLu7Oz/88ANPPPEEH3zwAcOHD6d3795MmjSJFStW8NhjjxESEgJAaGgor7zyCm+++Sbu7u489thjAPj7+wNQVFTExIkTuXjxIvfccw9BQUHs27ePd999l/T0dF599dUa48rMzCQ2NhYfHx8eeeQRmjZtyrlz5/j5558rnffqq6+yZs0aBg0axJ133olWq+Wvv/7i77//JioqCoB//vOfrF27lhEjRjBlyhQOHDjAwoULSUxM5KOPPqp0veTkZJ5//nnuvvtu7rrrLtq3b39d7fj3v//NV199xYEDB5g1axYAPXr0MEtc15KUlMQDDzyARqNh+fLltGnT5pqPAf1n8bHHHmPUqF
GMHDmSL7/8kueeew6dTsfbb7/NPffcw6233srHH3/M008/zZYtW/D09AQgIyODu+66C5VKxX333Yevry+///47r776Kvn5+UyePBmA/Px8vv76a2699VYmTJhAQUEB33zzDQ899BBff/01nTp1qhTTd999R0FBAXfffTcqlYolS5bw1FNPsXnzZpydnQF46qmnOHnyJBMnTiQ4OJisrCz++OMPUlJSZMhP1J0ihKjR9u3blU6dOimdOnVS7r77buXf//63sm3bNqW0tLTKuUOGDFFefPFF4+0dO3YoYWFhyo4dO4zHJk6cqISFhSlr1641HispKVH69++vPPXUU8Zjn376qRIWFqZ8++23xmOlpaXK3XffrURHRyt5eXk1PoeiKMrZs2eVsLAwZfXq1cZjDzzwgHLrrbcqJSUlxmM6nU65++67lZtvvtl47Icffqj2moqiKKNHj1YmTpxY5fhHH32kREdHK8nJyZWOz5s3T+nUqZNy4cKFKo8x+Pnnn5WwsDDlwIEDNZ4THx+vhIWFKW+++WaV+3Q6naIoinLkyBElLCxMefXVVyvdP2fOHCUsLEyJj483HhsyZIgSFham/P7772Zrh6IoyosvvqhER0dXOmaOuGp7vi5duij9+/dXBgwYUCXu2hg+ixs2bDAeS0xMVMLCwpSIiAhl//79xuPbtm2r8nl65ZVXlP79+ytZWVmVrjtt2jSlZ8+eSlFRkaIoilJeXl7pM6coipKTk6PExMQoL7/8svGY4TPbp08fJTs723h88+bNSlhYmPLrr78aHxsWFqYsWbKkzm0VojoyBCZELfr378/KlSsZOnQoR48eZcmSJcTGxjJo0KBKQ0emcHd3rzQ92sXFhaioKM6ePWs89vvvvxMQEFBpyryzszOTJk2isLCQ3bt3m/Sc2dnZ7Nixg1tuuYX8/HyysrLIysri0qVLDBgwgFOnTnHx4sV6tQf0BeA9e/akadOmxmtnZWURExODVqutNV4vLy8AtmzZQllZWbXn/PTTT6hUKp588skq9xkKz7du3QrAlClTKt3/4IMPVrrfoFWrVgwcONBs7aiJOeKqjVarJTs7m2bNmuHj42NSbO7u7owePdp4OyQkhKZNmxIaGkq3bt2Mxw3/b/iMKorCTz/9xNChQ1EUpdJrNWDAAPLy8jh8+DAAGo3GWBOl0+nIzs6mvLycyMhIEhISqsQ0atQovL29jbd79epV6bnd3NxwdnZm165d5OTkmNReIa4kQ2BCXEPXrl358MMPKS0t5ejRo2zevJlPP/2UZ555hnXr1tGhQweTrteiRYsqs8W8vb05duyY8fb58+dp27YtanXlv1FCQ0MBuHDhgknPeebMGRRF4f333+f999+v9pzMzEyaN29u0nUNTp8+zbFjx+jXr1+192dlZdX42D59+jBixAg+/PBDPv30U/r06cOwYcMYM2aM8YvzzJkzBAYG0qxZsxqvc/78edRqdZXhn4CAAJo2bcr58+crHa9uqOR62mHJuGrj5ubGrFmzmD59Oo8++iiffPIJ7u7ugH4oNS8vr8rzGlT3WfTy8qJFixZVjoG+Pgr0r0Nubi6rVq1i1apV1cZ15Wu1du1aPvnkE5KTkysludW1NSgoqNJtQzJkeG4XFxemT5/Ov/71L/r370+3bt248cYbuf322yu1TYhrkQRIiDpycXGha9eudO3alXbt2vHyyy+zadOmanslaqPRaMwWU03T7q8u2DXcfvDBB2vsXahr3UhNz9e/f38eeuihau9v165djY9VqVTMnz+f/fv389tvv7Ft2zZeeeUVli5dyqpVq/Dw8DAplrouRVDdzKrraYcl47qW0aNHk5OTQ1xcHE899RQLFizAxcWFjRs3VikmvzLRrumzWNNxRVGAy5+n2267jTvuuKPac8PDwwH49ttveemllxg2bBixsbH4+fmh0WhYuHBhpV7Puj43wOTJkxk6dCibN29m+/btvP/++yxatIhly5bRuX
Pnah8vxNUkARKiHiIjIwFIS0uzyPWDg4M5duwYOp2uUi9QUlISAC1btgQwzk67+q/8q3sVDDOCnJ2diYmJqfW5a/u
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Best K by silhouette: 5\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# 1er clustering\n",
|
|||
|
|
"\n",
|
|||
|
|
"k_range = range(2, 21)\n",
|
|||
|
|
"inertias = []\n",
|
|||
|
|
"silhouettes = []\n",
|
|||
|
|
"\n",
|
|||
|
|
"for k in k_range:\n",
|
|||
|
|
" km = KMeans(n_clusters=k, n_init=30, random_state=42)\n",
|
|||
|
|
" labels = km.fit_predict(X_scaled0)\n",
|
|||
|
|
" inertias.append(km.inertia_)\n",
|
|||
|
|
" silhouettes.append(silhouette_score(X_scaled0, labels))\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Elbow plot\n",
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"plt.plot(list(k_range), inertias, marker=\"o\")\n",
|
|||
|
|
"plt.xlabel(\"Number of clusters K\")\n",
|
|||
|
|
"plt.ylabel(\"Inertia (within-cluster SSE)\")\n",
|
|||
|
|
"plt.title(\"Elbow curve for K-means\")\n",
|
|||
|
|
"plt.show()\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Silhouette plot\n",
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"plt.plot(list(k_range), silhouettes, marker=\"o\")\n",
|
|||
|
|
"plt.xlabel(\"Number of clusters K\")\n",
|
|||
|
|
"plt.ylabel(\"Silhouette score\")\n",
|
|||
|
|
"plt.title(\"Silhouette score for K-means\")\n",
|
|||
|
|
"plt.show()\n",
|
|||
|
|
"\n",
|
|||
|
|
"best_k = list(k_range)[int(np.argmax(silhouettes))]\n",
|
|||
|
|
"print(\"Best K by silhouette:\", best_k)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 75,
|
|||
|
|
"id": "3e11d00c-da49-435e-b47e-964478061ed6",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>n_clients</th>\n",
|
|||
|
|
" <th>aum_qty_med</th>\n",
|
|||
|
|
" <th>freq_med</th>\n",
|
|||
|
|
" <th>rel_int_med</th>\n",
|
|||
|
|
" <th>gross_flow_med</th>\n",
|
|||
|
|
" <th>n_tx_med</th>\n",
|
|||
|
|
" <th>vol_med</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>cluster_kmeans</th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2.0</th>\n",
|
|||
|
|
" <td>235</td>\n",
|
|||
|
|
" <td>3.936071e+04</td>\n",
|
|||
|
|
" <td>0.986111</td>\n",
|
|||
|
|
" <td>4.136974</td>\n",
|
|||
|
|
" <td>2031.883965</td>\n",
|
|||
|
|
" <td>1069.0</td>\n",
|
|||
|
|
" <td>2.735326e+03</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1.0</th>\n",
|
|||
|
|
" <td>105</td>\n",
|
|||
|
|
" <td>4.528840e+05</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>4.651358</td>\n",
|
|||
|
|
" <td>28651.252789</td>\n",
|
|||
|
|
" <td>7585.0</td>\n",
|
|||
|
|
" <td>3.004524e+04</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0.0</th>\n",
|
|||
|
|
" <td>66</td>\n",
|
|||
|
|
" <td>6.912599e+04</td>\n",
|
|||
|
|
" <td>0.109903</td>\n",
|
|||
|
|
" <td>1.632692</td>\n",
|
|||
|
|
" <td>2773.037334</td>\n",
|
|||
|
|
" <td>7.5</td>\n",
|
|||
|
|
" <td>1.080610e+04</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4.0</th>\n",
|
|||
|
|
" <td>13</td>\n",
|
|||
|
|
" <td>4.783496e+04</td>\n",
|
|||
|
|
" <td>0.884615</td>\n",
|
|||
|
|
" <td>27.093690</td>\n",
|
|||
|
|
" <td>10629.415385</td>\n",
|
|||
|
|
" <td>1712.0</td>\n",
|
|||
|
|
" <td>1.876254e+04</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3.0</th>\n",
|
|||
|
|
" <td>2</td>\n",
|
|||
|
|
" <td>1.470709e+07</td>\n",
|
|||
|
|
" <td>0.586207</td>\n",
|
|||
|
|
" <td>5.705179</td>\n",
|
|||
|
|
" <td>851698.564766</td>\n",
|
|||
|
|
" <td>2210.5</td>\n",
|
|||
|
|
" <td>3.218539e+06</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" n_clients aum_qty_med freq_med rel_int_med \\\n",
|
|||
|
|
"cluster_kmeans \n",
|
|||
|
|
"2.0 235 3.936071e+04 0.986111 4.136974 \n",
|
|||
|
|
"1.0 105 4.528840e+05 1.000000 4.651358 \n",
|
|||
|
|
"0.0 66 6.912599e+04 0.109903 1.632692 \n",
|
|||
|
|
"4.0 13 4.783496e+04 0.884615 27.093690 \n",
|
|||
|
|
"3.0 2 1.470709e+07 0.586207 5.705179 \n",
|
|||
|
|
"\n",
|
|||
|
|
" gross_flow_med n_tx_med vol_med \n",
|
|||
|
|
"cluster_kmeans \n",
|
|||
|
|
"2.0 2031.883965 1069.0 2.735326e+03 \n",
|
|||
|
|
"1.0 28651.252789 7585.0 3.004524e+04 \n",
|
|||
|
|
"0.0 2773.037334 7.5 1.080610e+04 \n",
|
|||
|
|
"4.0 10629.415385 1712.0 1.876254e+04 \n",
|
|||
|
|
"3.0 851698.564766 2210.5 3.218539e+06 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 75,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"km = KMeans(n_clusters=5, n_init=50, random_state=42)\n",
|
|||
|
|
"labels_km = km.fit_predict(X_scaled0)\n",
|
|||
|
|
"\n",
|
|||
|
|
"dfc0.loc[X0.index, \"cluster_kmeans\"] = labels_km\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Profiling table (medians = robust to outliers)\n",
|
|||
|
|
"k_profile = (\n",
|
|||
|
|
" dfc0.loc[X0.index]\n",
|
|||
|
|
" .groupby(\"cluster_kmeans\")\n",
|
|||
|
|
" .agg(n_clients=(ID_COL, \"count\"),\n",
|
|||
|
|
" aum_qty_med=(\"aum_qty_mean\", \"median\"),\n",
|
|||
|
|
" freq_med=(\"frequency\", \"median\"),\n",
|
|||
|
|
" rel_int_med=(\"rel_intensity_total\", \"median\"),\n",
|
|||
|
|
" gross_flow_med=(\"gross_flow_qty_mean\", \"median\"),\n",
|
|||
|
|
" n_tx_med=(\"n_tx_total\", \"median\"),\n",
|
|||
|
|
" vol_med=(\"net_flow_qty_vol\", \"median\"),\n",
|
|||
|
|
" )\n",
|
|||
|
|
" .sort_values(\"n_clients\", ascending=False)\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"k_profile"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 110,
|
|||
|
|
"id": "a466c121-8932-45cd-bba0-ed1388af0e48",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"# Ajout de variables\n",
|
|||
|
|
"\n",
|
|||
|
|
"# External data: projet-bdc-data/carmignac/Data Modélisation/Nav\n",
|
|||
|
|
"PATH_NAV = \"s3://projet-bdc-data/carmignac/Data Modélisation/Nav/NAV_Bench_data.csv\" #C’est la table de valorisation / performance du produit.\n",
|
|||
|
|
"PATH_RATES = \"s3://projet-bdc-data/carmignac/Data Modélisation/market data/esterRates.csv\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"# optional competitors\n",
|
|||
|
|
"PATH_COMP_FLOWS = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/daily_estimated_flows.csv\"\n",
|
|||
|
|
"PATH_COMP_PERF = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/weekly_perf_full.csv\"\n",
|
|||
|
|
"PATH_PEERS = \"s3://projet-bdc-carmignac-g3/peers/CAD_peers.csv\""
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 111,
|
|||
|
|
"id": "e0a0db5c-5e8d-423d-a3da-e5195b2a982c",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"df_nav = pd.read_csv(PATH_NAV, sep=\";\") \n",
|
|||
|
|
"# df_nav : base de suivi de la performance des fonds dans le temps\n",
|
|||
|
|
"# Price (TF PartPrice) : prix de la part du fonds\n",
|
|||
|
|
"# AUM Eur (Assets Under Management) : Taille du fonds en euros\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rates = pd.read_csv(PATH_RATES,sep=\";\")\n",
|
|||
|
|
"# df_rates : évolution dans le temps d’un taux de rendement obligataire (YTM)\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_comp_flows = pd.read_csv(PATH_COMP_FLOWS,sep=\";\")\n",
|
|||
|
|
"# Estimated Fund-level Net Flow (Daily) : Flux nets estimés du fonds\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_comp_perf = pd.read_csv(PATH_COMP_PERF,sep=\";\")\n",
|
|||
|
|
"# perfPeriod : Horizon de performance\n",
|
|||
|
|
"# return : Performance du fonds sur la période donnée\n",
|
|||
|
|
"# percentile : Position du fonds par rapport à ses pairs\n",
|
|||
|
|
"# 0 → top performer\n",
|
|||
|
|
"# 100 → mauvais performer\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_peers = pd.read_csv(PATH_PEERS,sep=\"|\")\n",
|
|||
|
|
"# Global Broad Category Group : grande classe d’actifs\n",
|
|||
|
|
"# Global Category : catégorie plus précise et Morningstar Category\n",
|
|||
|
|
"# Index Fund : fonds indiciel (passif)\n",
|
|||
|
|
"# Enhanced Index → quasi-passif (légère surperformance recherchée)\n",
|
|||
|
|
"# Inception Date → date de création de la part\n",
|
|||
|
|
"# Inception Date of Fund's Oldest Share Class → âge réel du fonds\n",
|
|||
|
|
"# Domicile : pays de domiciliation du fonds"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 112,
|
|||
|
|
"id": "018abf7e-f15c-46e4-9625-028e7df6791d",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"ID_COL = \"Registrar Account - ID\"\n",
|
|||
|
|
"ISIN_COL = \"Product - Isin\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"FLOW_DATE_COL = \"Centralisation Date\"\n",
|
|||
|
|
"AUM_DATE_COL = \"Centralisation Date\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"FLOW_QTY_COL = \"Quantity - NetFlows\"\n",
|
|||
|
|
"FLOW_SUB_COL = \"Quantity - Subscription\"\n",
|
|||
|
|
"FLOW_RED_COL = \"Quantity - Redemption\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"AUM_QTY_COL = \"Quantity - AUM\"\n",
|
|||
|
|
"AUM_VAL_COL = \"Value - AUM €\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"REGION_COL = \"Registrar Account - Region\"\n",
|
|||
|
|
"COUNTRY_COL = \"RegistrarAccount - Country\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"NAV_DATE_COL = \"Dat\"\n",
|
|||
|
|
"NAV_ISIN_COL = \"Isin\"\n",
|
|||
|
|
"NAV_PRICE_COL = \"Price (TF PartPrice)\"\n",
|
|||
|
|
"NAV_BENCH_COL = \"PriceBench\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"RATE_DATE_COL = \"Date\"\n",
|
|||
|
|
"RATE_VAL_COL = \"Yld to Maturity\""
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 113,
|
|||
|
|
"id": "51fdab3a-6f53-4042-8530-f91030134318",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"for df, date_col in [\n",
|
|||
|
|
" (df_flows, FLOW_DATE_COL),\n",
|
|||
|
|
" (df_aum, AUM_DATE_COL),\n",
|
|||
|
|
" (df_nav, NAV_DATE_COL),\n",
|
|||
|
|
" (df_rates, RATE_DATE_COL),\n",
|
|||
|
|
"]:\n",
|
|||
|
|
" df[date_col] = pd.to_datetime(df[date_col], errors=\"coerce\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_flows[\"month\"] = df_flows[FLOW_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
|
|||
|
|
"df_aum[\"month\"] = df_aum[AUM_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
|
|||
|
|
"df_nav[\"month\"] = df_nav[NAV_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
|
|||
|
|
"df_rates[\"month\"] = df_rates[RATE_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"for col in [FLOW_QTY_COL, FLOW_SUB_COL, FLOW_RED_COL]:\n",
|
|||
|
|
" df_flows[col] = pd.to_numeric(df_flows[col], errors=\"coerce\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"for col in [AUM_QTY_COL, AUM_VAL_COL]:\n",
|
|||
|
|
" df_aum[col] = pd.to_numeric(df_aum[col], errors=\"coerce\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"for col in [NAV_PRICE_COL, NAV_BENCH_COL]:\n",
|
|||
|
|
" df_nav[col] = pd.to_numeric(df_nav[col], errors=\"coerce\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rates[RATE_VAL_COL] = pd.to_numeric(df_rates[RATE_VAL_COL], errors=\"coerce\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"for df, col in [(df_flows, ISIN_COL), (df_aum, ISIN_COL)]:\n",
|
|||
|
|
" df[col] = df[col].astype(str).str.strip()\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_nav[NAV_ISIN_COL] = df_nav[NAV_ISIN_COL].astype(str).str.strip()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 114,
|
|||
|
|
"id": "da8e1811-88a6-459f-8350-42ae4d394c26",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"(701064, 18)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>Product - Isin</th>\n",
|
|||
|
|
" <th>month</th>\n",
|
|||
|
|
" <th>aum_qty</th>\n",
|
|||
|
|
" <th>aum_val</th>\n",
|
|||
|
|
" <th>region</th>\n",
|
|||
|
|
" <th>country</th>\n",
|
|||
|
|
" <th>net_flow_qty</th>\n",
|
|||
|
|
" <th>gross_flow_qty</th>\n",
|
|||
|
|
" <th>sub_qty</th>\n",
|
|||
|
|
" <th>red_qty</th>\n",
|
|||
|
|
" <th>n_tx</th>\n",
|
|||
|
|
" <th>region_flow</th>\n",
|
|||
|
|
" <th>country_flow</th>\n",
|
|||
|
|
" <th>active_rel_month</th>\n",
|
|||
|
|
" <th>holding_rel_month</th>\n",
|
|||
|
|
" <th>flow_to_aum_rel</th>\n",
|
|||
|
|
" <th>turnover_rel</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-01-31</td>\n",
|
|||
|
|
" <td>49094.915</td>\n",
|
|||
|
|
" <td>3.242523e+07</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>673.990</td>\n",
|
|||
|
|
" <td>956.01</td>\n",
|
|||
|
|
" <td>859.990</td>\n",
|
|||
|
|
" <td>-186.000</td>\n",
|
|||
|
|
" <td>9.0</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.013728</td>\n",
|
|||
|
|
" <td>0.019473</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-02-28</td>\n",
|
|||
|
|
" <td>49797.915</td>\n",
|
|||
|
|
" <td>3.368032e+07</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>988.000</td>\n",
|
|||
|
|
" <td>1712.00</td>\n",
|
|||
|
|
" <td>1350.000</td>\n",
|
|||
|
|
" <td>-362.000</td>\n",
|
|||
|
|
" <td>12.0</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.019840</td>\n",
|
|||
|
|
" <td>0.034379</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-03-31</td>\n",
|
|||
|
|
" <td>50302.627</td>\n",
|
|||
|
|
" <td>3.505691e+07</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>9.710</td>\n",
|
|||
|
|
" <td>1447.71</td>\n",
|
|||
|
|
" <td>785.710</td>\n",
|
|||
|
|
" <td>-776.000</td>\n",
|
|||
|
|
" <td>12.0</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.000193</td>\n",
|
|||
|
|
" <td>0.028780</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-04-30</td>\n",
|
|||
|
|
" <td>50219.393</td>\n",
|
|||
|
|
" <td>3.452433e+07</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>-123.234</td>\n",
|
|||
|
|
" <td>1708.19</td>\n",
|
|||
|
|
" <td>853.478</td>\n",
|
|||
|
|
" <td>-976.712</td>\n",
|
|||
|
|
" <td>11.0</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>-0.002454</td>\n",
|
|||
|
|
" <td>0.034015</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-05-31</td>\n",
|
|||
|
|
" <td>53685.393</td>\n",
|
|||
|
|
" <td>3.699729e+07</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>121.000</td>\n",
|
|||
|
|
" <td>529.00</td>\n",
|
|||
|
|
" <td>325.000</td>\n",
|
|||
|
|
" <td>-204.000</td>\n",
|
|||
|
|
" <td>6.0</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.002254</td>\n",
|
|||
|
|
" <td>0.009854</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID Product - Isin month aum_qty aum_val \\\n",
|
|||
|
|
"0 18872 FR0010135103 2015-01-31 49094.915 3.242523e+07 \n",
|
|||
|
|
"1 18872 FR0010135103 2015-02-28 49797.915 3.368032e+07 \n",
|
|||
|
|
"2 18872 FR0010135103 2015-03-31 50302.627 3.505691e+07 \n",
|
|||
|
|
"3 18872 FR0010135103 2015-04-30 50219.393 3.452433e+07 \n",
|
|||
|
|
"4 18872 FR0010135103 2015-05-31 53685.393 3.699729e+07 \n",
|
|||
|
|
"\n",
|
|||
|
|
" region country net_flow_qty gross_flow_qty sub_qty red_qty \\\n",
|
|||
|
|
"0 Switzerland Switzerland 673.990 956.01 859.990 -186.000 \n",
|
|||
|
|
"1 Switzerland Switzerland 988.000 1712.00 1350.000 -362.000 \n",
|
|||
|
|
"2 Switzerland Switzerland 9.710 1447.71 785.710 -776.000 \n",
|
|||
|
|
"3 Switzerland Switzerland -123.234 1708.19 853.478 -976.712 \n",
|
|||
|
|
"4 Switzerland Switzerland 121.000 529.00 325.000 -204.000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" n_tx region_flow country_flow active_rel_month holding_rel_month \\\n",
|
|||
|
|
"0 9.0 Switzerland Switzerland 1 1 \n",
|
|||
|
|
"1 12.0 Switzerland Switzerland 1 1 \n",
|
|||
|
|
"2 12.0 Switzerland Switzerland 1 1 \n",
|
|||
|
|
"3 11.0 Switzerland Switzerland 1 1 \n",
|
|||
|
|
"4 6.0 Switzerland Switzerland 1 1 \n",
|
|||
|
|
"\n",
|
|||
|
|
" flow_to_aum_rel turnover_rel \n",
|
|||
|
|
"0 0.013728 0.019473 \n",
|
|||
|
|
"1 0.019840 0.034379 \n",
|
|||
|
|
"2 0.000193 0.028780 \n",
|
|||
|
|
"3 -0.002454 0.034015 \n",
|
|||
|
|
"4 0.002254 0.009854 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 114,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"'''On veut s’assurer que l’univers des produits Carmignac détenus par les clients est bien cohérent avec l’univers des NAV \n",
|
|||
|
|
"utilisés pour calculer les performances.'''\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Pour le merge flux / AUM, c’est un full outer join (construit manuellement)\n",
|
|||
|
|
"'''\n",
|
|||
|
|
"Si un mois existe :\n",
|
|||
|
|
"dans les flux mais pas dans les encours → la clé est gardée\n",
|
|||
|
|
"dans les encours mais pas dans les flux → la clé est gardée\n",
|
|||
|
|
"dans les deux → une seule ligne de clé\n",
|
|||
|
|
"👉 C’est donc une logique proche d’un full outer join, mais construite manuellement.\n",
|
|||
|
|
"'''\n",
|
|||
|
|
"df_flows_rel_m = (\n",
|
|||
|
|
" df_flows\n",
|
|||
|
|
" .dropna(subset=[ID_COL, ISIN_COL, \"month\"])\n",
|
|||
|
|
" .assign(\n",
|
|||
|
|
" gross_flow_qty=lambda x: x[FLOW_QTY_COL].abs(),\n",
|
|||
|
|
" sub_qty=lambda x: x[FLOW_SUB_COL].fillna(0),\n",
|
|||
|
|
" red_qty=lambda x: x[FLOW_RED_COL].fillna(0)\n",
|
|||
|
|
" )\n",
|
|||
|
|
" .groupby([ID_COL, ISIN_COL, \"month\"], as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" net_flow_qty=(FLOW_QTY_COL, \"sum\"),\n",
|
|||
|
|
" gross_flow_qty=(\"gross_flow_qty\", \"sum\"),\n",
|
|||
|
|
" sub_qty=(\"sub_qty\", \"sum\"),\n",
|
|||
|
|
" red_qty=(\"red_qty\", \"sum\"),\n",
|
|||
|
|
" n_tx=(FLOW_QTY_COL, \"size\"),\n",
|
|||
|
|
" region=(REGION_COL, \"last\"),\n",
|
|||
|
|
" country=(COUNTRY_COL, \"last\")\n",
|
|||
|
|
" )\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_aum_rel_m = (\n",
|
|||
|
|
" df_aum\n",
|
|||
|
|
" .dropna(subset=[ID_COL, ISIN_COL, \"month\"])\n",
|
|||
|
|
" .groupby([ID_COL, ISIN_COL, \"month\"], as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" aum_qty=(AUM_QTY_COL, \"sum\"),\n",
|
|||
|
|
" aum_val=(AUM_VAL_COL, \"sum\"),\n",
|
|||
|
|
" region=(REGION_COL, \"last\"),\n",
|
|||
|
|
" country=(COUNTRY_COL, \"last\")\n",
|
|||
|
|
" )\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"keys = pd.concat([\n",
|
|||
|
|
" df_flows_rel_m[[ID_COL, ISIN_COL, \"month\"]],\n",
|
|||
|
|
" df_aum_rel_m[[ID_COL, ISIN_COL, \"month\"]]\n",
|
|||
|
|
"]).drop_duplicates()\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rel_m = (\n",
|
|||
|
|
" keys\n",
|
|||
|
|
" .merge(df_aum_rel_m, on=[ID_COL, ISIN_COL, \"month\"], how=\"left\", suffixes=(\"\", \"_aum\"))\n",
|
|||
|
|
" .merge(df_flows_rel_m, on=[ID_COL, ISIN_COL, \"month\"], how=\"left\", suffixes=(\"\", \"_flow\"))\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"for c in [\"aum_qty\", \"aum_val\", \"net_flow_qty\", \"gross_flow_qty\", \"sub_qty\", \"red_qty\", \"n_tx\"]:\n",
|
|||
|
|
" df_rel_m[c] = df_rel_m[c].fillna(0)\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rel_m[\"region\"] = df_rel_m[\"region\"].fillna(df_rel_m.get(\"region_flow\"))\n",
|
|||
|
|
"df_rel_m[\"country\"] = df_rel_m[\"country\"].fillna(df_rel_m.get(\"country_flow\"))\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rel_m[\"active_rel_month\"] = (df_rel_m[\"gross_flow_qty\"] > 0).astype(int)\n",
|
|||
|
|
"df_rel_m[\"holding_rel_month\"] = (df_rel_m[\"aum_qty\"] > 0).astype(int)\n",
|
|||
|
|
"df_rel_m[\"flow_to_aum_rel\"] = df_rel_m[\"net_flow_qty\"] / (df_rel_m[\"aum_qty\"].abs() + EPS)\n",
|
|||
|
|
"df_rel_m[\"turnover_rel\"] = df_rel_m[\"gross_flow_qty\"] / (df_rel_m[\"aum_qty\"].abs() + EPS)\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(df_rel_m.shape)\n",
|
|||
|
|
"df_rel_m.head()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 115,
|
|||
|
|
"id": "1815017e-d787-407b-b810-4d48b6f81c58",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>Product - Isin</th>\n",
|
|||
|
|
" <th>month</th>\n",
|
|||
|
|
" <th>aum_qty</th>\n",
|
|||
|
|
" <th>aum_val</th>\n",
|
|||
|
|
" <th>region</th>\n",
|
|||
|
|
" <th>country</th>\n",
|
|||
|
|
" <th>net_flow_qty</th>\n",
|
|||
|
|
" <th>gross_flow_qty</th>\n",
|
|||
|
|
" <th>sub_qty</th>\n",
|
|||
|
|
" <th>red_qty</th>\n",
|
|||
|
|
" <th>n_tx</th>\n",
|
|||
|
|
" <th>region_flow</th>\n",
|
|||
|
|
" <th>country_flow</th>\n",
|
|||
|
|
" <th>active_rel_month</th>\n",
|
|||
|
|
" <th>holding_rel_month</th>\n",
|
|||
|
|
" <th>flow_to_aum_rel</th>\n",
|
|||
|
|
" <th>turnover_rel</th>\n",
|
|||
|
|
" <th>ret_fund_m</th>\n",
|
|||
|
|
" <th>ret_bench_m</th>\n",
|
|||
|
|
" <th>active_return_m</th>\n",
|
|||
|
|
" <th>delta_rate_m</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-01-31</td>\n",
|
|||
|
|
" <td>49094.915</td>\n",
|
|||
|
|
" <td>3.242523e+07</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>673.990</td>\n",
|
|||
|
|
" <td>956.01</td>\n",
|
|||
|
|
" <td>859.990</td>\n",
|
|||
|
|
" <td>-186.000</td>\n",
|
|||
|
|
" <td>9.0</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.013728</td>\n",
|
|||
|
|
" <td>0.019473</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>-0.058</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-02-28</td>\n",
|
|||
|
|
" <td>49797.915</td>\n",
|
|||
|
|
" <td>3.368032e+07</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>988.000</td>\n",
|
|||
|
|
" <td>1712.00</td>\n",
|
|||
|
|
" <td>1350.000</td>\n",
|
|||
|
|
" <td>-362.000</td>\n",
|
|||
|
|
" <td>12.0</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.019840</td>\n",
|
|||
|
|
" <td>0.034379</td>\n",
|
|||
|
|
" <td>0.121368</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>-0.022</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-03-31</td>\n",
|
|||
|
|
" <td>50302.627</td>\n",
|
|||
|
|
" <td>3.505691e+07</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>9.710</td>\n",
|
|||
|
|
" <td>1447.71</td>\n",
|
|||
|
|
" <td>785.710</td>\n",
|
|||
|
|
" <td>-776.000</td>\n",
|
|||
|
|
" <td>12.0</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.000193</td>\n",
|
|||
|
|
" <td>0.028780</td>\n",
|
|||
|
|
" <td>0.068598</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>-0.014</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-04-30</td>\n",
|
|||
|
|
" <td>50219.393</td>\n",
|
|||
|
|
" <td>3.452433e+07</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>-123.234</td>\n",
|
|||
|
|
" <td>1708.19</td>\n",
|
|||
|
|
" <td>853.478</td>\n",
|
|||
|
|
" <td>-976.712</td>\n",
|
|||
|
|
" <td>11.0</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>-0.002454</td>\n",
|
|||
|
|
" <td>0.034015</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>-0.077</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-05-31</td>\n",
|
|||
|
|
" <td>53685.393</td>\n",
|
|||
|
|
" <td>3.699729e+07</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>121.000</td>\n",
|
|||
|
|
" <td>529.00</td>\n",
|
|||
|
|
" <td>325.000</td>\n",
|
|||
|
|
" <td>-204.000</td>\n",
|
|||
|
|
" <td>6.0</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.002254</td>\n",
|
|||
|
|
" <td>0.009854</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>-0.053</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID Product - Isin month aum_qty aum_val \\\n",
|
|||
|
|
"0 18872 FR0010135103 2015-01-31 49094.915 3.242523e+07 \n",
|
|||
|
|
"1 18872 FR0010135103 2015-02-28 49797.915 3.368032e+07 \n",
|
|||
|
|
"2 18872 FR0010135103 2015-03-31 50302.627 3.505691e+07 \n",
|
|||
|
|
"3 18872 FR0010135103 2015-04-30 50219.393 3.452433e+07 \n",
|
|||
|
|
"4 18872 FR0010135103 2015-05-31 53685.393 3.699729e+07 \n",
|
|||
|
|
"\n",
|
|||
|
|
" region country net_flow_qty gross_flow_qty sub_qty red_qty \\\n",
|
|||
|
|
"0 Switzerland Switzerland 673.990 956.01 859.990 -186.000 \n",
|
|||
|
|
"1 Switzerland Switzerland 988.000 1712.00 1350.000 -362.000 \n",
|
|||
|
|
"2 Switzerland Switzerland 9.710 1447.71 785.710 -776.000 \n",
|
|||
|
|
"3 Switzerland Switzerland -123.234 1708.19 853.478 -976.712 \n",
|
|||
|
|
"4 Switzerland Switzerland 121.000 529.00 325.000 -204.000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" n_tx region_flow country_flow active_rel_month holding_rel_month \\\n",
|
|||
|
|
"0 9.0 Switzerland Switzerland 1 1 \n",
|
|||
|
|
"1 12.0 Switzerland Switzerland 1 1 \n",
|
|||
|
|
"2 12.0 Switzerland Switzerland 1 1 \n",
|
|||
|
|
"3 11.0 Switzerland Switzerland 1 1 \n",
|
|||
|
|
"4 6.0 Switzerland Switzerland 1 1 \n",
|
|||
|
|
"\n",
|
|||
|
|
" flow_to_aum_rel turnover_rel ret_fund_m ret_bench_m active_return_m \\\n",
|
|||
|
|
"0 0.013728 0.019473 0.000000 0.0 0.0 \n",
|
|||
|
|
"1 0.019840 0.034379 0.121368 0.0 0.0 \n",
|
|||
|
|
"2 0.000193 0.028780 0.068598 0.0 0.0 \n",
|
|||
|
|
"3 -0.002454 0.034015 0.000000 0.0 0.0 \n",
|
|||
|
|
"4 0.002254 0.009854 0.000000 0.0 0.0 \n",
|
|||
|
|
"\n",
|
|||
|
|
" delta_rate_m \n",
|
|||
|
|
"0 -0.058 \n",
|
|||
|
|
"1 -0.022 \n",
|
|||
|
|
"2 -0.014 \n",
|
|||
|
|
"3 -0.077 \n",
|
|||
|
|
"4 -0.053 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 115,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# Ajout des rendements mensuels (fonds, benchmark, rendement actif) et de la variation mensuelle du taux\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_nav_m = (\n",
|
|||
|
|
" df_nav\n",
|
|||
|
|
" .dropna(subset=[NAV_ISIN_COL, \"month\", NAV_PRICE_COL])\n",
|
|||
|
|
" .sort_values([NAV_ISIN_COL, \"month\"])\n",
|
|||
|
|
" .groupby([NAV_ISIN_COL, \"month\"], as_index=False)\n",
|
|||
|
|
" .tail(1)\n",
|
|||
|
|
" .copy()\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_nav_m[\"ret_fund_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_PRICE_COL].pct_change()\n",
|
|||
|
|
"df_nav_m[\"ret_bench_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_BENCH_COL].pct_change()\n",
|
|||
|
|
"df_nav_m[\"active_return_m\"] = df_nav_m[\"ret_fund_m\"] - df_nav_m[\"ret_bench_m\"]\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_nav_m = df_nav_m.rename(columns={NAV_ISIN_COL: ISIN_COL})\n",
|
|||
|
|
"df_nav_m = df_nav_m[[ISIN_COL, \"month\", \"ret_fund_m\", \"ret_bench_m\", \"active_return_m\"]]\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rates_m = (\n",
|
|||
|
|
" df_rates\n",
|
|||
|
|
" .dropna(subset=[\"month\", RATE_VAL_COL])\n",
|
|||
|
|
" .sort_values(RATE_DATE_COL)\n",
|
|||
|
|
" .groupby(\"month\", as_index=False)\n",
|
|||
|
|
" .tail(1)\n",
|
|||
|
|
" .copy()\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rates_m[\"delta_rate_m\"] = df_rates_m[RATE_VAL_COL].diff()\n",
|
|||
|
|
"df_rates_m = df_rates_m[[\"month\", RATE_VAL_COL, \"delta_rate_m\"]]\n",
|
|||
|
|
"\n",
|
|||
|
|
"\n",
|
|||
|
|
" \n",
|
|||
|
|
"df_rel_m = df_rel_m.merge(\n",
|
|||
|
|
" df_nav_m,\n",
|
|||
|
|
" on=[ISIN_COL, \"month\"],\n",
|
|||
|
|
" how=\"left\"\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rel_m = df_rel_m.merge(\n",
|
|||
|
|
" df_rates_m[[\"month\", \"delta_rate_m\"]],\n",
|
|||
|
|
" on=\"month\",\n",
|
|||
|
|
" how=\"left\"\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"for c in [\"ret_fund_m\", \"ret_bench_m\", \"active_return_m\", \"delta_rate_m\"]:\n",
|
|||
|
|
" df_rel_m[c] = df_rel_m[c].fillna(0)\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_rel_m.head()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 281,
|
|||
|
|
"id": "a5ef77fe-31d3-4cb2-b8dd-19297b804f8d",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"(34486, 23)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>month</th>\n",
|
|||
|
|
" <th>aum_qty</th>\n",
|
|||
|
|
" <th>aum_val</th>\n",
|
|||
|
|
" <th>net_flow_qty</th>\n",
|
|||
|
|
" <th>gross_flow_qty</th>\n",
|
|||
|
|
" <th>sub_qty</th>\n",
|
|||
|
|
" <th>red_qty</th>\n",
|
|||
|
|
" <th>n_tx</th>\n",
|
|||
|
|
" <th>n_isin_held</th>\n",
|
|||
|
|
" <th>n_isin_active</th>\n",
|
|||
|
|
" <th>delta_rate_m</th>\n",
|
|||
|
|
" <th>region</th>\n",
|
|||
|
|
" <th>country</th>\n",
|
|||
|
|
" <th>ret_fund_m</th>\n",
|
|||
|
|
" <th>ret_bench_m</th>\n",
|
|||
|
|
" <th>active_month</th>\n",
|
|||
|
|
" <th>flow_to_aum_m</th>\n",
|
|||
|
|
" <th>turnover_m</th>\n",
|
|||
|
|
" <th>sub_share_m</th>\n",
|
|||
|
|
" <th>red_share_m</th>\n",
|
|||
|
|
" <th>aum_peak_to_date</th>\n",
|
|||
|
|
" <th>aum_drawdown</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>2015-01-31</td>\n",
|
|||
|
|
" <td>179864.637</td>\n",
|
|||
|
|
" <td>7.043266e+07</td>\n",
|
|||
|
|
" <td>-1524.010</td>\n",
|
|||
|
|
" <td>15230.010</td>\n",
|
|||
|
|
" <td>6897.990</td>\n",
|
|||
|
|
" <td>-8422.000</td>\n",
|
|||
|
|
" <td>32.0</td>\n",
|
|||
|
|
" <td>29</td>\n",
|
|||
|
|
" <td>13</td>\n",
|
|||
|
|
" <td>-0.058</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>0.016384</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>-0.008473</td>\n",
|
|||
|
|
" <td>0.084675</td>\n",
|
|||
|
|
" <td>0.452921</td>\n",
|
|||
|
|
" <td>-0.552987</td>\n",
|
|||
|
|
" <td>179864.637</td>\n",
|
|||
|
|
" <td>5.551115e-15</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>2015-02-28</td>\n",
|
|||
|
|
" <td>186761.736</td>\n",
|
|||
|
|
" <td>7.317400e+07</td>\n",
|
|||
|
|
" <td>7247.100</td>\n",
|
|||
|
|
" <td>18571.880</td>\n",
|
|||
|
|
" <td>13219.490</td>\n",
|
|||
|
|
" <td>-5972.390</td>\n",
|
|||
|
|
" <td>38.0</td>\n",
|
|||
|
|
" <td>29</td>\n",
|
|||
|
|
" <td>13</td>\n",
|
|||
|
|
" <td>-0.022</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>0.036066</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.038804</td>\n",
|
|||
|
|
" <td>0.099442</td>\n",
|
|||
|
|
" <td>0.711801</td>\n",
|
|||
|
|
" <td>-0.321582</td>\n",
|
|||
|
|
" <td>186761.736</td>\n",
|
|||
|
|
" <td>5.329071e-15</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>2015-03-31</td>\n",
|
|||
|
|
" <td>190357.718</td>\n",
|
|||
|
|
" <td>7.653007e+07</td>\n",
|
|||
|
|
" <td>3655.380</td>\n",
|
|||
|
|
" <td>9754.040</td>\n",
|
|||
|
|
" <td>6767.710</td>\n",
|
|||
|
|
" <td>-3112.330</td>\n",
|
|||
|
|
" <td>47.0</td>\n",
|
|||
|
|
" <td>29</td>\n",
|
|||
|
|
" <td>14</td>\n",
|
|||
|
|
" <td>-0.014</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>0.037925</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.019203</td>\n",
|
|||
|
|
" <td>0.051241</td>\n",
|
|||
|
|
" <td>0.693837</td>\n",
|
|||
|
|
" <td>-0.319081</td>\n",
|
|||
|
|
" <td>190357.718</td>\n",
|
|||
|
|
" <td>5.218048e-15</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>2015-04-30</td>\n",
|
|||
|
|
" <td>191429.324</td>\n",
|
|||
|
|
" <td>7.509285e+07</td>\n",
|
|||
|
|
" <td>-218.394</td>\n",
|
|||
|
|
" <td>12840.950</td>\n",
|
|||
|
|
" <td>6384.278</td>\n",
|
|||
|
|
" <td>-6602.672</td>\n",
|
|||
|
|
" <td>39.0</td>\n",
|
|||
|
|
" <td>29</td>\n",
|
|||
|
|
" <td>13</td>\n",
|
|||
|
|
" <td>-0.077</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>0.046378</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>-0.001141</td>\n",
|
|||
|
|
" <td>0.067079</td>\n",
|
|||
|
|
" <td>0.497181</td>\n",
|
|||
|
|
" <td>-0.514189</td>\n",
|
|||
|
|
" <td>191429.324</td>\n",
|
|||
|
|
" <td>5.218048e-15</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>2015-05-31</td>\n",
|
|||
|
|
" <td>189056.475</td>\n",
|
|||
|
|
" <td>7.650176e+07</td>\n",
|
|||
|
|
" <td>-4782.849</td>\n",
|
|||
|
|
" <td>6332.849</td>\n",
|
|||
|
|
" <td>775.000</td>\n",
|
|||
|
|
" <td>-5557.849</td>\n",
|
|||
|
|
" <td>24.0</td>\n",
|
|||
|
|
" <td>29</td>\n",
|
|||
|
|
" <td>9</td>\n",
|
|||
|
|
" <td>-0.053</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>0.000093</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>-0.025299</td>\n",
|
|||
|
|
" <td>0.033497</td>\n",
|
|||
|
|
" <td>0.122378</td>\n",
|
|||
|
|
" <td>-0.877622</td>\n",
|
|||
|
|
" <td>191429.324</td>\n",
|
|||
|
|
" <td>1.239543e-02</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID month aum_qty aum_val net_flow_qty \\\n",
|
|||
|
|
"0 18872 2015-01-31 179864.637 7.043266e+07 -1524.010 \n",
|
|||
|
|
"1 18872 2015-02-28 186761.736 7.317400e+07 7247.100 \n",
|
|||
|
|
"2 18872 2015-03-31 190357.718 7.653007e+07 3655.380 \n",
|
|||
|
|
"3 18872 2015-04-30 191429.324 7.509285e+07 -218.394 \n",
|
|||
|
|
"4 18872 2015-05-31 189056.475 7.650176e+07 -4782.849 \n",
|
|||
|
|
"\n",
|
|||
|
|
" gross_flow_qty sub_qty red_qty n_tx n_isin_held n_isin_active \\\n",
|
|||
|
|
"0 15230.010 6897.990 -8422.000 32.0 29 13 \n",
|
|||
|
|
"1 18571.880 13219.490 -5972.390 38.0 29 13 \n",
|
|||
|
|
"2 9754.040 6767.710 -3112.330 47.0 29 14 \n",
|
|||
|
|
"3 12840.950 6384.278 -6602.672 39.0 29 13 \n",
|
|||
|
|
"4 6332.849 775.000 -5557.849 24.0 29 9 \n",
|
|||
|
|
"\n",
|
|||
|
|
" delta_rate_m region country ret_fund_m ret_bench_m \\\n",
|
|||
|
|
"0 -0.058 Switzerland Switzerland 0.016384 0.0 \n",
|
|||
|
|
"1 -0.022 Switzerland Switzerland 0.036066 0.0 \n",
|
|||
|
|
"2 -0.014 Switzerland Switzerland 0.037925 0.0 \n",
|
|||
|
|
"3 -0.077 Switzerland Switzerland 0.046378 0.0 \n",
|
|||
|
|
"4 -0.053 Switzerland Switzerland 0.000093 0.0 \n",
|
|||
|
|
"\n",
|
|||
|
|
" active_month flow_to_aum_m turnover_m sub_share_m red_share_m \\\n",
|
|||
|
|
"0 1 -0.008473 0.084675 0.452921 -0.552987 \n",
|
|||
|
|
"1 1 0.038804 0.099442 0.711801 -0.321582 \n",
|
|||
|
|
"2 1 0.019203 0.051241 0.693837 -0.319081 \n",
|
|||
|
|
"3 1 -0.001141 0.067079 0.497181 -0.514189 \n",
|
|||
|
|
"4 1 -0.025299 0.033497 0.122378 -0.877622 \n",
|
|||
|
|
"\n",
|
|||
|
|
" aum_peak_to_date aum_drawdown \n",
|
|||
|
|
"0 179864.637 5.551115e-15 \n",
|
|||
|
|
"1 186761.736 5.329071e-15 \n",
|
|||
|
|
"2 190357.718 5.218048e-15 \n",
|
|||
|
|
"3 191429.324 5.218048e-15 \n",
|
|||
|
|
"4 191429.324 1.239543e-02 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 281,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
"# Account-level monthly panel: collapse the (account, ISIN, month) rows of\n",
"# df_rel_m over ISINs.\n",
"\n",
"# Last month-end kept in the panel (compared against the \"month\" column).\n",
"MONTH_CUTOFF = \"2025-10-31\"\n",
"\n",
"tmp = df_rel_m.copy()\n",
"tmp[\"isin_held_flag\"] = (tmp[\"aum_qty\"] > 0).astype(int)\n",
"tmp[\"isin_active_flag\"] = (tmp[\"gross_flow_qty\"] > 0).astype(int)\n",
"\n",
"# AUM weight of each ISIN inside its (account, month) group. EPS keeps the\n",
"# division finite when the group total is 0.\n",
"tmp[\"aum_total\"] = tmp.groupby([ID_COL, \"month\"])[\"aum_qty\"].transform(\"sum\")\n",
"tmp[\"w\"] = tmp[\"aum_qty\"] / (tmp[\"aum_total\"] + EPS)\n",
"tmp[\"ret_fund_w\"] = tmp[\"w\"] * tmp[\"ret_fund_m\"]\n",
"tmp[\"ret_bench_w\"] = tmp[\"w\"] * tmp[\"ret_bench_m\"]\n",
"\n",
"df_month = (\n",
"    tmp.groupby([ID_COL, \"month\"], as_index=False)\n",
"    .agg(\n",
"        aum_qty=(\"aum_qty\", \"sum\"),\n",
"        aum_val=(\"aum_val\", \"sum\"),\n",
"        net_flow_qty=(\"net_flow_qty\", \"sum\"),\n",
"        gross_flow_qty=(\"gross_flow_qty\", \"sum\"),\n",
"        sub_qty=(\"sub_qty\", \"sum\"),\n",
"        red_qty=(\"red_qty\", \"sum\"),\n",
"        n_tx=(\"n_tx\", \"sum\"),\n",
"        n_isin_held=(\"isin_held_flag\", \"sum\"),\n",
"        n_isin_active=(\"isin_active_flag\", \"sum\"),\n",
"        # Taken from the first row of each group.\n",
"        delta_rate_m=(\"delta_rate_m\", \"first\"),\n",
"        region=(\"region\", \"first\"),\n",
"        country=(\"country\", \"first\"),\n",
"        # Sums of weight * return, i.e. AUM-weighted returns.\n",
"        ret_fund_m=(\"ret_fund_w\", \"sum\"),\n",
"        ret_bench_m=(\"ret_bench_w\", \"sum\"),\n",
"    )\n",
"    .sort_values([ID_COL, \"month\"])\n",
"    .reset_index(drop=True)\n",
")\n",
"\n",
"df_month[\"active_month\"] = (df_month[\"gross_flow_qty\"] > 0).astype(int)\n",
"\n",
"# NOTE(review): with a denominator of |aum_qty| + EPS, a month with flows but\n",
"# zero AUM produces a ratio of order flow / EPS. Confirm this is acceptable for\n",
"# the means and standard deviations computed from these columns later on.\n",
"df_month[\"flow_to_aum_m\"] = df_month[\"net_flow_qty\"] / (df_month[\"aum_qty\"].abs() + EPS)\n",
"df_month[\"turnover_m\"] = df_month[\"gross_flow_qty\"] / (df_month[\"aum_qty\"].abs() + EPS)\n",
"df_month[\"sub_share_m\"] = df_month[\"sub_qty\"] / (df_month[\"gross_flow_qty\"] + EPS)\n",
"df_month[\"red_share_m\"] = df_month[\"red_qty\"] / (df_month[\"gross_flow_qty\"] + EPS)\n",
"\n",
"# Drawdown of AUM from its running peak, per account (rows are sorted by month).\n",
"# The division is only carried out where the peak is positive: the previous\n",
"# 1 - aum / (peak + EPS) form evaluated to 1 while the peak was still 0 and\n",
"# left ~1e-15 noise instead of 0 when AUM sits at its peak.\n",
"df_month[\"aum_peak_to_date\"] = df_month.groupby(ID_COL)[\"aum_qty\"].cummax()\n",
"df_month[\"aum_drawdown\"] = (\n",
"    1\n",
"    - df_month[\"aum_qty\"]\n",
"    / df_month[\"aum_peak_to_date\"].where(df_month[\"aum_peak_to_date\"] > 0)\n",
").fillna(0.0)\n",
"\n",
"# .copy() so that later cells can add columns to df_month without triggering\n",
"# SettingWithCopyWarning on a filtered slice.\n",
"df_month = df_month[df_month[\"month\"] <= MONTH_CUTOFF].copy()\n",
"print(df_month.shape)\n",
"df_month.head()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 282,
|
|||
|
|
"id": "6153ffef-6b83-4af9-98f2-b18e8f5d3dbb",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>Product - Isin</th>\n",
|
|||
|
|
" <th>rel_n_months</th>\n",
|
|||
|
|
" <th>rel_active_months</th>\n",
|
|||
|
|
" <th>rel_holding_months</th>\n",
|
|||
|
|
" <th>rel_aum_mean</th>\n",
|
|||
|
|
" <th>rel_turnover_mean</th>\n",
|
|||
|
|
" <th>rel_turnover_vol</th>\n",
|
|||
|
|
" <th>rel_flow_to_aum_vol</th>\n",
|
|||
|
|
" <th>rel_n_tx</th>\n",
|
|||
|
|
" <th>rel_full_exit_count</th>\n",
|
|||
|
|
" <th>rel_entry_count</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>91</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>15174.558362</td>\n",
|
|||
|
|
" <td>3.317824e-02</td>\n",
|
|||
|
|
" <td>5.976183e-02</td>\n",
|
|||
|
|
" <td>6.076097e-02</td>\n",
|
|||
|
|
" <td>382.0</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010147603</td>\n",
|
|||
|
|
" <td>80</td>\n",
|
|||
|
|
" <td>8</td>\n",
|
|||
|
|
" <td>63</td>\n",
|
|||
|
|
" <td>891.837500</td>\n",
|
|||
|
|
" <td>3.750000e+09</td>\n",
|
|||
|
|
" <td>3.354102e+10</td>\n",
|
|||
|
|
" <td>3.354102e+10</td>\n",
|
|||
|
|
" <td>9.0</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010148981</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>66</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>2759.100492</td>\n",
|
|||
|
|
" <td>1.870291e-02</td>\n",
|
|||
|
|
" <td>4.154345e-02</td>\n",
|
|||
|
|
" <td>4.333886e-02</td>\n",
|
|||
|
|
" <td>149.0</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010149112</td>\n",
|
|||
|
|
" <td>80</td>\n",
|
|||
|
|
" <td>12</td>\n",
|
|||
|
|
" <td>50</td>\n",
|
|||
|
|
" <td>2321.171212</td>\n",
|
|||
|
|
" <td>2.750000e+09</td>\n",
|
|||
|
|
" <td>1.815719e+10</td>\n",
|
|||
|
|
" <td>1.815719e+10</td>\n",
|
|||
|
|
" <td>13.0</td>\n",
|
|||
|
|
" <td>2</td>\n",
|
|||
|
|
" <td>2</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>FR0010149120</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>79</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>1778.918100</td>\n",
|
|||
|
|
" <td>6.219999e-02</td>\n",
|
|||
|
|
" <td>1.813014e-01</td>\n",
|
|||
|
|
" <td>1.801645e-01</td>\n",
|
|||
|
|
" <td>152.0</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID Product - Isin rel_n_months rel_active_months \\\n",
|
|||
|
|
"0 18872 FR0010135103 130 91 \n",
|
|||
|
|
"1 18872 FR0010147603 80 8 \n",
|
|||
|
|
"2 18872 FR0010148981 130 66 \n",
|
|||
|
|
"3 18872 FR0010149112 80 12 \n",
|
|||
|
|
"4 18872 FR0010149120 130 79 \n",
|
|||
|
|
"\n",
|
|||
|
|
" rel_holding_months rel_aum_mean rel_turnover_mean rel_turnover_vol \\\n",
|
|||
|
|
"0 130 15174.558362 3.317824e-02 5.976183e-02 \n",
|
|||
|
|
"1 63 891.837500 3.750000e+09 3.354102e+10 \n",
|
|||
|
|
"2 130 2759.100492 1.870291e-02 4.154345e-02 \n",
|
|||
|
|
"3 50 2321.171212 2.750000e+09 1.815719e+10 \n",
|
|||
|
|
"4 130 1778.918100 6.219999e-02 1.813014e-01 \n",
|
|||
|
|
"\n",
|
|||
|
|
" rel_flow_to_aum_vol rel_n_tx rel_full_exit_count rel_entry_count \n",
|
|||
|
|
"0 6.076097e-02 382.0 0 1 \n",
|
|||
|
|
"1 3.354102e+10 9.0 1 1 \n",
|
|||
|
|
"2 4.333886e-02 149.0 0 1 \n",
|
|||
|
|
"3 1.815719e+10 13.0 2 2 \n",
|
|||
|
|
"4 1.801645e-01 152.0 0 1 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 282,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
"# Per-(account, ISIN) features: collapse the monthly rows of df_rel_m over time.\n",
"#\n",
"# prev_aum        : aum_qty of the previous row of the same (account, ISIN) pair\n",
"# full_exit_event : 1 when the previous aum_qty was > 0 and the current one is <= 0\n",
"# entry_event     : 1 when the previous aum_qty was missing or <= 0 and the\n",
"#                   current one is > 0\n",
"tmp = (\n",
"    df_rel_m\n",
"    .sort_values([ID_COL, ISIN_COL, \"month\"])\n",
"    .assign(\n",
"        prev_aum=lambda d: d.groupby([ID_COL, ISIN_COL])[\"aum_qty\"].shift(1),\n",
"        full_exit_event=lambda d: (d[\"prev_aum\"].gt(0) & d[\"aum_qty\"].le(0)).astype(int),\n",
"        entry_event=lambda d: (d[\"prev_aum\"].fillna(0).le(0) & d[\"aum_qty\"].gt(0)).astype(int),\n",
"    )\n",
")\n",
"\n",
"# Output column -> (input column, aggregation).\n",
"# NOTE(review): the saved output shows rel_turnover_mean / rel_turnover_vol of\n",
"# order 1e9 for some pairs; turnover_rel and flow_to_aum_rel are computed\n",
"# outside this cell, so check how they are built there.\n",
"rel_agg_spec = {\n",
"    \"rel_n_months\": (\"month\", \"nunique\"),\n",
"    \"rel_active_months\": (\"active_rel_month\", \"sum\"),\n",
"    \"rel_holding_months\": (\"holding_rel_month\", \"sum\"),\n",
"    \"rel_aum_mean\": (\"aum_qty\", \"mean\"),\n",
"    \"rel_turnover_mean\": (\"turnover_rel\", \"mean\"),\n",
"    \"rel_turnover_vol\": (\"turnover_rel\", \"std\"),\n",
"    \"rel_flow_to_aum_vol\": (\"flow_to_aum_rel\", \"std\"),\n",
"    \"rel_n_tx\": (\"n_tx\", \"sum\"),\n",
"    \"rel_full_exit_count\": (\"full_exit_event\", \"sum\"),\n",
"    \"rel_entry_count\": (\"entry_event\", \"sum\"),\n",
"}\n",
"\n",
"df_rel_feat = tmp.groupby([ID_COL, ISIN_COL], as_index=False).agg(**rel_agg_spec)\n",
"\n",
"df_rel_feat.head()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 283,
|
|||
|
|
"id": "b598f097-4d34-4922-88ac-f524078b3102",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"(431, 40)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>n_months</th>\n",
|
|||
|
|
" <th>n_active_months</th>\n",
|
|||
|
|
" <th>flow_freq</th>\n",
|
|||
|
|
" <th>aum_qty_mean</th>\n",
|
|||
|
|
" <th>aum_qty_median</th>\n",
|
|||
|
|
" <th>aum_qty_max</th>\n",
|
|||
|
|
" <th>aum_qty_last</th>\n",
|
|||
|
|
" <th>net_flow_qty_sum</th>\n",
|
|||
|
|
" <th>gross_flow_qty_sum</th>\n",
|
|||
|
|
" <th>gross_flow_qty_mean</th>\n",
|
|||
|
|
" <th>n_tx_total</th>\n",
|
|||
|
|
" <th>net_flow_vol</th>\n",
|
|||
|
|
" <th>turnover_mean</th>\n",
|
|||
|
|
" <th>turnover_vol</th>\n",
|
|||
|
|
" <th>flow_to_aum_mean</th>\n",
|
|||
|
|
" <th>flow_to_aum_vol</th>\n",
|
|||
|
|
" <th>avg_n_isin_held</th>\n",
|
|||
|
|
" <th>max_n_isin_held</th>\n",
|
|||
|
|
" <th>sub_share_mean</th>\n",
|
|||
|
|
" <th>red_share_mean</th>\n",
|
|||
|
|
" <th>delta_rate_mean</th>\n",
|
|||
|
|
" <th>aum_drawdown_last</th>\n",
|
|||
|
|
" <th>aum_drawdown_max</th>\n",
|
|||
|
|
" <th>region</th>\n",
|
|||
|
|
" <th>country</th>\n",
|
|||
|
|
" <th>n_isin_total</th>\n",
|
|||
|
|
" <th>rel_turnover_mean_avg</th>\n",
|
|||
|
|
" <th>rel_turnover_vol_avg</th>\n",
|
|||
|
|
" <th>rel_flow_to_aum_vol_avg</th>\n",
|
|||
|
|
" <th>full_exit_count</th>\n",
|
|||
|
|
" <th>entry_count</th>\n",
|
|||
|
|
" <th>avg_holding_months_per_isin</th>\n",
|
|||
|
|
" <th>max_holding_months_per_isin</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag3</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag6</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag3</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag6</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag3</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag6</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>143505.697354</td>\n",
|
|||
|
|
" <td>144653.1645</td>\n",
|
|||
|
|
" <td>211049.3200</td>\n",
|
|||
|
|
" <td>166752.2080</td>\n",
|
|||
|
|
" <td>-45677.1480</td>\n",
|
|||
|
|
" <td>1.244126e+06</td>\n",
|
|||
|
|
" <td>9570.200015</td>\n",
|
|||
|
|
" <td>1926.0</td>\n",
|
|||
|
|
" <td>9832.357264</td>\n",
|
|||
|
|
" <td>0.069449</td>\n",
|
|||
|
|
" <td>0.072727</td>\n",
|
|||
|
|
" <td>-0.003918</td>\n",
|
|||
|
|
" <td>0.074207</td>\n",
|
|||
|
|
" <td>39.669231</td>\n",
|
|||
|
|
" <td>50</td>\n",
|
|||
|
|
" <td>0.429844</td>\n",
|
|||
|
|
" <td>-0.576520</td>\n",
|
|||
|
|
" <td>0.013723</td>\n",
|
|||
|
|
" <td>2.098899e-01</td>\n",
|
|||
|
|
" <td>0.715200</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>108</td>\n",
|
|||
|
|
" <td>1.583591e+10</td>\n",
|
|||
|
|
" <td>1.157253e+11</td>\n",
|
|||
|
|
" <td>1.157601e+11</td>\n",
|
|||
|
|
" <td>86</td>\n",
|
|||
|
|
" <td>126</td>\n",
|
|||
|
|
" <td>47.750000</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>0.164687</td>\n",
|
|||
|
|
" <td>0.091805</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>-0.125209</td>\n",
|
|||
|
|
" <td>-0.133345</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>200000076</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>119</td>\n",
|
|||
|
|
" <td>0.915385</td>\n",
|
|||
|
|
" <td>24141.541138</td>\n",
|
|||
|
|
" <td>19888.8255</td>\n",
|
|||
|
|
" <td>69211.1070</td>\n",
|
|||
|
|
" <td>69211.1070</td>\n",
|
|||
|
|
" <td>54791.9840</td>\n",
|
|||
|
|
" <td>2.314415e+05</td>\n",
|
|||
|
|
" <td>1780.319492</td>\n",
|
|||
|
|
" <td>518.0</td>\n",
|
|||
|
|
" <td>2838.000232</td>\n",
|
|||
|
|
" <td>0.083230</td>\n",
|
|||
|
|
" <td>0.138485</td>\n",
|
|||
|
|
" <td>-0.000893</td>\n",
|
|||
|
|
" <td>0.152321</td>\n",
|
|||
|
|
" <td>7.430769</td>\n",
|
|||
|
|
" <td>13</td>\n",
|
|||
|
|
" <td>0.508681</td>\n",
|
|||
|
|
" <td>-0.415876</td>\n",
|
|||
|
|
" <td>0.013723</td>\n",
|
|||
|
|
" <td>1.454392e-14</td>\n",
|
|||
|
|
" <td>0.871392</td>\n",
|
|||
|
|
" <td>Spain</td>\n",
|
|||
|
|
" <td>Spain</td>\n",
|
|||
|
|
" <td>22</td>\n",
|
|||
|
|
" <td>1.398598e+10</td>\n",
|
|||
|
|
" <td>8.307063e+10</td>\n",
|
|||
|
|
" <td>8.354432e+10</td>\n",
|
|||
|
|
" <td>14</td>\n",
|
|||
|
|
" <td>24</td>\n",
|
|||
|
|
" <td>43.909091</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>0.026759</td>\n",
|
|||
|
|
" <td>0.127745</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>-0.035413</td>\n",
|
|||
|
|
" <td>0.023472</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>200000082</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>422994.464523</td>\n",
|
|||
|
|
" <td>462973.7880</td>\n",
|
|||
|
|
" <td>580174.7570</td>\n",
|
|||
|
|
" <td>490991.5590</td>\n",
|
|||
|
|
" <td>178371.1590</td>\n",
|
|||
|
|
" <td>2.327246e+06</td>\n",
|
|||
|
|
" <td>17901.894469</td>\n",
|
|||
|
|
" <td>7103.0</td>\n",
|
|||
|
|
" <td>13288.481111</td>\n",
|
|||
|
|
" <td>0.047480</td>\n",
|
|||
|
|
" <td>0.037140</td>\n",
|
|||
|
|
" <td>0.005194</td>\n",
|
|||
|
|
" <td>0.038831</td>\n",
|
|||
|
|
" <td>7.430769</td>\n",
|
|||
|
|
" <td>15</td>\n",
|
|||
|
|
" <td>0.467005</td>\n",
|
|||
|
|
" <td>-0.562929</td>\n",
|
|||
|
|
" <td>0.013723</td>\n",
|
|||
|
|
" <td>1.537178e-01</td>\n",
|
|||
|
|
" <td>0.302866</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>18</td>\n",
|
|||
|
|
" <td>2.058983e+09</td>\n",
|
|||
|
|
" <td>1.422679e+10</td>\n",
|
|||
|
|
" <td>1.313063e+10</td>\n",
|
|||
|
|
" <td>10</td>\n",
|
|||
|
|
" <td>17</td>\n",
|
|||
|
|
" <td>53.666667</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>-0.096172</td>\n",
|
|||
|
|
" <td>-0.179151</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>-0.173013</td>\n",
|
|||
|
|
" <td>-0.174161</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>200000146</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>212108.397869</td>\n",
|
|||
|
|
" <td>210616.5330</td>\n",
|
|||
|
|
" <td>536769.8760</td>\n",
|
|||
|
|
" <td>536769.8760</td>\n",
|
|||
|
|
" <td>457533.3310</td>\n",
|
|||
|
|
" <td>1.150546e+06</td>\n",
|
|||
|
|
" <td>8850.350438</td>\n",
|
|||
|
|
" <td>4774.0</td>\n",
|
|||
|
|
" <td>10074.748210</td>\n",
|
|||
|
|
" <td>0.051622</td>\n",
|
|||
|
|
" <td>0.066995</td>\n",
|
|||
|
|
" <td>0.024910</td>\n",
|
|||
|
|
" <td>0.075092</td>\n",
|
|||
|
|
" <td>18.369231</td>\n",
|
|||
|
|
" <td>26</td>\n",
|
|||
|
|
" <td>0.517815</td>\n",
|
|||
|
|
" <td>-0.556667</td>\n",
|
|||
|
|
" <td>0.013723</td>\n",
|
|||
|
|
" <td>1.998401e-15</td>\n",
|
|||
|
|
" <td>0.461533</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>33</td>\n",
|
|||
|
|
" <td>1.339995e+09</td>\n",
|
|||
|
|
" <td>1.201877e+10</td>\n",
|
|||
|
|
" <td>9.821426e+09</td>\n",
|
|||
|
|
" <td>20</td>\n",
|
|||
|
|
" <td>42</td>\n",
|
|||
|
|
" <td>72.363636</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>0.047976</td>\n",
|
|||
|
|
" <td>-0.183338</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>-0.139586</td>\n",
|
|||
|
|
" <td>-0.109310</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>200000147</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>145729.199224</td>\n",
|
|||
|
|
" <td>79260.8255</td>\n",
|
|||
|
|
" <td>530740.2621</td>\n",
|
|||
|
|
" <td>530740.2621</td>\n",
|
|||
|
|
" <td>677492.4351</td>\n",
|
|||
|
|
" <td>1.213963e+06</td>\n",
|
|||
|
|
" <td>9338.178685</td>\n",
|
|||
|
|
" <td>7585.0</td>\n",
|
|||
|
|
" <td>13868.197522</td>\n",
|
|||
|
|
" <td>0.061164</td>\n",
|
|||
|
|
" <td>0.058200</td>\n",
|
|||
|
|
" <td>0.022213</td>\n",
|
|||
|
|
" <td>0.059810</td>\n",
|
|||
|
|
" <td>46.576923</td>\n",
|
|||
|
|
" <td>54</td>\n",
|
|||
|
|
" <td>0.598820</td>\n",
|
|||
|
|
" <td>-0.448555</td>\n",
|
|||
|
|
" <td>0.013723</td>\n",
|
|||
|
|
" <td>1.998401e-15</td>\n",
|
|||
|
|
" <td>0.905503</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>75</td>\n",
|
|||
|
|
" <td>9.390666e+09</td>\n",
|
|||
|
|
" <td>3.943802e+10</td>\n",
|
|||
|
|
" <td>3.943366e+10</td>\n",
|
|||
|
|
" <td>56</td>\n",
|
|||
|
|
" <td>102</td>\n",
|
|||
|
|
" <td>80.733333</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>0.103747</td>\n",
|
|||
|
|
" <td>0.126239</td>\n",
|
|||
|
|
" <td>-0.049229</td>\n",
|
|||
|
|
" <td>-0.009332</td>\n",
|
|||
|
|
" <td>-0.270671</td>\n",
|
|||
|
|
" <td>-0.216742</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
|
|||
|
|
"0 18872 130 130 1.000000 143505.697354 \n",
|
|||
|
|
"1 200000076 130 119 0.915385 24141.541138 \n",
|
|||
|
|
"2 200000082 130 130 1.000000 422994.464523 \n",
|
|||
|
|
"3 200000146 130 130 1.000000 212108.397869 \n",
|
|||
|
|
"4 200000147 130 130 1.000000 145729.199224 \n",
|
|||
|
|
"\n",
|
|||
|
|
" aum_qty_median aum_qty_max aum_qty_last net_flow_qty_sum \\\n",
|
|||
|
|
"0 144653.1645 211049.3200 166752.2080 -45677.1480 \n",
|
|||
|
|
"1 19888.8255 69211.1070 69211.1070 54791.9840 \n",
|
|||
|
|
"2 462973.7880 580174.7570 490991.5590 178371.1590 \n",
|
|||
|
|
"3 210616.5330 536769.8760 536769.8760 457533.3310 \n",
|
|||
|
|
"4 79260.8255 530740.2621 530740.2621 677492.4351 \n",
|
|||
|
|
"\n",
|
|||
|
|
" gross_flow_qty_sum gross_flow_qty_mean n_tx_total net_flow_vol \\\n",
|
|||
|
|
"0 1.244126e+06 9570.200015 1926.0 9832.357264 \n",
|
|||
|
|
"1 2.314415e+05 1780.319492 518.0 2838.000232 \n",
|
|||
|
|
"2 2.327246e+06 17901.894469 7103.0 13288.481111 \n",
|
|||
|
|
"3 1.150546e+06 8850.350438 4774.0 10074.748210 \n",
|
|||
|
|
"4 1.213963e+06 9338.178685 7585.0 13868.197522 \n",
|
|||
|
|
"\n",
|
|||
|
|
" turnover_mean turnover_vol flow_to_aum_mean flow_to_aum_vol \\\n",
|
|||
|
|
"0 0.069449 0.072727 -0.003918 0.074207 \n",
|
|||
|
|
"1 0.083230 0.138485 -0.000893 0.152321 \n",
|
|||
|
|
"2 0.047480 0.037140 0.005194 0.038831 \n",
|
|||
|
|
"3 0.051622 0.066995 0.024910 0.075092 \n",
|
|||
|
|
"4 0.061164 0.058200 0.022213 0.059810 \n",
|
|||
|
|
"\n",
|
|||
|
|
" avg_n_isin_held max_n_isin_held sub_share_mean red_share_mean \\\n",
|
|||
|
|
"0 39.669231 50 0.429844 -0.576520 \n",
|
|||
|
|
"1 7.430769 13 0.508681 -0.415876 \n",
|
|||
|
|
"2 7.430769 15 0.467005 -0.562929 \n",
|
|||
|
|
"3 18.369231 26 0.517815 -0.556667 \n",
|
|||
|
|
"4 46.576923 54 0.598820 -0.448555 \n",
|
|||
|
|
"\n",
|
|||
|
|
" delta_rate_mean aum_drawdown_last aum_drawdown_max region \\\n",
|
|||
|
|
"0 0.013723 2.098899e-01 0.715200 Switzerland \n",
|
|||
|
|
"1 0.013723 1.454392e-14 0.871392 Spain \n",
|
|||
|
|
"2 0.013723 1.537178e-01 0.302866 Italy \n",
|
|||
|
|
"3 0.013723 1.998401e-15 0.461533 Italy \n",
|
|||
|
|
"4 0.013723 1.998401e-15 0.905503 Italy \n",
|
|||
|
|
"\n",
|
|||
|
|
" country n_isin_total rel_turnover_mean_avg rel_turnover_vol_avg \\\n",
|
|||
|
|
"0 Switzerland 108 1.583591e+10 1.157253e+11 \n",
|
|||
|
|
"1 Spain 22 1.398598e+10 8.307063e+10 \n",
|
|||
|
|
"2 Italy 18 2.058983e+09 1.422679e+10 \n",
|
|||
|
|
"3 Italy 33 1.339995e+09 1.201877e+10 \n",
|
|||
|
|
"4 Italy 75 9.390666e+09 3.943802e+10 \n",
|
|||
|
|
"\n",
|
|||
|
|
" rel_flow_to_aum_vol_avg full_exit_count entry_count \\\n",
|
|||
|
|
"0 1.157601e+11 86 126 \n",
|
|||
|
|
"1 8.354432e+10 14 24 \n",
|
|||
|
|
"2 1.313063e+10 10 17 \n",
|
|||
|
|
"3 9.821426e+09 20 42 \n",
|
|||
|
|
"4 3.943366e+10 56 102 \n",
|
|||
|
|
"\n",
|
|||
|
|
" avg_holding_months_per_isin max_holding_months_per_isin \\\n",
|
|||
|
|
"0 47.750000 130 \n",
|
|||
|
|
"1 43.909091 130 \n",
|
|||
|
|
"2 53.666667 130 \n",
|
|||
|
|
"3 72.363636 130 \n",
|
|||
|
|
"4 80.733333 130 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_fund_lag3 corr_flow_fund_lag6 corr_flow_bench_lag3 \\\n",
|
|||
|
|
"0 0.164687 0.091805 NaN \n",
|
|||
|
|
"1 0.026759 0.127745 NaN \n",
|
|||
|
|
"2 -0.096172 -0.179151 NaN \n",
|
|||
|
|
"3 0.047976 -0.183338 NaN \n",
|
|||
|
|
"4 0.103747 0.126239 -0.049229 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_bench_lag6 corr_flow_rate_lag3 corr_flow_rate_lag6 \n",
|
|||
|
|
"0 NaN -0.125209 -0.133345 \n",
|
|||
|
|
"1 NaN -0.035413 0.023472 \n",
|
|||
|
|
"2 NaN -0.173013 -0.174161 \n",
|
|||
|
|
"3 NaN -0.139586 -0.109310 \n",
|
|||
|
|
"4 -0.009332 -0.270671 -0.216742 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 283,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# Client-level roll-up of the relationship features held in df_rel_feat\n",
"# (presumably one row per client/ISIN pair -- TODO confirm upstream).\n",
"# Each entry maps an output column to (source column, aggregation).\n",
"# NOTE(review): the saved output shows rel_*_avg values around 1e10, which\n",
"# looks like a ratio over a near-zero denominator upstream -- confirm.\n",
"rel_client_aggs = {\n",
"    \"n_isin_total\": (ISIN_COL, \"nunique\"),\n",
"    \"rel_turnover_mean_avg\": (\"rel_turnover_mean\", \"mean\"),\n",
"    \"rel_turnover_vol_avg\": (\"rel_turnover_vol\", \"mean\"),\n",
"    \"rel_flow_to_aum_vol_avg\": (\"rel_flow_to_aum_vol\", \"mean\"),\n",
"    \"full_exit_count\": (\"rel_full_exit_count\", \"sum\"),\n",
"    \"entry_count\": (\"rel_entry_count\", \"sum\"),\n",
"    \"avg_holding_months_per_isin\": (\"rel_holding_months\", \"mean\"),\n",
"    \"max_holding_months_per_isin\": (\"rel_holding_months\", \"max\"),\n",
"}\n",
"df_rel_client = df_rel_feat.groupby(ID_COL, as_index=False).agg(**rel_client_aggs)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Client-level aggregates over the monthly panel df_month.\n",
"# Sort chronologically first: the \"last\" aggregations below take the final\n",
"# non-null value of each group in row order, so they only mean \"most recent\n",
"# month\" when each client's rows are in month order.\n",
"df_month_sorted = df_month.sort_values([ID_COL, \"month\"])\n",
"\n",
"df_client = (\n",
"    df_month_sorted\n",
"    .groupby(ID_COL, as_index=False)\n",
"    .agg(\n",
"        # activity\n",
"        n_months=(\"month\", \"nunique\"),\n",
"        n_active_months=(\"active_month\", \"sum\"),\n",
"        flow_freq=(\"active_month\", \"mean\"),\n",
"\n",
"        # holdings level\n",
"        aum_qty_mean=(\"aum_qty\", \"mean\"),\n",
"        aum_qty_median=(\"aum_qty\", \"median\"),\n",
"        aum_qty_max=(\"aum_qty\", \"max\"),\n",
"        aum_qty_last=(\"aum_qty\", \"last\"),\n",
"\n",
"        # flow volumes\n",
"        net_flow_qty_sum=(\"net_flow_qty\", \"sum\"),\n",
"        gross_flow_qty_sum=(\"gross_flow_qty\", \"sum\"),\n",
"        gross_flow_qty_mean=(\"gross_flow_qty\", \"mean\"),\n",
"        n_tx_total=(\"n_tx\", \"sum\"),\n",
"\n",
"        # flow variability (\"std\" is the sample standard deviation)\n",
"        net_flow_vol=(\"net_flow_qty\", \"std\"),\n",
"        turnover_mean=(\"turnover_m\", \"mean\"),\n",
"        turnover_vol=(\"turnover_m\", \"std\"),\n",
"        flow_to_aum_mean=(\"flow_to_aum_m\", \"mean\"),\n",
"        flow_to_aum_vol=(\"flow_to_aum_m\", \"std\"),\n",
"\n",
"        # breadth of holdings\n",
"        avg_n_isin_held=(\"n_isin_held\", \"mean\"),\n",
"        max_n_isin_held=(\"n_isin_held\", \"max\"),\n",
"\n",
"        # subscription / redemption mix\n",
"        sub_share_mean=(\"sub_share_m\", \"mean\"),\n",
"        red_share_mean=(\"red_share_m\", \"mean\"),\n",
"\n",
"        # rates and drawdown\n",
"        delta_rate_mean=(\"delta_rate_m\", \"mean\"),\n",
"        aum_drawdown_last=(\"aum_drawdown\", \"last\"),\n",
"        aum_drawdown_max=(\"aum_drawdown\", \"max\"),\n",
"\n",
"        # static attributes (most recent value)\n",
"        region=(\"region\", \"last\"),\n",
"        country=(\"country\", \"last\")\n",
"    )\n",
")\n",
"\n",
"# Left join keeps every client of df_month even without relationship features.\n",
"df_client = df_client.merge(df_rel_client, on=ID_COL, how=\"left\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"#Variables de corrélations entre performance et flux\n",
|
|||
|
|
"def corr_lag(x, y, lag):\n",
"    \"\"\"Pearson correlation between x[t] and y[t - lag].\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x, y : array-like\n",
"        Series of equal length, ordered in time (one value per period).\n",
"    lag : int\n",
"        Non-negative number of periods by which y leads x.\n",
"\n",
"    Returns\n",
"    -------\n",
"    float\n",
"        Correlation over the pairs where both values are finite, or np.nan\n",
"        when fewer than 4 such pairs exist.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If lag is negative or x and y have different shapes.\n",
"    \"\"\"\n",
"    x = np.asarray(x, dtype=float)\n",
"    y = np.asarray(y, dtype=float)\n",
"    if lag < 0:\n",
"        raise ValueError(\"lag must be >= 0\")\n",
"    if x.shape != y.shape:\n",
"        raise ValueError(\"x and y must have the same shape\")\n",
"\n",
"    n = len(x)\n",
"    if n <= lag:\n",
"        return np.nan\n",
"\n",
"    # Shift BEFORE masking: dropping non-finite points first would compact the\n",
"    # series and pair x[t] with something other than y[t - lag] after any gap.\n",
"    x_now = x[lag:]\n",
"    y_past = y[:n - lag]  # also correct for lag == 0 (y[:-0] would be empty)\n",
"\n",
"    mask = np.isfinite(x_now) & np.isfinite(y_past)\n",
"    if mask.sum() <= 3:\n",
"        return np.nan\n",
"\n",
"    return pd.Series(x_now[mask]).corr(pd.Series(y_past[mask]))\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Lagged correlations between each client's monthly flow_to_aum_m series and\n",
"# the fund return, benchmark return and rate-change series.\n",
"# Key = label used in the output column name, value = source column in df_month.\n",
"corr_sources = {\n",
"    \"fund\": \"ret_fund_m\",\n",
"    \"bench\": \"ret_bench_m\",\n",
"    \"rate\": \"delta_rate_m\",\n",
"}\n",
"corr_lags = (3, 6)\n",
"\n",
"rows = []\n",
"\n",
"for acc, g in df_month.groupby(ID_COL):\n",
"    # corr_lag expects time-ordered input\n",
"    g = g.sort_values(\"month\")\n",
"    flow = g[\"flow_to_aum_m\"].values\n",
"\n",
"    row = {ID_COL: acc}\n",
"    for label, source_col in corr_sources.items():\n",
"        series = g[source_col].values\n",
"        for lag in corr_lags:\n",
"            row[f\"corr_flow_{label}_lag{lag}\"] = corr_lag(flow, series, lag)\n",
"    rows.append(row)\n",
"\n",
"df_corr = pd.DataFrame(rows)\n",
"\n",
"df_client = df_client.merge(df_corr, on=ID_COL, how=\"left\")\n",
"\n",
"print(df_client.shape)\n",
"df_client.head()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 284,
|
|||
|
|
"id": "a9570578-8f9e-4b22-a8bc-4174321b7406",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"(431, 44)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>n_months</th>\n",
|
|||
|
|
" <th>n_active_months</th>\n",
|
|||
|
|
" <th>flow_freq</th>\n",
|
|||
|
|
" <th>aum_qty_mean</th>\n",
|
|||
|
|
" <th>aum_qty_median</th>\n",
|
|||
|
|
" <th>aum_qty_max</th>\n",
|
|||
|
|
" <th>aum_qty_last</th>\n",
|
|||
|
|
" <th>net_flow_qty_sum</th>\n",
|
|||
|
|
" <th>gross_flow_qty_sum</th>\n",
|
|||
|
|
" <th>gross_flow_qty_mean</th>\n",
|
|||
|
|
" <th>n_tx_total</th>\n",
|
|||
|
|
" <th>net_flow_vol</th>\n",
|
|||
|
|
" <th>turnover_mean</th>\n",
|
|||
|
|
" <th>turnover_vol</th>\n",
|
|||
|
|
" <th>flow_to_aum_mean</th>\n",
|
|||
|
|
" <th>flow_to_aum_vol</th>\n",
|
|||
|
|
" <th>avg_n_isin_held</th>\n",
|
|||
|
|
" <th>max_n_isin_held</th>\n",
|
|||
|
|
" <th>sub_share_mean</th>\n",
|
|||
|
|
" <th>red_share_mean</th>\n",
|
|||
|
|
" <th>delta_rate_mean</th>\n",
|
|||
|
|
" <th>aum_drawdown_last</th>\n",
|
|||
|
|
" <th>aum_drawdown_max</th>\n",
|
|||
|
|
" <th>region</th>\n",
|
|||
|
|
" <th>country</th>\n",
|
|||
|
|
" <th>n_isin_total</th>\n",
|
|||
|
|
" <th>rel_turnover_mean_avg</th>\n",
|
|||
|
|
" <th>rel_turnover_vol_avg</th>\n",
|
|||
|
|
" <th>rel_flow_to_aum_vol_avg</th>\n",
|
|||
|
|
" <th>full_exit_count</th>\n",
|
|||
|
|
" <th>entry_count</th>\n",
|
|||
|
|
" <th>avg_holding_months_per_isin</th>\n",
|
|||
|
|
" <th>max_holding_months_per_isin</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag3</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag6</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag3</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag6</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag3</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag6</th>\n",
|
|||
|
|
" <th>flow_trend_12m</th>\n",
|
|||
|
|
" <th>aum_trend_12m</th>\n",
|
|||
|
|
" <th>drawdown_trend_12m</th>\n",
|
|||
|
|
" <th>beta_rate</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>18872</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>143505.697354</td>\n",
|
|||
|
|
" <td>144653.1645</td>\n",
|
|||
|
|
" <td>211049.3200</td>\n",
|
|||
|
|
" <td>166752.2080</td>\n",
|
|||
|
|
" <td>-45677.1480</td>\n",
|
|||
|
|
" <td>1.244126e+06</td>\n",
|
|||
|
|
" <td>9570.200015</td>\n",
|
|||
|
|
" <td>1926.0</td>\n",
|
|||
|
|
" <td>9832.357264</td>\n",
|
|||
|
|
" <td>0.069449</td>\n",
|
|||
|
|
" <td>0.072727</td>\n",
|
|||
|
|
" <td>-0.003918</td>\n",
|
|||
|
|
" <td>0.074207</td>\n",
|
|||
|
|
" <td>39.669231</td>\n",
|
|||
|
|
" <td>50</td>\n",
|
|||
|
|
" <td>0.429844</td>\n",
|
|||
|
|
" <td>-0.576520</td>\n",
|
|||
|
|
" <td>0.013723</td>\n",
|
|||
|
|
" <td>2.098899e-01</td>\n",
|
|||
|
|
" <td>0.715200</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>Switzerland</td>\n",
|
|||
|
|
" <td>108</td>\n",
|
|||
|
|
" <td>1.583591e+10</td>\n",
|
|||
|
|
" <td>1.157253e+11</td>\n",
|
|||
|
|
" <td>1.157601e+11</td>\n",
|
|||
|
|
" <td>86</td>\n",
|
|||
|
|
" <td>126</td>\n",
|
|||
|
|
" <td>47.750000</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>0.164687</td>\n",
|
|||
|
|
" <td>0.091805</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>-0.125209</td>\n",
|
|||
|
|
" <td>-0.133345</td>\n",
|
|||
|
|
" <td>-0.005713</td>\n",
|
|||
|
|
" <td>6141.306969</td>\n",
|
|||
|
|
" <td>-2.909892e-02</td>\n",
|
|||
|
|
" <td>-0.058485</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>200000076</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>119</td>\n",
|
|||
|
|
" <td>0.915385</td>\n",
|
|||
|
|
" <td>24141.541138</td>\n",
|
|||
|
|
" <td>19888.8255</td>\n",
|
|||
|
|
" <td>69211.1070</td>\n",
|
|||
|
|
" <td>69211.1070</td>\n",
|
|||
|
|
" <td>54791.9840</td>\n",
|
|||
|
|
" <td>2.314415e+05</td>\n",
|
|||
|
|
" <td>1780.319492</td>\n",
|
|||
|
|
" <td>518.0</td>\n",
|
|||
|
|
" <td>2838.000232</td>\n",
|
|||
|
|
" <td>0.083230</td>\n",
|
|||
|
|
" <td>0.138485</td>\n",
|
|||
|
|
" <td>-0.000893</td>\n",
|
|||
|
|
" <td>0.152321</td>\n",
|
|||
|
|
" <td>7.430769</td>\n",
|
|||
|
|
" <td>13</td>\n",
|
|||
|
|
" <td>0.508681</td>\n",
|
|||
|
|
" <td>-0.415876</td>\n",
|
|||
|
|
" <td>0.013723</td>\n",
|
|||
|
|
" <td>1.454392e-14</td>\n",
|
|||
|
|
" <td>0.871392</td>\n",
|
|||
|
|
" <td>Spain</td>\n",
|
|||
|
|
" <td>Spain</td>\n",
|
|||
|
|
" <td>22</td>\n",
|
|||
|
|
" <td>1.398598e+10</td>\n",
|
|||
|
|
" <td>8.307063e+10</td>\n",
|
|||
|
|
" <td>8.354432e+10</td>\n",
|
|||
|
|
" <td>14</td>\n",
|
|||
|
|
" <td>24</td>\n",
|
|||
|
|
" <td>43.909091</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>0.026759</td>\n",
|
|||
|
|
" <td>0.127745</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>-0.035413</td>\n",
|
|||
|
|
" <td>0.023472</td>\n",
|
|||
|
|
" <td>0.001599</td>\n",
|
|||
|
|
" <td>1649.631811</td>\n",
|
|||
|
|
" <td>-6.132290e-04</td>\n",
|
|||
|
|
" <td>-0.102416</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>200000082</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>422994.464523</td>\n",
|
|||
|
|
" <td>462973.7880</td>\n",
|
|||
|
|
" <td>580174.7570</td>\n",
|
|||
|
|
" <td>490991.5590</td>\n",
|
|||
|
|
" <td>178371.1590</td>\n",
|
|||
|
|
" <td>2.327246e+06</td>\n",
|
|||
|
|
" <td>17901.894469</td>\n",
|
|||
|
|
" <td>7103.0</td>\n",
|
|||
|
|
" <td>13288.481111</td>\n",
|
|||
|
|
" <td>0.047480</td>\n",
|
|||
|
|
" <td>0.037140</td>\n",
|
|||
|
|
" <td>0.005194</td>\n",
|
|||
|
|
" <td>0.038831</td>\n",
|
|||
|
|
" <td>7.430769</td>\n",
|
|||
|
|
" <td>15</td>\n",
|
|||
|
|
" <td>0.467005</td>\n",
|
|||
|
|
" <td>-0.562929</td>\n",
|
|||
|
|
" <td>0.013723</td>\n",
|
|||
|
|
" <td>1.537178e-01</td>\n",
|
|||
|
|
" <td>0.302866</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>18</td>\n",
|
|||
|
|
" <td>2.058983e+09</td>\n",
|
|||
|
|
" <td>1.422679e+10</td>\n",
|
|||
|
|
" <td>1.313063e+10</td>\n",
|
|||
|
|
" <td>10</td>\n",
|
|||
|
|
" <td>17</td>\n",
|
|||
|
|
" <td>53.666667</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>-0.096172</td>\n",
|
|||
|
|
" <td>-0.179151</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>-0.173013</td>\n",
|
|||
|
|
" <td>-0.174161</td>\n",
|
|||
|
|
" <td>-0.000349</td>\n",
|
|||
|
|
" <td>2939.071073</td>\n",
|
|||
|
|
" <td>-5.065838e-03</td>\n",
|
|||
|
|
" <td>-0.025668</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>200000146</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>212108.397869</td>\n",
|
|||
|
|
" <td>210616.5330</td>\n",
|
|||
|
|
" <td>536769.8760</td>\n",
|
|||
|
|
" <td>536769.8760</td>\n",
|
|||
|
|
" <td>457533.3310</td>\n",
|
|||
|
|
" <td>1.150546e+06</td>\n",
|
|||
|
|
" <td>8850.350438</td>\n",
|
|||
|
|
" <td>4774.0</td>\n",
|
|||
|
|
" <td>10074.748210</td>\n",
|
|||
|
|
" <td>0.051622</td>\n",
|
|||
|
|
" <td>0.066995</td>\n",
|
|||
|
|
" <td>0.024910</td>\n",
|
|||
|
|
" <td>0.075092</td>\n",
|
|||
|
|
" <td>18.369231</td>\n",
|
|||
|
|
" <td>26</td>\n",
|
|||
|
|
" <td>0.517815</td>\n",
|
|||
|
|
" <td>-0.556667</td>\n",
|
|||
|
|
" <td>0.013723</td>\n",
|
|||
|
|
" <td>1.998401e-15</td>\n",
|
|||
|
|
" <td>0.461533</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>33</td>\n",
|
|||
|
|
" <td>1.339995e+09</td>\n",
|
|||
|
|
" <td>1.201877e+10</td>\n",
|
|||
|
|
" <td>9.821426e+09</td>\n",
|
|||
|
|
" <td>20</td>\n",
|
|||
|
|
" <td>42</td>\n",
|
|||
|
|
" <td>72.363636</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>0.047976</td>\n",
|
|||
|
|
" <td>-0.183338</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>-0.139586</td>\n",
|
|||
|
|
" <td>-0.109310</td>\n",
|
|||
|
|
" <td>0.000297</td>\n",
|
|||
|
|
" <td>13502.539948</td>\n",
|
|||
|
|
" <td>-5.784029e-17</td>\n",
|
|||
|
|
" <td>-0.065034</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>200000147</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>145729.199224</td>\n",
|
|||
|
|
" <td>79260.8255</td>\n",
|
|||
|
|
" <td>530740.2621</td>\n",
|
|||
|
|
" <td>530740.2621</td>\n",
|
|||
|
|
" <td>677492.4351</td>\n",
|
|||
|
|
" <td>1.213963e+06</td>\n",
|
|||
|
|
" <td>9338.178685</td>\n",
|
|||
|
|
" <td>7585.0</td>\n",
|
|||
|
|
" <td>13868.197522</td>\n",
|
|||
|
|
" <td>0.061164</td>\n",
|
|||
|
|
" <td>0.058200</td>\n",
|
|||
|
|
" <td>0.022213</td>\n",
|
|||
|
|
" <td>0.059810</td>\n",
|
|||
|
|
" <td>46.576923</td>\n",
|
|||
|
|
" <td>54</td>\n",
|
|||
|
|
" <td>0.598820</td>\n",
|
|||
|
|
" <td>-0.448555</td>\n",
|
|||
|
|
" <td>0.013723</td>\n",
|
|||
|
|
" <td>1.998401e-15</td>\n",
|
|||
|
|
" <td>0.905503</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>Italy</td>\n",
|
|||
|
|
" <td>75</td>\n",
|
|||
|
|
" <td>9.390666e+09</td>\n",
|
|||
|
|
" <td>3.943802e+10</td>\n",
|
|||
|
|
" <td>3.943366e+10</td>\n",
|
|||
|
|
" <td>56</td>\n",
|
|||
|
|
" <td>102</td>\n",
|
|||
|
|
" <td>80.733333</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>0.103747</td>\n",
|
|||
|
|
" <td>0.126239</td>\n",
|
|||
|
|
" <td>-0.049229</td>\n",
|
|||
|
|
" <td>-0.009332</td>\n",
|
|||
|
|
" <td>-0.270671</td>\n",
|
|||
|
|
" <td>-0.216742</td>\n",
|
|||
|
|
" <td>-0.003066</td>\n",
|
|||
|
|
" <td>44438.820752</td>\n",
|
|||
|
|
" <td>-8.600167e-02</td>\n",
|
|||
|
|
" <td>-0.102510</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
|
|||
|
|
"0 18872 130 130 1.000000 143505.697354 \n",
|
|||
|
|
"1 200000076 130 119 0.915385 24141.541138 \n",
|
|||
|
|
"2 200000082 130 130 1.000000 422994.464523 \n",
|
|||
|
|
"3 200000146 130 130 1.000000 212108.397869 \n",
|
|||
|
|
"4 200000147 130 130 1.000000 145729.199224 \n",
|
|||
|
|
"\n",
|
|||
|
|
" aum_qty_median aum_qty_max aum_qty_last net_flow_qty_sum \\\n",
|
|||
|
|
"0 144653.1645 211049.3200 166752.2080 -45677.1480 \n",
|
|||
|
|
"1 19888.8255 69211.1070 69211.1070 54791.9840 \n",
|
|||
|
|
"2 462973.7880 580174.7570 490991.5590 178371.1590 \n",
|
|||
|
|
"3 210616.5330 536769.8760 536769.8760 457533.3310 \n",
|
|||
|
|
"4 79260.8255 530740.2621 530740.2621 677492.4351 \n",
|
|||
|
|
"\n",
|
|||
|
|
" gross_flow_qty_sum gross_flow_qty_mean n_tx_total net_flow_vol \\\n",
|
|||
|
|
"0 1.244126e+06 9570.200015 1926.0 9832.357264 \n",
|
|||
|
|
"1 2.314415e+05 1780.319492 518.0 2838.000232 \n",
|
|||
|
|
"2 2.327246e+06 17901.894469 7103.0 13288.481111 \n",
|
|||
|
|
"3 1.150546e+06 8850.350438 4774.0 10074.748210 \n",
|
|||
|
|
"4 1.213963e+06 9338.178685 7585.0 13868.197522 \n",
|
|||
|
|
"\n",
|
|||
|
|
" turnover_mean turnover_vol flow_to_aum_mean flow_to_aum_vol \\\n",
|
|||
|
|
"0 0.069449 0.072727 -0.003918 0.074207 \n",
|
|||
|
|
"1 0.083230 0.138485 -0.000893 0.152321 \n",
|
|||
|
|
"2 0.047480 0.037140 0.005194 0.038831 \n",
|
|||
|
|
"3 0.051622 0.066995 0.024910 0.075092 \n",
|
|||
|
|
"4 0.061164 0.058200 0.022213 0.059810 \n",
|
|||
|
|
"\n",
|
|||
|
|
" avg_n_isin_held max_n_isin_held sub_share_mean red_share_mean \\\n",
|
|||
|
|
"0 39.669231 50 0.429844 -0.576520 \n",
|
|||
|
|
"1 7.430769 13 0.508681 -0.415876 \n",
|
|||
|
|
"2 7.430769 15 0.467005 -0.562929 \n",
|
|||
|
|
"3 18.369231 26 0.517815 -0.556667 \n",
|
|||
|
|
"4 46.576923 54 0.598820 -0.448555 \n",
|
|||
|
|
"\n",
|
|||
|
|
" delta_rate_mean aum_drawdown_last aum_drawdown_max region \\\n",
|
|||
|
|
"0 0.013723 2.098899e-01 0.715200 Switzerland \n",
|
|||
|
|
"1 0.013723 1.454392e-14 0.871392 Spain \n",
|
|||
|
|
"2 0.013723 1.537178e-01 0.302866 Italy \n",
|
|||
|
|
"3 0.013723 1.998401e-15 0.461533 Italy \n",
|
|||
|
|
"4 0.013723 1.998401e-15 0.905503 Italy \n",
|
|||
|
|
"\n",
|
|||
|
|
" country n_isin_total rel_turnover_mean_avg rel_turnover_vol_avg \\\n",
|
|||
|
|
"0 Switzerland 108 1.583591e+10 1.157253e+11 \n",
|
|||
|
|
"1 Spain 22 1.398598e+10 8.307063e+10 \n",
|
|||
|
|
"2 Italy 18 2.058983e+09 1.422679e+10 \n",
|
|||
|
|
"3 Italy 33 1.339995e+09 1.201877e+10 \n",
|
|||
|
|
"4 Italy 75 9.390666e+09 3.943802e+10 \n",
|
|||
|
|
"\n",
|
|||
|
|
" rel_flow_to_aum_vol_avg full_exit_count entry_count \\\n",
|
|||
|
|
"0 1.157601e+11 86 126 \n",
|
|||
|
|
"1 8.354432e+10 14 24 \n",
|
|||
|
|
"2 1.313063e+10 10 17 \n",
|
|||
|
|
"3 9.821426e+09 20 42 \n",
|
|||
|
|
"4 3.943366e+10 56 102 \n",
|
|||
|
|
"\n",
|
|||
|
|
" avg_holding_months_per_isin max_holding_months_per_isin \\\n",
|
|||
|
|
"0 47.750000 130 \n",
|
|||
|
|
"1 43.909091 130 \n",
|
|||
|
|
"2 53.666667 130 \n",
|
|||
|
|
"3 72.363636 130 \n",
|
|||
|
|
"4 80.733333 130 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_fund_lag3 corr_flow_fund_lag6 corr_flow_bench_lag3 \\\n",
|
|||
|
|
"0 0.164687 0.091805 NaN \n",
|
|||
|
|
"1 0.026759 0.127745 NaN \n",
|
|||
|
|
"2 -0.096172 -0.179151 NaN \n",
|
|||
|
|
"3 0.047976 -0.183338 NaN \n",
|
|||
|
|
"4 0.103747 0.126239 -0.049229 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_bench_lag6 corr_flow_rate_lag3 corr_flow_rate_lag6 \\\n",
|
|||
|
|
"0 NaN -0.125209 -0.133345 \n",
|
|||
|
|
"1 NaN -0.035413 0.023472 \n",
|
|||
|
|
"2 NaN -0.173013 -0.174161 \n",
|
|||
|
|
"3 NaN -0.139586 -0.109310 \n",
|
|||
|
|
"4 -0.009332 -0.270671 -0.216742 \n",
|
|||
|
|
"\n",
|
|||
|
|
" flow_trend_12m aum_trend_12m drawdown_trend_12m beta_rate \n",
|
|||
|
|
"0 -0.005713 6141.306969 -2.909892e-02 -0.058485 \n",
|
|||
|
|
"1 0.001599 1649.631811 -6.132290e-04 -0.102416 \n",
|
|||
|
|
"2 -0.000349 2939.071073 -5.065838e-03 -0.025668 \n",
|
|||
|
|
"3 0.000297 13502.539948 -5.784029e-17 -0.065034 \n",
|
|||
|
|
"4 -0.003066 44438.820752 -8.600167e-02 -0.102510 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 284,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"def compute_trend(y):\n",
"    \"\"\"Slope of a linear fit of y against its position index 0..len(y)-1.\n",
"\n",
"    Non-finite entries are ignored but keep their position, so gaps do not\n",
"    compress the time axis. Returns np.nan when fewer than 4 finite values\n",
"    are available.\n",
"    \"\"\"\n",
"    series = np.asarray(y, dtype=float)\n",
"    n_obs = len(series)\n",
"    finite = np.isfinite(series)\n",
"    if n_obs < 4 or finite.sum() < 4:\n",
"        return np.nan\n",
"    # scikit-learn expects a 2-D design matrix, hence the column reshape\n",
"    time_index = np.arange(n_obs).reshape(-1, 1)\n",
"    model = LinearRegression()\n",
"    model.fit(time_index[finite], series[finite])\n",
"    return model.coef_[0]\n",
|
|||
|
|
"\n",
|
|||
|
|
"def compute_beta(y, x):\n",
"    \"\"\"Slope of a linear regression of y on x.\n",
"\n",
"    Only observations where both y and x are finite are used. Returns np.nan\n",
"    when fewer than 6 such observations exist.\n",
"    \"\"\"\n",
"    response = np.asarray(y, dtype=float)\n",
"    driver = np.asarray(x, dtype=float)\n",
"    usable = np.isfinite(response) & np.isfinite(driver)\n",
"    if usable.sum() < 6:\n",
"        return np.nan\n",
"    # single regressor, shaped as one column for scikit-learn\n",
"    design = driver[usable].reshape(-1, 1)\n",
"    model = LinearRegression()\n",
"    model.fit(design, response[usable])\n",
"    return model.coef_[0]\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Number of most recent months used for the *_trend_12m features.\n",
"TREND_WINDOW_MONTHS = 12\n",
"\n",
"rows = []\n",
"\n",
"for acc, g in df_month.groupby(ID_COL):\n",
"    # the trailing-window slices below rely on chronological order\n",
"    g = g.sort_values(\"month\")\n",
"\n",
"    flow = g[\"flow_to_aum_m\"].values\n",
"    aum = g[\"aum_qty\"].values\n",
"    delta_rate = g[\"delta_rate_m\"].values\n",
"    drawdown = g[\"aum_drawdown\"].values\n",
"\n",
"    rows.append({\n",
"        ID_COL: acc,\n",
"        \"flow_trend_12m\": compute_trend(flow[-TREND_WINDOW_MONTHS:]),\n",
"        \"aum_trend_12m\": compute_trend(aum[-TREND_WINDOW_MONTHS:]),\n",
"        \"drawdown_trend_12m\": compute_trend(drawdown[-TREND_WINDOW_MONTHS:]),\n",
"        # sensitivity of the flow ratio to rate changes, over the full history\n",
"        \"beta_rate\": compute_beta(flow, delta_rate)\n",
"    })\n",
"\n",
"df_beta = pd.DataFrame(rows)\n",
"\n",
"# Make the cell safe to re-run: drop trend/beta columns left in df_client by a\n",
"# previous execution, otherwise the merge would create *_x / *_y duplicates.\n",
"beta_cols = [c for c in df_beta.columns if c != ID_COL]\n",
"df_client = (\n",
"    df_client\n",
"    .drop(columns=beta_cols, errors=\"ignore\")\n",
"    .merge(df_beta, on=ID_COL, how=\"left\")\n",
")\n",
"\n",
"print(df_client.shape)\n",
"df_client.head()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 295,
|
|||
|
|
"id": "5e643b2f-491d-4639-b190-a8c6218a2694",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Nb clients = 420\n",
|
|||
|
|
"Nb features = 40\n",
|
|||
|
|
"['log_aum_qty_mean', 'flow_freq', 'gross_flow_to_aum', 'turnover_vol', 'flow_to_aum_vol', 'activity_intensity', 'log_n_tx_total', 'avg_n_isin_held', 'n_isin_total', 'avg_holding_months_per_isin', 'exit_rate_per_isin', 'flow_direction_balance', 'redemption_bias', 'aum_drawdown_last', 'corr_flow_fund_lag3', 'corr_flow_fund_lag6', 'corr_flow_rate_lag3', 'corr_flow_rate_lag6', 'corr_flow_bench_lag3', 'corr_flow_bench_lag6', 'country_grp_France', 'country_grp_Germany', 'country_grp_Italy', 'country_grp_Luxembourg', 'country_grp_Monaco', 'country_grp_Other', 'country_grp_Spain', 'country_grp_Sweden', 'country_grp_Switzerland', 'country_grp_United Kingdom', 'region_grp_France', 'region_grp_Germany', 'region_grp_International', 'region_grp_Italy', 'region_grp_Luxembourg', 'region_grp_Nordics', 'region_grp_Other', 'region_grp_Spain', 'region_grp_Switzerland', 'region_grp_United Kingdom']\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# Work on a copy so df_client stays unchanged when this cell is re-run.\n",
"dfc = df_client.copy()\n",
"\n",
"# Ratio features. EPS (defined earlier in the notebook) is added to each\n",
"# denominator, presumably to avoid division by zero.\n",
"dfc[\"gross_flow_to_aum\"] = dfc[\"gross_flow_qty_sum\"] / (dfc[\"aum_qty_mean\"].abs() + EPS)\n",
"dfc[\"avg_ticket\"] = dfc[\"gross_flow_qty_sum\"] / (dfc[\"n_tx_total\"] + EPS)\n",
"dfc[\"flow_direction_balance\"] = dfc[\"net_flow_qty_sum\"] / (dfc[\"gross_flow_qty_sum\"] + EPS)\n",
"dfc[\"redemption_bias\"] = dfc[\"red_share_mean\"] - dfc[\"sub_share_mean\"]\n",
"dfc[\"activity_intensity\"] = dfc[\"n_tx_total\"] / (dfc[\"n_months\"] + EPS)\n",
"dfc[\"exit_rate_per_isin\"] = dfc[\"full_exit_count\"] / (dfc[\"n_isin_total\"] + EPS)\n",
"dfc[\"entry_rate_per_isin\"] = dfc[\"entry_count\"] / (dfc[\"n_isin_total\"] + EPS)\n",
"dfc[\"aum_final_to_peak\"] = dfc[\"aum_qty_last\"] / (dfc[\"aum_qty_max\"] + EPS)\n",
"\n",
"# log1p of the size-like columns; negatives are clipped to 0 first.\n",
"for col in [\"aum_qty_mean\", \"gross_flow_qty_sum\", \"n_tx_total\", \"avg_ticket\", \"gross_flow_qty_mean\"]:\n",
"    dfc[f\"log_{col}\"] = np.log1p(dfc[col].clip(lower=0))\n",
"\n",
"# Keep clients with at least 6 distinct months and a positive mean AUM.\n",
"dfc = dfc[(dfc[\"n_months\"] >= 6) & (dfc[\"aum_qty_mean\"] > 0)].copy()\n",
"\n",
"# Group everything outside the 10 most frequent countries / regions as \"Other\".\n",
"top_countries = dfc[\"country\"].fillna(\"Unknown\").value_counts().head(10).index\n",
"top_regions = dfc[\"region\"].fillna(\"Unknown\").value_counts().head(10).index\n",
"\n",
"dfc[\"country_grp\"] = np.where(dfc[\"country\"].isin(top_countries), dfc[\"country\"], \"Other\")\n",
"dfc[\"region_grp\"] = np.where(dfc[\"region\"].isin(top_regions), dfc[\"region\"], \"Other\")\n",
"\n",
"base_features = [\n",
"    \"log_aum_qty_mean\",\n",
"    \"flow_freq\",\n",
"    \"gross_flow_to_aum\",\n",
"    \"turnover_vol\",\n",
"    \"flow_to_aum_vol\",\n",
"    \"activity_intensity\",\n",
"    \"log_n_tx_total\",\n",
"    \"avg_n_isin_held\",\n",
"    \"n_isin_total\",\n",
"    \"avg_holding_months_per_isin\",\n",
"    \"exit_rate_per_isin\",\n",
"    \"flow_direction_balance\",\n",
"    \"redemption_bias\",\n",
"    \"aum_drawdown_last\",\n",
"    \"corr_flow_fund_lag3\",\n",
"    \"corr_flow_fund_lag6\",\n",
"    \"corr_flow_rate_lag3\",\n",
"    \"corr_flow_rate_lag6\",\n",
"    \"corr_flow_bench_lag3\",\n",
"    \"corr_flow_bench_lag6\"\n",
"    \n",
"]\n",
"\n",
"base_features2 = [\n",
"    \"log_aum_qty_mean\",\n",
"    \"log_gross_flow_qty_mean\",\n",
"    \"n_tx_total\",\n",
"    \"flow_freq\",\n",
"    \"gross_flow_to_aum\",\n",
"    \"net_flow_vol\"\n",
"]\n",
"\n",
"# Silently ignore listed features that are absent from dfc.\n",
"base_features = [c for c in base_features if c in dfc.columns]\n",
"\n",
"#dfc = dfc[dfc[\"Registrar Account - ID\"]!='420350']\n",
"\n",
"# Replace +/-inf BEFORE taking medians so the imputation value is always\n",
"# finite (a median computed on data that still holds inf can itself be inf).\n",
"X_num = dfc[base_features].replace([np.inf, -np.inf], np.nan)\n",
"X_num = X_num.fillna(X_num.median())\n",
"X_cat = pd.get_dummies(dfc[[\"country_grp\", \"region_grp\"]].fillna(\"Unknown\"), drop_first=True)\n",
"\n",
"X = pd.concat([X_num.reset_index(drop=True), X_cat.reset_index(drop=True)], axis=1)\n",
"\n",
"scaler = StandardScaler()\n",
"scaler2 = RobustScaler()\n",
"\n",
"# NOTE(review): only X_num is scaled; the dummy columns of X are not part of\n",
"# X_scaled / X_scaled2, so the feature count printed below (from X) is larger\n",
"# than the number of columns in the scaled matrices -- confirm this is intended.\n",
"X_scaled = scaler.fit_transform(X_num)\n",
"X_scaled2 = scaler2.fit_transform(X_num)\n",
"\n",
"print(\"Nb clients =\", X.shape[0])\n",
"print(\"Nb features =\", X.shape[1])\n",
"print(X.columns.tolist())"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 296,
|
|||
|
|
"id": "4c53e63d-c555-47eb-959c-0a8ad3af6428",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>k</th>\n",
|
|||
|
|
" <th>inertia</th>\n",
|
|||
|
|
" <th>silhouette</th>\n",
|
|||
|
|
" <th>davies_bouldin</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>8400.000000</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>2</td>\n",
|
|||
|
|
" <td>7397.240892</td>\n",
|
|||
|
|
" <td>0.168814</td>\n",
|
|||
|
|
" <td>2.176326</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>6681.911002</td>\n",
|
|||
|
|
" <td>0.146891</td>\n",
|
|||
|
|
" <td>1.794227</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>6052.515555</td>\n",
|
|||
|
|
" <td>0.172783</td>\n",
|
|||
|
|
" <td>1.710884</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>5</td>\n",
|
|||
|
|
" <td>5584.303430</td>\n",
|
|||
|
|
" <td>0.166762</td>\n",
|
|||
|
|
" <td>1.403164</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>5</th>\n",
|
|||
|
|
" <td>6</td>\n",
|
|||
|
|
" <td>5162.650756</td>\n",
|
|||
|
|
" <td>0.165098</td>\n",
|
|||
|
|
" <td>1.189172</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6</th>\n",
|
|||
|
|
" <td>7</td>\n",
|
|||
|
|
" <td>4822.512231</td>\n",
|
|||
|
|
" <td>0.130928</td>\n",
|
|||
|
|
" <td>1.410428</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>7</th>\n",
|
|||
|
|
" <td>8</td>\n",
|
|||
|
|
" <td>4574.136528</td>\n",
|
|||
|
|
" <td>0.128751</td>\n",
|
|||
|
|
" <td>1.446932</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>8</th>\n",
|
|||
|
|
" <td>9</td>\n",
|
|||
|
|
" <td>4393.945271</td>\n",
|
|||
|
|
" <td>0.136565</td>\n",
|
|||
|
|
" <td>1.316121</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>9</th>\n",
|
|||
|
|
" <td>10</td>\n",
|
|||
|
|
" <td>4244.461822</td>\n",
|
|||
|
|
" <td>0.128990</td>\n",
|
|||
|
|
" <td>1.351212</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>10</th>\n",
|
|||
|
|
" <td>11</td>\n",
|
|||
|
|
" <td>4140.438578</td>\n",
|
|||
|
|
" <td>0.116146</td>\n",
|
|||
|
|
" <td>1.568585</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>11</th>\n",
|
|||
|
|
" <td>12</td>\n",
|
|||
|
|
" <td>3964.003923</td>\n",
|
|||
|
|
" <td>0.107951</td>\n",
|
|||
|
|
" <td>1.411938</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>12</th>\n",
|
|||
|
|
" <td>13</td>\n",
|
|||
|
|
" <td>3852.321552</td>\n",
|
|||
|
|
" <td>0.103959</td>\n",
|
|||
|
|
" <td>1.518013</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>13</th>\n",
|
|||
|
|
" <td>14</td>\n",
|
|||
|
|
" <td>3763.668663</td>\n",
|
|||
|
|
" <td>0.114665</td>\n",
|
|||
|
|
" <td>1.421070</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>14</th>\n",
|
|||
|
|
" <td>15</td>\n",
|
|||
|
|
" <td>3650.413905</td>\n",
|
|||
|
|
" <td>0.106512</td>\n",
|
|||
|
|
" <td>1.469561</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>15</th>\n",
|
|||
|
|
" <td>16</td>\n",
|
|||
|
|
" <td>3556.274981</td>\n",
|
|||
|
|
" <td>0.112147</td>\n",
|
|||
|
|
" <td>1.512242</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>16</th>\n",
|
|||
|
|
" <td>17</td>\n",
|
|||
|
|
" <td>3455.255203</td>\n",
|
|||
|
|
" <td>0.114199</td>\n",
|
|||
|
|
" <td>1.450436</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>17</th>\n",
|
|||
|
|
" <td>18</td>\n",
|
|||
|
|
" <td>3344.323287</td>\n",
|
|||
|
|
" <td>0.104783</td>\n",
|
|||
|
|
" <td>1.385847</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>18</th>\n",
|
|||
|
|
" <td>19</td>\n",
|
|||
|
|
" <td>3310.190272</td>\n",
|
|||
|
|
" <td>0.096217</td>\n",
|
|||
|
|
" <td>1.549688</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>19</th>\n",
|
|||
|
|
" <td>20</td>\n",
|
|||
|
|
" <td>3214.679103</td>\n",
|
|||
|
|
" <td>0.100507</td>\n",
|
|||
|
|
" <td>1.380470</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" k inertia silhouette davies_bouldin\n",
|
|||
|
|
"0 1 8400.000000 NaN NaN\n",
|
|||
|
|
"1 2 7397.240892 0.168814 2.176326\n",
|
|||
|
|
"2 3 6681.911002 0.146891 1.794227\n",
|
|||
|
|
"3 4 6052.515555 0.172783 1.710884\n",
|
|||
|
|
"4 5 5584.303430 0.166762 1.403164\n",
|
|||
|
|
"5 6 5162.650756 0.165098 1.189172\n",
|
|||
|
|
"6 7 4822.512231 0.130928 1.410428\n",
|
|||
|
|
"7 8 4574.136528 0.128751 1.446932\n",
|
|||
|
|
"8 9 4393.945271 0.136565 1.316121\n",
|
|||
|
|
"9 10 4244.461822 0.128990 1.351212\n",
|
|||
|
|
"10 11 4140.438578 0.116146 1.568585\n",
|
|||
|
|
"11 12 3964.003923 0.107951 1.411938\n",
|
|||
|
|
"12 13 3852.321552 0.103959 1.518013\n",
|
|||
|
|
"13 14 3763.668663 0.114665 1.421070\n",
|
|||
|
|
"14 15 3650.413905 0.106512 1.469561\n",
|
|||
|
|
"15 16 3556.274981 0.112147 1.512242\n",
|
|||
|
|
"16 17 3455.255203 0.114199 1.450436\n",
|
|||
|
|
"17 18 3344.323287 0.104783 1.385847\n",
|
|||
|
|
"18 19 3310.190272 0.096217 1.549688\n",
|
|||
|
|
"19 20 3214.679103 0.100507 1.380470"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 296,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"rows = []\n",
|
|||
|
|
"\n",
|
|||
|
|
"for k in range(1, 21):\n",
|
|||
|
|
" km = KMeans(n_clusters=k, n_init=30, random_state=42)\n",
|
|||
|
|
" labels = km.fit_predict(X_scaled)\n",
|
|||
|
|
"\n",
|
|||
|
|
" row = {\n",
|
|||
|
|
" \"k\": k,\n",
|
|||
|
|
" \"inertia\": km.inertia_\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" if k >= 2:\n",
|
|||
|
|
" row[\"silhouette\"] = silhouette_score(X_scaled, labels)\n",
|
|||
|
|
" row[\"davies_bouldin\"] = davies_bouldin_score(X_scaled, labels)\n",
|
|||
|
|
" else:\n",
|
|||
|
|
" row[\"silhouette\"] = np.nan\n",
|
|||
|
|
" row[\"davies_bouldin\"] = np.nan\n",
|
|||
|
|
"\n",
|
|||
|
|
" rows.append(row)\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_kdiag = pd.DataFrame(rows)\n",
|
|||
|
|
"df_kdiag"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 297,
|
|||
|
|
"id": "b92f90a4-5e9e-4ab5-8b93-66529a61e44e",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjUAAAGGCAYAAAAzegNcAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA+JhJREFUeJzs3Xd4VGX6//H3ZNIIJIH0Ri8JpAChxwiCqCiigBUBFRF71wV0XREL2NifdS0gKF1WQWVB/IodCR0kQEB6DamQBEidmd8fIaMRAgkkOcnM53VduZY585xz7ntgnZNzn+e5TTabzYaIiIiIiIiIiIiIiEgd52J0ACIiIiIiIiIiIiIiIpWhooaIiIiIiIiIiIiIiNQLKmqIiIiIiIiIiIiIiEi9oKKGiIiIiIiIiIiIiIjUCypqiIiIiIiIiIiIiIhIvaCihoiIiIiIiIiIiIiI1AsqaoiIiIiIiIiIiIiISL2gooaIiIiIiIiIiIiIiNQLKmqIiIiIiIiIiIiIiEi9oKKGiIiIiIhUWb9+/Rg/frz99erVq4mMjGT16tX2bSNHjuTaa681IjwRERGpw9555x0iIyONDqNWjR8/nn79+p133KFDh4iMjGThwoX2bc74eYmci4oaIg4iMjKSd955x/667AsvOzvbwKgc398/dxEREUewY8cOHnnkEfr27UtsbCyXXnopo0aNYtasWUaHVu127drFO++8w6FDh854b86cOeVuKIiIiDiKhQsXEhkZaf+JjY0lMTGR0aNHM3PmTE6cOGF0iNVu/Pjx5XLu0KEDffr04fHHH2fXrl1GhyciVeBqdAAiUrGFCxfy9NNPV/j+Z599RqdOnWovoBowa9Ys3nzzTVatWoWbm9tZx0RGRjJ8+HCee+65Wo6u1M8//8zmzZt5+OGHDTm/iIhIbdqwYQO33347YWFh3HTTTQQGBpKamsrvv//OzJkzGTlyJADLli3DZDIZHO3F27VrF++++y7du3cnIiKi3Hvz5s2jSZMmDB061KDoREREatYjjzxCREQEJSUlZGZmsmbNGiZNmsQnn3zCf/7zH6KiomrkvPfffz/33HNPjRz7XNzd3XnppZcAsFgsHDhwgPnz5/Prr7+yZMkSgoODaz2myjDq8xKpq1TUEKkHyi4y/q5Zs2YGRFO9fvrpJy655JIKCxp1wc8//8ycOXPOWtTYvHkzZrPZgKhERERqxgcffIC3tzeff/45Pj4+5d7Lysqy/9nd3b22QxMREZFq1rt3b2JjY+2v7733XpKSkrjvvvt44IEHWLp0KZ6entV+XldXV1xda/+2pKurK9dff325bZ06deLee+/l559/5uabb671mCrDqM9LpK7S8lMi9UDv3r25/vrrz/jx8/MzOrSLkp+fz9q1a7nsssuMDuWsTp06dd4xHh4eurAQERGHcuDAAdq0aXNGQQPA39/f/ue/99Q4l127djFy5Eg6duzIpZdeytSpU88Yk5WVxTPPPENCQgKxsbFcd911LFq0qNyYs/XtgLOvPQ2we/duHnnkEbp3705sbCxDhw7l+++/t7+/cOFCHn30UQBuv/12+3IUq1evpl+/fuzcuZM1a9bYt5fNUgHIzc3l5Zdfpk+fPsTExHDFFVfw0UcfYbVaK/WZiIiI1FW9evXigQce4PDhw3z99dcAbN++nfHjx3P55ZcTGxvLJZdcwtNPP82xY8fs+y1btozIyEjWrFlzxjHnz59PZGQkf/zxB1Bxj4ivvvqKoUOHEhcXR/fu3Xn88cdJTU0tN2bfvn08/PDDXHLJJcTGxtK7d28ef/xx8vLyLijfgIAAgDMeWDx48KD9OqJjx47cfPPN/PTTT+XGlC3j9fdlLCu6Zvm73Nxcxo8fT5cuXejatSvjxo07ax5n+7wiIyN54YUXWL58Oddeey0xMTEMHDiQX375pbKpi9RbKmqIOLhjx47x6KOPEh8fT48ePXjppZcoLCwsN6akpI
T33nuP/v37ExMTQ79+/fj3v/9NUVGRfczkyZPp0aMHNpvNvu3FF18kMjKSmTNn2rdlZmYSGRnJ3LlzzxtbUlISRUVF9O7du0o5lV0cLF26lPfff9/+ZMkdd9zB/v37zxj/+++/M3r0aLp06ULHjh0ZMWIE69evLzem7AJh165dPPnkk3Tr1o3bbruN8ePHM2fOHIBya2+W+XtPjcOHD/P8889z1VVXERcXR48ePXjkkUfOuk63iIhIXRQeHs7WrVvtNx0uVk5ODnfffTdRUVGMGzeOVq1a8cYbb/Dzzz/bxxQUFDBy5Ei+/vprBg0axNixY/H29mb8+PF8+umnF3TenTt3csstt7B7927GjBnD+PHj8fLy4sEHH+S7774DoFu3bvZCxX333cdrr73Ga6+9RuvWrXnmmWcICQmhVatW9u333XcfUPpgxogRI/j6668ZPHgwzz77LPHx8fz73/9m8uTJF/mJiYiIGK9sNsOKFSsAWLlyJQcPHmTo0KH861//4pprrmHp0qXcc8899vsEl112GV5eXnzzzTdnHG/p0qW0bduWdu3aVXjO999/n3HjxtG8eXPGjx/P7bffTlJSEsOHDyc3NxeAoqIiRo8ezaZNmxgxYgTPPfccN998MwcPHrSPOZ/s7Gyys7PJzMxk48aNTJ48mcaNG9O3b1/7mMzMTG699VZWrFjBsGHDePzxxyksLOT++++3X0dcLJvNxgMPPMBXX33Fddddx2OPPcbRo0cZN25cpY+xfv16nn/+ea655hr+8Y9/UFhYyCOPPFKu2CTiiPR4sUg9cOLEiTMafptMJpo0aXLefR977DHCw8N58skn2bRpE7NmzSI3N5fXXnvNPubZZ59l0aJFXHXVVYwaNYrNmzfz4Ycfsnv3bt577z0AunbtyieffMLOnTvtFyHr1q3DxcWFdevWcfvtt9u3QelNgvP5+eefiY6Otj8VUVVTp07FZDJx1113ceLECaZNm8ZTTz3Ff//7X/uYpKQkxowZQ0xMDA899BAmk4mFCxdyxx13MHfuXOLi4sod89FHH6V58+Y8/vjj2Gw2OnToQHp6Or/99lu5z6wiycnJbNy4kYEDBxISEsLhw4eZN28et99+O0uWLKFBgwYXlKuIiEhtueuuuxgzZgyDBw8mLi6OLl260KtXL3r06HFBy0Wmp6fz6quvMnjwYABuvPFG+vXrxxdffEGfPn2A0j5hu3fv5vXXX+e6664D4NZbb2XkyJG8+eab3HDDDTRq1KhK53355ZcJDQ3liy++sC+VddtttzFs2DDeeOMNrrjiCpo2bUrXrl2ZNWsWCQkJ9OjRw75///79efPNN2nSpMkZy1TMmDGDgwcPsmjRIlq0aGGPNygoiI8//pi77rqL0NDQKn9WIiIidUVISAje3t4cPHgQKP0Oveuuu8qN6dSpE0888QTr16+na9eueHp60q9fP7799lueffZZ+8yHjIwM1q5dy0MPPVTh+Q4fPsw777zDY489Zn+IAODKK69kyJAhzJ07l/vuu4/du3dz6NAh3nrrLQYMGGAfd65j/9WpU6fo1atXuW3BwcFMnz693GoYH330EZmZmcyZM4euXbsCcNNNN3HdddcxefJkLr/8clxcLu5Z8e+//561a9fyj3/8g7vvvhuAYcOG2e+vVMbu3btZunSpfXnyHj16cP3117NkyRJGjBhxUfGJ1GUqaojUA3feeecZ29zd3UlOTj7vvhEREbz//vsADB8+nEaNGjF37lzuuusuoqKi2L59O4sWLeKmm26yN8saPnw4fn5+TJ8+nVWrVtGzZ0+6dOkClBYt2rVrR15eHn/88QdXXnmlvZBR9n7jxo1p06bNeWP75ZdfLqrxZmFhIV9++aX9RoWPjw8vv/wyf/zxB+3atcNms/H888/To0cPpk2bZm9meuuttzJw4EDefPNNpk+fXu6YUVFRTJkypdy2Fi1a8Ntvv51xQ+NsLrvssnIXVgB9+/bllltu4d
tvv7Xf0BEREamrLrnkEubPn89HH33EihUr2LhxI9OmTcPPz4+XXnqJyy+/vErH8/LyKvcd6u7uTmxsrP0mCZReEwQ
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1600x400 with 3 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"fig, axes = plt.subplots(1, 3, figsize=(16, 4))\n",
|
|||
|
|
"\n",
|
|||
|
|
"axes[0].plot(df_kdiag[\"k\"], df_kdiag[\"inertia\"], marker=\"o\")\n",
|
|||
|
|
"axes[0].set_title(\"Elbow / Inertia\")\n",
|
|||
|
|
"axes[0].set_xlabel(\"K\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"axes[1].plot(df_kdiag[\"k\"], df_kdiag[\"silhouette\"], marker=\"o\")\n",
|
|||
|
|
"axes[1].set_title(\"Silhouette\")\n",
|
|||
|
|
"axes[1].set_xlabel(\"K\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"axes[2].plot(df_kdiag[\"k\"], df_kdiag[\"davies_bouldin\"], marker=\"o\")\n",
|
|||
|
|
"axes[2].set_title(\"Davies-Bouldin\")\n",
|
|||
|
|
"axes[2].set_xlabel(\"K\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"plt.tight_layout()\n",
|
|||
|
|
"plt.show()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 301,
|
|||
|
|
"id": "15b0cadc-e1f9-4ac8-8baa-ef58cd30de22",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"K=2 | silhouette=0.1688 | davies_bouldin=2.1763\n",
|
|||
|
|
"K=5 | silhouette=0.1668 | davies_bouldin=1.4032\n",
|
|||
|
|
"K=6 | silhouette=0.1651 | davies_bouldin=1.1892\n",
|
|||
|
|
"K=10 | silhouette=0.1290 | davies_bouldin=1.3512\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"RESULTS = {}\n",
|
|||
|
|
"\n",
|
|||
|
|
"for k in [2, 5, 6, 10]:\n",
|
|||
|
|
" km = KMeans(n_clusters=k, n_init=50, random_state=42)\n",
|
|||
|
|
" labels = km.fit_predict(X_scaled)\n",
|
|||
|
|
" dfc[f\"cluster_k{k}\"] = labels\n",
|
|||
|
|
"\n",
|
|||
|
|
" RESULTS[k] = {\n",
|
|||
|
|
" \"model\": km,\n",
|
|||
|
|
" \"labels\": labels,\n",
|
|||
|
|
" \"silhouette\": silhouette_score(X_scaled, labels),\n",
|
|||
|
|
" \"davies_bouldin\": davies_bouldin_score(X_scaled, labels)\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
"for k in [2, 5, 6, 10]:\n",
|
|||
|
|
" print(f\"K={k} | silhouette={RESULTS[k]['silhouette']:.4f} | davies_bouldin={RESULTS[k]['davies_bouldin']:.4f}\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 302,
|
|||
|
|
"id": "62109253-215a-45c8-b94b-5721708b4c00",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"\n",
|
|||
|
|
"===== K=2 =====\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>n_clients</th>\n",
|
|||
|
|
" <th>aum_qty_mean_med</th>\n",
|
|||
|
|
" <th>gross_flow_to_aum_med</th>\n",
|
|||
|
|
" <th>flow_freq_med</th>\n",
|
|||
|
|
" <th>n_tx_total_med</th>\n",
|
|||
|
|
" <th>avg_n_isin_held_med</th>\n",
|
|||
|
|
" <th>n_isin_total_med</th>\n",
|
|||
|
|
" <th>avg_holding_months_per_isin_med</th>\n",
|
|||
|
|
" <th>exit_rate_per_isin_med</th>\n",
|
|||
|
|
" <th>flow_direction_balance_med</th>\n",
|
|||
|
|
" <th>redemption_bias_med</th>\n",
|
|||
|
|
" <th>aum_drawdown_last_med</th>\n",
|
|||
|
|
" <th>aum_final_to_peak_med</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag6_med</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag6_med</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag6_med</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>cluster_k2</th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>302</td>\n",
|
|||
|
|
" <td>41119.269717</td>\n",
|
|||
|
|
" <td>4.017738</td>\n",
|
|||
|
|
" <td>0.952333</td>\n",
|
|||
|
|
" <td>597.5</td>\n",
|
|||
|
|
" <td>5.957143</td>\n",
|
|||
|
|
" <td>14.0</td>\n",
|
|||
|
|
" <td>35.300000</td>\n",
|
|||
|
|
" <td>0.500000</td>\n",
|
|||
|
|
" <td>0.193781</td>\n",
|
|||
|
|
" <td>-0.981125</td>\n",
|
|||
|
|
" <td>0.152227</td>\n",
|
|||
|
|
" <td>0.847773</td>\n",
|
|||
|
|
" <td>0.016582</td>\n",
|
|||
|
|
" <td>0.010983</td>\n",
|
|||
|
|
" <td>0.001345</td>\n",
|
|||
|
|
" <td>-0.003738</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>118</td>\n",
|
|||
|
|
" <td>234686.471813</td>\n",
|
|||
|
|
" <td>4.612726</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>6917.0</td>\n",
|
|||
|
|
" <td>27.282197</td>\n",
|
|||
|
|
" <td>51.0</td>\n",
|
|||
|
|
" <td>56.941176</td>\n",
|
|||
|
|
" <td>0.626952</td>\n",
|
|||
|
|
" <td>0.011255</td>\n",
|
|||
|
|
" <td>-1.052709</td>\n",
|
|||
|
|
" <td>0.188189</td>\n",
|
|||
|
|
" <td>0.811811</td>\n",
|
|||
|
|
" <td>0.087368</td>\n",
|
|||
|
|
" <td>0.057736</td>\n",
|
|||
|
|
" <td>-0.075523</td>\n",
|
|||
|
|
" <td>-0.058560</td>\n",
|
|||
|
|
" <td>-0.009615</td>\n",
|
|||
|
|
" <td>-0.003502</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
|
|||
|
|
"cluster_k2 \n",
|
|||
|
|
"0 302 41119.269717 4.017738 0.952333 \n",
|
|||
|
|
"1 118 234686.471813 4.612726 1.000000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
|
|||
|
|
"cluster_k2 \n",
|
|||
|
|
"0 597.5 5.957143 14.0 \n",
|
|||
|
|
"1 6917.0 27.282197 51.0 \n",
|
|||
|
|
"\n",
|
|||
|
|
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
|
|||
|
|
"cluster_k2 \n",
|
|||
|
|
"0 35.300000 0.500000 \n",
|
|||
|
|
"1 56.941176 0.626952 \n",
|
|||
|
|
"\n",
|
|||
|
|
" flow_direction_balance_med redemption_bias_med \\\n",
|
|||
|
|
"cluster_k2 \n",
|
|||
|
|
"0 0.193781 -0.981125 \n",
|
|||
|
|
"1 0.011255 -1.052709 \n",
|
|||
|
|
"\n",
|
|||
|
|
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
|
|||
|
|
"cluster_k2 \n",
|
|||
|
|
"0 0.152227 0.847773 \n",
|
|||
|
|
"1 0.188189 0.811811 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
|
|||
|
|
"cluster_k2 \n",
|
|||
|
|
"0 0.016582 0.010983 \n",
|
|||
|
|
"1 0.087368 0.057736 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \\\n",
|
|||
|
|
"cluster_k2 \n",
|
|||
|
|
"0 0.001345 -0.003738 \n",
|
|||
|
|
"1 -0.075523 -0.058560 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_bench_lag3_med corr_flow_bench_lag6_med \n",
|
|||
|
|
"cluster_k2 \n",
|
|||
|
|
"0 NaN NaN \n",
|
|||
|
|
"1 -0.009615 -0.003502 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"\n",
|
|||
|
|
"===== K=5 =====\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>n_clients</th>\n",
|
|||
|
|
" <th>aum_qty_mean_med</th>\n",
|
|||
|
|
" <th>gross_flow_to_aum_med</th>\n",
|
|||
|
|
" <th>flow_freq_med</th>\n",
|
|||
|
|
" <th>n_tx_total_med</th>\n",
|
|||
|
|
" <th>avg_n_isin_held_med</th>\n",
|
|||
|
|
" <th>n_isin_total_med</th>\n",
|
|||
|
|
" <th>avg_holding_months_per_isin_med</th>\n",
|
|||
|
|
" <th>exit_rate_per_isin_med</th>\n",
|
|||
|
|
" <th>flow_direction_balance_med</th>\n",
|
|||
|
|
" <th>redemption_bias_med</th>\n",
|
|||
|
|
" <th>aum_drawdown_last_med</th>\n",
|
|||
|
|
" <th>aum_final_to_peak_med</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag6_med</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag6_med</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag6_med</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>cluster_k5</th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>252</td>\n",
|
|||
|
|
" <td>48109.772405</td>\n",
|
|||
|
|
" <td>5.274532</td>\n",
|
|||
|
|
" <td>0.986577</td>\n",
|
|||
|
|
" <td>1338.5</td>\n",
|
|||
|
|
" <td>9.132276</td>\n",
|
|||
|
|
" <td>20.5</td>\n",
|
|||
|
|
" <td>39.006111</td>\n",
|
|||
|
|
" <td>0.579796</td>\n",
|
|||
|
|
" <td>0.098689</td>\n",
|
|||
|
|
" <td>-1.008736</td>\n",
|
|||
|
|
" <td>2.602536e-01</td>\n",
|
|||
|
|
" <td>0.739746</td>\n",
|
|||
|
|
" <td>0.026895</td>\n",
|
|||
|
|
" <td>0.014226</td>\n",
|
|||
|
|
" <td>-0.009377</td>\n",
|
|||
|
|
" <td>-0.014814</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>84</td>\n",
|
|||
|
|
" <td>49000.494283</td>\n",
|
|||
|
|
" <td>1.877722</td>\n",
|
|||
|
|
" <td>0.226496</td>\n",
|
|||
|
|
" <td>16.5</td>\n",
|
|||
|
|
" <td>1.433694</td>\n",
|
|||
|
|
" <td>2.0</td>\n",
|
|||
|
|
" <td>27.176471</td>\n",
|
|||
|
|
" <td>0.205263</td>\n",
|
|||
|
|
" <td>0.539178</td>\n",
|
|||
|
|
" <td>-0.234454</td>\n",
|
|||
|
|
" <td>2.993803e-02</td>\n",
|
|||
|
|
" <td>0.970062</td>\n",
|
|||
|
|
" <td>0.007295</td>\n",
|
|||
|
|
" <td>-0.001797</td>\n",
|
|||
|
|
" <td>0.022796</td>\n",
|
|||
|
|
" <td>0.034307</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>73</td>\n",
|
|||
|
|
" <td>391396.844698</td>\n",
|
|||
|
|
" <td>4.520165</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>9683.0</td>\n",
|
|||
|
|
" <td>34.607692</td>\n",
|
|||
|
|
" <td>59.0</td>\n",
|
|||
|
|
" <td>62.220339</td>\n",
|
|||
|
|
" <td>0.622951</td>\n",
|
|||
|
|
" <td>0.026428</td>\n",
|
|||
|
|
" <td>-1.067148</td>\n",
|
|||
|
|
" <td>9.427729e-02</td>\n",
|
|||
|
|
" <td>0.905723</td>\n",
|
|||
|
|
" <td>0.116467</td>\n",
|
|||
|
|
" <td>0.096215</td>\n",
|
|||
|
|
" <td>-0.125981</td>\n",
|
|||
|
|
" <td>-0.089640</td>\n",
|
|||
|
|
" <td>-0.018484</td>\n",
|
|||
|
|
" <td>0.002329</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>10</td>\n",
|
|||
|
|
" <td>130579.662288</td>\n",
|
|||
|
|
" <td>24.008163</td>\n",
|
|||
|
|
" <td>0.980769</td>\n",
|
|||
|
|
" <td>4383.5</td>\n",
|
|||
|
|
" <td>9.309615</td>\n",
|
|||
|
|
" <td>45.5</td>\n",
|
|||
|
|
" <td>27.651977</td>\n",
|
|||
|
|
" <td>0.352542</td>\n",
|
|||
|
|
" <td>-0.024961</td>\n",
|
|||
|
|
" <td>-1.016636</td>\n",
|
|||
|
|
" <td>1.313516e-01</td>\n",
|
|||
|
|
" <td>0.868648</td>\n",
|
|||
|
|
" <td>0.000878</td>\n",
|
|||
|
|
" <td>0.001310</td>\n",
|
|||
|
|
" <td>0.008806</td>\n",
|
|||
|
|
" <td>0.011620</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>59193.615277</td>\n",
|
|||
|
|
" <td>6.061456</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>2414.0</td>\n",
|
|||
|
|
" <td>43.123077</td>\n",
|
|||
|
|
" <td>103.0</td>\n",
|
|||
|
|
" <td>54.427184</td>\n",
|
|||
|
|
" <td>0.728155</td>\n",
|
|||
|
|
" <td>0.015762</td>\n",
|
|||
|
|
" <td>-1.017068</td>\n",
|
|||
|
|
" <td>1.076916e-14</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>0.163810</td>\n",
|
|||
|
|
" <td>-0.062279</td>\n",
|
|||
|
|
" <td>-0.241822</td>\n",
|
|||
|
|
" <td>-0.208029</td>\n",
|
|||
|
|
" <td>0.235343</td>\n",
|
|||
|
|
" <td>-0.239479</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
|
|||
|
|
"cluster_k5 \n",
|
|||
|
|
"2 252 48109.772405 5.274532 0.986577 \n",
|
|||
|
|
"0 84 49000.494283 1.877722 0.226496 \n",
|
|||
|
|
"1 73 391396.844698 4.520165 1.000000 \n",
|
|||
|
|
"4 10 130579.662288 24.008163 0.980769 \n",
|
|||
|
|
"3 1 59193.615277 6.061456 1.000000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
|
|||
|
|
"cluster_k5 \n",
|
|||
|
|
"2 1338.5 9.132276 20.5 \n",
|
|||
|
|
"0 16.5 1.433694 2.0 \n",
|
|||
|
|
"1 9683.0 34.607692 59.0 \n",
|
|||
|
|
"4 4383.5 9.309615 45.5 \n",
|
|||
|
|
"3 2414.0 43.123077 103.0 \n",
|
|||
|
|
"\n",
|
|||
|
|
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
|
|||
|
|
"cluster_k5 \n",
|
|||
|
|
"2 39.006111 0.579796 \n",
|
|||
|
|
"0 27.176471 0.205263 \n",
|
|||
|
|
"1 62.220339 0.622951 \n",
|
|||
|
|
"4 27.651977 0.352542 \n",
|
|||
|
|
"3 54.427184 0.728155 \n",
|
|||
|
|
"\n",
|
|||
|
|
" flow_direction_balance_med redemption_bias_med \\\n",
|
|||
|
|
"cluster_k5 \n",
|
|||
|
|
"2 0.098689 -1.008736 \n",
|
|||
|
|
"0 0.539178 -0.234454 \n",
|
|||
|
|
"1 0.026428 -1.067148 \n",
|
|||
|
|
"4 -0.024961 -1.016636 \n",
|
|||
|
|
"3 0.015762 -1.017068 \n",
|
|||
|
|
"\n",
|
|||
|
|
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
|
|||
|
|
"cluster_k5 \n",
|
|||
|
|
"2 2.602536e-01 0.739746 \n",
|
|||
|
|
"0 2.993803e-02 0.970062 \n",
|
|||
|
|
"1 9.427729e-02 0.905723 \n",
|
|||
|
|
"4 1.313516e-01 0.868648 \n",
|
|||
|
|
"3 1.076916e-14 1.000000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
|
|||
|
|
"cluster_k5 \n",
|
|||
|
|
"2 0.026895 0.014226 \n",
|
|||
|
|
"0 0.007295 -0.001797 \n",
|
|||
|
|
"1 0.116467 0.096215 \n",
|
|||
|
|
"4 0.000878 0.001310 \n",
|
|||
|
|
"3 0.163810 -0.062279 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \\\n",
|
|||
|
|
"cluster_k5 \n",
|
|||
|
|
"2 -0.009377 -0.014814 \n",
|
|||
|
|
"0 0.022796 0.034307 \n",
|
|||
|
|
"1 -0.125981 -0.089640 \n",
|
|||
|
|
"4 0.008806 0.011620 \n",
|
|||
|
|
"3 -0.241822 -0.208029 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_bench_lag3_med corr_flow_bench_lag6_med \n",
|
|||
|
|
"cluster_k5 \n",
|
|||
|
|
"2 NaN NaN \n",
|
|||
|
|
"0 NaN NaN \n",
|
|||
|
|
"1 -0.018484 0.002329 \n",
|
|||
|
|
"4 NaN NaN \n",
|
|||
|
|
"3 0.235343 -0.239479 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"\n",
|
|||
|
|
"===== K=6 =====\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>n_clients</th>\n",
|
|||
|
|
" <th>aum_qty_mean_med</th>\n",
|
|||
|
|
" <th>gross_flow_to_aum_med</th>\n",
|
|||
|
|
" <th>flow_freq_med</th>\n",
|
|||
|
|
" <th>n_tx_total_med</th>\n",
|
|||
|
|
" <th>avg_n_isin_held_med</th>\n",
|
|||
|
|
" <th>n_isin_total_med</th>\n",
|
|||
|
|
" <th>avg_holding_months_per_isin_med</th>\n",
|
|||
|
|
" <th>exit_rate_per_isin_med</th>\n",
|
|||
|
|
" <th>flow_direction_balance_med</th>\n",
|
|||
|
|
" <th>redemption_bias_med</th>\n",
|
|||
|
|
" <th>aum_drawdown_last_med</th>\n",
|
|||
|
|
" <th>aum_final_to_peak_med</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag6_med</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag6_med</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag6_med</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>cluster_k6</th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>253</td>\n",
|
|||
|
|
" <td>48896.531271</td>\n",
|
|||
|
|
" <td>5.278802</td>\n",
|
|||
|
|
" <td>0.989247</td>\n",
|
|||
|
|
" <td>1383.0</td>\n",
|
|||
|
|
" <td>9.484615</td>\n",
|
|||
|
|
" <td>21.0</td>\n",
|
|||
|
|
" <td>39.040000</td>\n",
|
|||
|
|
" <td>0.580645</td>\n",
|
|||
|
|
" <td>0.091049</td>\n",
|
|||
|
|
" <td>-1.008962e+00</td>\n",
|
|||
|
|
" <td>2.559036e-01</td>\n",
|
|||
|
|
" <td>0.744096</td>\n",
|
|||
|
|
" <td>0.027031</td>\n",
|
|||
|
|
" <td>0.014335</td>\n",
|
|||
|
|
" <td>-0.003210</td>\n",
|
|||
|
|
" <td>-0.010739</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>85</td>\n",
|
|||
|
|
" <td>48759.866216</td>\n",
|
|||
|
|
" <td>1.854909</td>\n",
|
|||
|
|
" <td>0.230769</td>\n",
|
|||
|
|
" <td>20.0</td>\n",
|
|||
|
|
" <td>1.436620</td>\n",
|
|||
|
|
" <td>3.0</td>\n",
|
|||
|
|
" <td>25.352941</td>\n",
|
|||
|
|
" <td>0.210526</td>\n",
|
|||
|
|
" <td>0.534613</td>\n",
|
|||
|
|
" <td>-2.381395e-01</td>\n",
|
|||
|
|
" <td>2.313075e-02</td>\n",
|
|||
|
|
" <td>0.976869</td>\n",
|
|||
|
|
" <td>0.009550</td>\n",
|
|||
|
|
" <td>-0.000508</td>\n",
|
|||
|
|
" <td>0.022261</td>\n",
|
|||
|
|
" <td>0.033446</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>70</td>\n",
|
|||
|
|
" <td>408912.283887</td>\n",
|
|||
|
|
" <td>4.434973</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>9639.0</td>\n",
|
|||
|
|
" <td>34.826538</td>\n",
|
|||
|
|
" <td>60.0</td>\n",
|
|||
|
|
" <td>65.085456</td>\n",
|
|||
|
|
" <td>0.613200</td>\n",
|
|||
|
|
" <td>0.028370</td>\n",
|
|||
|
|
" <td>-1.066758e+00</td>\n",
|
|||
|
|
" <td>9.226326e-02</td>\n",
|
|||
|
|
" <td>0.907737</td>\n",
|
|||
|
|
" <td>0.117652</td>\n",
|
|||
|
|
" <td>0.100263</td>\n",
|
|||
|
|
" <td>-0.128728</td>\n",
|
|||
|
|
" <td>-0.090843</td>\n",
|
|||
|
|
" <td>-0.018484</td>\n",
|
|||
|
|
" <td>0.002329</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>10</td>\n",
|
|||
|
|
" <td>130579.662288</td>\n",
|
|||
|
|
" <td>24.008163</td>\n",
|
|||
|
|
" <td>0.980769</td>\n",
|
|||
|
|
" <td>4383.5</td>\n",
|
|||
|
|
" <td>9.309615</td>\n",
|
|||
|
|
" <td>45.5</td>\n",
|
|||
|
|
" <td>27.651977</td>\n",
|
|||
|
|
" <td>0.352542</td>\n",
|
|||
|
|
" <td>-0.024961</td>\n",
|
|||
|
|
" <td>-1.016636e+00</td>\n",
|
|||
|
|
" <td>1.313516e-01</td>\n",
|
|||
|
|
" <td>0.868648</td>\n",
|
|||
|
|
" <td>0.000878</td>\n",
|
|||
|
|
" <td>0.001310</td>\n",
|
|||
|
|
" <td>0.008806</td>\n",
|
|||
|
|
" <td>0.011620</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>59193.615277</td>\n",
|
|||
|
|
" <td>6.061456</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>2414.0</td>\n",
|
|||
|
|
" <td>43.123077</td>\n",
|
|||
|
|
" <td>103.0</td>\n",
|
|||
|
|
" <td>54.427184</td>\n",
|
|||
|
|
" <td>0.728155</td>\n",
|
|||
|
|
" <td>0.015762</td>\n",
|
|||
|
|
" <td>-1.017068e+00</td>\n",
|
|||
|
|
" <td>1.076916e-14</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>0.163810</td>\n",
|
|||
|
|
" <td>-0.062279</td>\n",
|
|||
|
|
" <td>-0.241822</td>\n",
|
|||
|
|
" <td>-0.208029</td>\n",
|
|||
|
|
" <td>0.235343</td>\n",
|
|||
|
|
" <td>-0.239479</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>5</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>75884.615385</td>\n",
|
|||
|
|
" <td>4.849468</td>\n",
|
|||
|
|
" <td>0.282051</td>\n",
|
|||
|
|
" <td>16.0</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>1.0</td>\n",
|
|||
|
|
" <td>39.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.152174</td>\n",
|
|||
|
|
" <td>-6.666667e+12</td>\n",
|
|||
|
|
" <td>6.853147e-01</td>\n",
|
|||
|
|
" <td>0.314685</td>\n",
|
|||
|
|
" <td>-0.135836</td>\n",
|
|||
|
|
" <td>0.145882</td>\n",
|
|||
|
|
" <td>-0.113162</td>\n",
|
|||
|
|
" <td>-0.214312</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
|
|||
|
|
"cluster_k6 \n",
|
|||
|
|
"4 253 48896.531271 5.278802 0.989247 \n",
|
|||
|
|
"2 85 48759.866216 1.854909 0.230769 \n",
|
|||
|
|
"0 70 408912.283887 4.434973 1.000000 \n",
|
|||
|
|
"1 10 130579.662288 24.008163 0.980769 \n",
|
|||
|
|
"3 1 59193.615277 6.061456 1.000000 \n",
|
|||
|
|
"5 1 75884.615385 4.849468 0.282051 \n",
|
|||
|
|
"\n",
|
|||
|
|
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
|
|||
|
|
"cluster_k6 \n",
|
|||
|
|
"4 1383.0 9.484615 21.0 \n",
|
|||
|
|
"2 20.0 1.436620 3.0 \n",
|
|||
|
|
"0 9639.0 34.826538 60.0 \n",
|
|||
|
|
"1 4383.5 9.309615 45.5 \n",
|
|||
|
|
"3 2414.0 43.123077 103.0 \n",
|
|||
|
|
"5 16.0 1.000000 1.0 \n",
|
|||
|
|
"\n",
|
|||
|
|
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
|
|||
|
|
"cluster_k6 \n",
|
|||
|
|
"4 39.040000 0.580645 \n",
|
|||
|
|
"2 25.352941 0.210526 \n",
|
|||
|
|
"0 65.085456 0.613200 \n",
|
|||
|
|
"1 27.651977 0.352542 \n",
|
|||
|
|
"3 54.427184 0.728155 \n",
|
|||
|
|
"5 39.000000 0.000000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" flow_direction_balance_med redemption_bias_med \\\n",
|
|||
|
|
"cluster_k6 \n",
|
|||
|
|
"4 0.091049 -1.008962e+00 \n",
|
|||
|
|
"2 0.534613 -2.381395e-01 \n",
|
|||
|
|
"0 0.028370 -1.066758e+00 \n",
|
|||
|
|
"1 -0.024961 -1.016636e+00 \n",
|
|||
|
|
"3 0.015762 -1.017068e+00 \n",
|
|||
|
|
"5 0.152174 -6.666667e+12 \n",
|
|||
|
|
"\n",
|
|||
|
|
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
|
|||
|
|
"cluster_k6 \n",
|
|||
|
|
"4 2.559036e-01 0.744096 \n",
|
|||
|
|
"2 2.313075e-02 0.976869 \n",
|
|||
|
|
"0 9.226326e-02 0.907737 \n",
|
|||
|
|
"1 1.313516e-01 0.868648 \n",
|
|||
|
|
"3 1.076916e-14 1.000000 \n",
|
|||
|
|
"5 6.853147e-01 0.314685 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
|
|||
|
|
"cluster_k6 \n",
|
|||
|
|
"4 0.027031 0.014335 \n",
|
|||
|
|
"2 0.009550 -0.000508 \n",
|
|||
|
|
"0 0.117652 0.100263 \n",
|
|||
|
|
"1 0.000878 0.001310 \n",
|
|||
|
|
"3 0.163810 -0.062279 \n",
|
|||
|
|
"5 -0.135836 0.145882 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \\\n",
|
|||
|
|
"cluster_k6 \n",
|
|||
|
|
"4 -0.003210 -0.010739 \n",
|
|||
|
|
"2 0.022261 0.033446 \n",
|
|||
|
|
"0 -0.128728 -0.090843 \n",
|
|||
|
|
"1 0.008806 0.011620 \n",
|
|||
|
|
"3 -0.241822 -0.208029 \n",
|
|||
|
|
"5 -0.113162 -0.214312 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_bench_lag3_med corr_flow_bench_lag6_med \n",
|
|||
|
|
"cluster_k6 \n",
|
|||
|
|
"4 NaN NaN \n",
|
|||
|
|
"2 NaN NaN \n",
|
|||
|
|
"0 -0.018484 0.002329 \n",
|
|||
|
|
"1 NaN NaN \n",
|
|||
|
|
"3 0.235343 -0.239479 \n",
|
|||
|
|
"5 NaN NaN "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"\n",
|
|||
|
|
"===== K=10 =====\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>n_clients</th>\n",
|
|||
|
|
" <th>aum_qty_mean_med</th>\n",
|
|||
|
|
" <th>gross_flow_to_aum_med</th>\n",
|
|||
|
|
" <th>flow_freq_med</th>\n",
|
|||
|
|
" <th>n_tx_total_med</th>\n",
|
|||
|
|
" <th>avg_n_isin_held_med</th>\n",
|
|||
|
|
" <th>n_isin_total_med</th>\n",
|
|||
|
|
" <th>avg_holding_months_per_isin_med</th>\n",
|
|||
|
|
" <th>exit_rate_per_isin_med</th>\n",
|
|||
|
|
" <th>flow_direction_balance_med</th>\n",
|
|||
|
|
" <th>redemption_bias_med</th>\n",
|
|||
|
|
" <th>aum_drawdown_last_med</th>\n",
|
|||
|
|
" <th>aum_final_to_peak_med</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_fund_lag6_med</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_rate_lag6_med</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag3_med</th>\n",
|
|||
|
|
" <th>corr_flow_bench_lag6_med</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>cluster_k10</th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>113</td>\n",
|
|||
|
|
" <td>63010.409768</td>\n",
|
|||
|
|
" <td>3.947961</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>1525.0</td>\n",
|
|||
|
|
" <td>10.853846</td>\n",
|
|||
|
|
" <td>23.0</td>\n",
|
|||
|
|
" <td>50.500000</td>\n",
|
|||
|
|
" <td>0.576923</td>\n",
|
|||
|
|
" <td>-0.150123</td>\n",
|
|||
|
|
" <td>-1.021636e+00</td>\n",
|
|||
|
|
" <td>5.794239e-01</td>\n",
|
|||
|
|
" <td>0.420576</td>\n",
|
|||
|
|
" <td>0.023201</td>\n",
|
|||
|
|
" <td>0.035668</td>\n",
|
|||
|
|
" <td>-0.023551</td>\n",
|
|||
|
|
" <td>-0.044320</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>8</th>\n",
|
|||
|
|
" <td>110</td>\n",
|
|||
|
|
" <td>23499.160487</td>\n",
|
|||
|
|
" <td>5.396211</td>\n",
|
|||
|
|
" <td>0.971825</td>\n",
|
|||
|
|
" <td>695.0</td>\n",
|
|||
|
|
" <td>6.411072</td>\n",
|
|||
|
|
" <td>15.0</td>\n",
|
|||
|
|
" <td>33.385294</td>\n",
|
|||
|
|
" <td>0.500000</td>\n",
|
|||
|
|
" <td>0.261299</td>\n",
|
|||
|
|
" <td>-1.000000e+00</td>\n",
|
|||
|
|
" <td>1.217922e-03</td>\n",
|
|||
|
|
" <td>0.998782</td>\n",
|
|||
|
|
" <td>0.031597</td>\n",
|
|||
|
|
" <td>0.000212</td>\n",
|
|||
|
|
" <td>-0.053542</td>\n",
|
|||
|
|
" <td>-0.021419</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>56</td>\n",
|
|||
|
|
" <td>69604.137526</td>\n",
|
|||
|
|
" <td>1.597203</td>\n",
|
|||
|
|
" <td>0.096169</td>\n",
|
|||
|
|
" <td>5.5</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>2.0</td>\n",
|
|||
|
|
" <td>32.303571</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.411282</td>\n",
|
|||
|
|
" <td>-1.038075e-01</td>\n",
|
|||
|
|
" <td>1.023987e-01</td>\n",
|
|||
|
|
" <td>0.897601</td>\n",
|
|||
|
|
" <td>0.017582</td>\n",
|
|||
|
|
" <td>-0.003086</td>\n",
|
|||
|
|
" <td>0.030811</td>\n",
|
|||
|
|
" <td>0.030144</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>55</td>\n",
|
|||
|
|
" <td>112893.522820</td>\n",
|
|||
|
|
" <td>3.412073</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>2817.0</td>\n",
|
|||
|
|
" <td>17.783333</td>\n",
|
|||
|
|
" <td>32.0</td>\n",
|
|||
|
|
" <td>36.075000</td>\n",
|
|||
|
|
" <td>0.567164</td>\n",
|
|||
|
|
" <td>0.247587</td>\n",
|
|||
|
|
" <td>-1.017058e+00</td>\n",
|
|||
|
|
" <td>1.823180e-02</td>\n",
|
|||
|
|
" <td>0.981768</td>\n",
|
|||
|
|
" <td>0.024635</td>\n",
|
|||
|
|
" <td>0.036707</td>\n",
|
|||
|
|
" <td>0.144550</td>\n",
|
|||
|
|
" <td>0.179594</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>49</td>\n",
|
|||
|
|
" <td>560168.667962</td>\n",
|
|||
|
|
" <td>4.651358</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>12261.0</td>\n",
|
|||
|
|
" <td>39.669231</td>\n",
|
|||
|
|
" <td>64.0</td>\n",
|
|||
|
|
" <td>66.357143</td>\n",
|
|||
|
|
" <td>0.630952</td>\n",
|
|||
|
|
" <td>0.030313</td>\n",
|
|||
|
|
" <td>-1.079437e+00</td>\n",
|
|||
|
|
" <td>2.594109e-02</td>\n",
|
|||
|
|
" <td>0.974059</td>\n",
|
|||
|
|
" <td>0.136713</td>\n",
|
|||
|
|
" <td>0.120367</td>\n",
|
|||
|
|
" <td>-0.145236</td>\n",
|
|||
|
|
" <td>-0.105743</td>\n",
|
|||
|
|
" <td>-0.009615</td>\n",
|
|||
|
|
" <td>0.012359</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>25</td>\n",
|
|||
|
|
" <td>57501.203108</td>\n",
|
|||
|
|
" <td>17.261817</td>\n",
|
|||
|
|
" <td>0.959184</td>\n",
|
|||
|
|
" <td>1528.0</td>\n",
|
|||
|
|
" <td>6.569231</td>\n",
|
|||
|
|
" <td>21.0</td>\n",
|
|||
|
|
" <td>31.894737</td>\n",
|
|||
|
|
" <td>0.775000</td>\n",
|
|||
|
|
" <td>0.023211</td>\n",
|
|||
|
|
" <td>-1.001768e+00</td>\n",
|
|||
|
|
" <td>5.792661e-01</td>\n",
|
|||
|
|
" <td>0.420734</td>\n",
|
|||
|
|
" <td>0.033544</td>\n",
|
|||
|
|
" <td>0.005462</td>\n",
|
|||
|
|
" <td>0.008353</td>\n",
|
|||
|
|
" <td>0.011572</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6</th>\n",
|
|||
|
|
" <td>9</td>\n",
|
|||
|
|
" <td>194997.827408</td>\n",
|
|||
|
|
" <td>18.003149</td>\n",
|
|||
|
|
" <td>0.984615</td>\n",
|
|||
|
|
" <td>7055.0</td>\n",
|
|||
|
|
" <td>11.907692</td>\n",
|
|||
|
|
" <td>51.0</td>\n",
|
|||
|
|
" <td>29.066667</td>\n",
|
|||
|
|
" <td>0.400000</td>\n",
|
|||
|
|
" <td>-0.042623</td>\n",
|
|||
|
|
" <td>-1.023016e+00</td>\n",
|
|||
|
|
" <td>2.153833e-14</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>0.001358</td>\n",
|
|||
|
|
" <td>0.001471</td>\n",
|
|||
|
|
" <td>0.005738</td>\n",
|
|||
|
|
" <td>0.009084</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>5</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>59193.615277</td>\n",
|
|||
|
|
" <td>6.061456</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>2414.0</td>\n",
|
|||
|
|
" <td>43.123077</td>\n",
|
|||
|
|
" <td>103.0</td>\n",
|
|||
|
|
" <td>54.427184</td>\n",
|
|||
|
|
" <td>0.728155</td>\n",
|
|||
|
|
" <td>0.015762</td>\n",
|
|||
|
|
" <td>-1.017068e+00</td>\n",
|
|||
|
|
" <td>1.076916e-14</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>0.163810</td>\n",
|
|||
|
|
" <td>-0.062279</td>\n",
|
|||
|
|
" <td>-0.241822</td>\n",
|
|||
|
|
" <td>-0.208029</td>\n",
|
|||
|
|
" <td>0.235343</td>\n",
|
|||
|
|
" <td>-0.239479</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>7</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>216280.070269</td>\n",
|
|||
|
|
" <td>4.400942</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>8591.0</td>\n",
|
|||
|
|
" <td>44.053846</td>\n",
|
|||
|
|
" <td>78.0</td>\n",
|
|||
|
|
" <td>73.423077</td>\n",
|
|||
|
|
" <td>0.551282</td>\n",
|
|||
|
|
" <td>0.223073</td>\n",
|
|||
|
|
" <td>-1.034850e+00</td>\n",
|
|||
|
|
" <td>2.553513e-15</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>0.135939</td>\n",
|
|||
|
|
" <td>0.057736</td>\n",
|
|||
|
|
" <td>-0.124601</td>\n",
|
|||
|
|
" <td>-0.139955</td>\n",
|
|||
|
|
" <td>-0.287599</td>\n",
|
|||
|
|
" <td>-0.015040</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>9</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>75884.615385</td>\n",
|
|||
|
|
" <td>4.849468</td>\n",
|
|||
|
|
" <td>0.282051</td>\n",
|
|||
|
|
" <td>16.0</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>1.0</td>\n",
|
|||
|
|
" <td>39.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.152174</td>\n",
|
|||
|
|
" <td>-6.666667e+12</td>\n",
|
|||
|
|
" <td>6.853147e-01</td>\n",
|
|||
|
|
" <td>0.314685</td>\n",
|
|||
|
|
" <td>-0.135836</td>\n",
|
|||
|
|
" <td>0.145882</td>\n",
|
|||
|
|
" <td>-0.113162</td>\n",
|
|||
|
|
" <td>-0.214312</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" n_clients aum_qty_mean_med gross_flow_to_aum_med \\\n",
|
|||
|
|
"cluster_k10 \n",
|
|||
|
|
"1 113 63010.409768 3.947961 \n",
|
|||
|
|
"8 110 23499.160487 5.396211 \n",
|
|||
|
|
"3 56 69604.137526 1.597203 \n",
|
|||
|
|
"2 55 112893.522820 3.412073 \n",
|
|||
|
|
"4 49 560168.667962 4.651358 \n",
|
|||
|
|
"0 25 57501.203108 17.261817 \n",
|
|||
|
|
"6 9 194997.827408 18.003149 \n",
|
|||
|
|
"5 1 59193.615277 6.061456 \n",
|
|||
|
|
"7 1 216280.070269 4.400942 \n",
|
|||
|
|
"9 1 75884.615385 4.849468 \n",
|
|||
|
|
"\n",
|
|||
|
|
" flow_freq_med n_tx_total_med avg_n_isin_held_med \\\n",
|
|||
|
|
"cluster_k10 \n",
|
|||
|
|
"1 1.000000 1525.0 10.853846 \n",
|
|||
|
|
"8 0.971825 695.0 6.411072 \n",
|
|||
|
|
"3 0.096169 5.5 1.000000 \n",
|
|||
|
|
"2 1.000000 2817.0 17.783333 \n",
|
|||
|
|
"4 1.000000 12261.0 39.669231 \n",
|
|||
|
|
"0 0.959184 1528.0 6.569231 \n",
|
|||
|
|
"6 0.984615 7055.0 11.907692 \n",
|
|||
|
|
"5 1.000000 2414.0 43.123077 \n",
|
|||
|
|
"7 1.000000 8591.0 44.053846 \n",
|
|||
|
|
"9 0.282051 16.0 1.000000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" n_isin_total_med avg_holding_months_per_isin_med \\\n",
|
|||
|
|
"cluster_k10 \n",
|
|||
|
|
"1 23.0 50.500000 \n",
|
|||
|
|
"8 15.0 33.385294 \n",
|
|||
|
|
"3 2.0 32.303571 \n",
|
|||
|
|
"2 32.0 36.075000 \n",
|
|||
|
|
"4 64.0 66.357143 \n",
|
|||
|
|
"0 21.0 31.894737 \n",
|
|||
|
|
"6 51.0 29.066667 \n",
|
|||
|
|
"5 103.0 54.427184 \n",
|
|||
|
|
"7 78.0 73.423077 \n",
|
|||
|
|
"9 1.0 39.000000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" exit_rate_per_isin_med flow_direction_balance_med \\\n",
|
|||
|
|
"cluster_k10 \n",
|
|||
|
|
"1 0.576923 -0.150123 \n",
|
|||
|
|
"8 0.500000 0.261299 \n",
|
|||
|
|
"3 0.000000 0.411282 \n",
|
|||
|
|
"2 0.567164 0.247587 \n",
|
|||
|
|
"4 0.630952 0.030313 \n",
|
|||
|
|
"0 0.775000 0.023211 \n",
|
|||
|
|
"6 0.400000 -0.042623 \n",
|
|||
|
|
"5 0.728155 0.015762 \n",
|
|||
|
|
"7 0.551282 0.223073 \n",
|
|||
|
|
"9 0.000000 0.152174 \n",
|
|||
|
|
"\n",
|
|||
|
|
" redemption_bias_med aum_drawdown_last_med \\\n",
|
|||
|
|
"cluster_k10 \n",
|
|||
|
|
"1 -1.021636e+00 5.794239e-01 \n",
|
|||
|
|
"8 -1.000000e+00 1.217922e-03 \n",
|
|||
|
|
"3 -1.038075e-01 1.023987e-01 \n",
|
|||
|
|
"2 -1.017058e+00 1.823180e-02 \n",
|
|||
|
|
"4 -1.079437e+00 2.594109e-02 \n",
|
|||
|
|
"0 -1.001768e+00 5.792661e-01 \n",
|
|||
|
|
"6 -1.023016e+00 2.153833e-14 \n",
|
|||
|
|
"5 -1.017068e+00 1.076916e-14 \n",
|
|||
|
|
"7 -1.034850e+00 2.553513e-15 \n",
|
|||
|
|
"9 -6.666667e+12 6.853147e-01 \n",
|
|||
|
|
"\n",
|
|||
|
|
" aum_final_to_peak_med corr_flow_fund_lag3_med \\\n",
|
|||
|
|
"cluster_k10 \n",
|
|||
|
|
"1 0.420576 0.023201 \n",
|
|||
|
|
"8 0.998782 0.031597 \n",
|
|||
|
|
"3 0.897601 0.017582 \n",
|
|||
|
|
"2 0.981768 0.024635 \n",
|
|||
|
|
"4 0.974059 0.136713 \n",
|
|||
|
|
"0 0.420734 0.033544 \n",
|
|||
|
|
"6 1.000000 0.001358 \n",
|
|||
|
|
"5 1.000000 0.163810 \n",
|
|||
|
|
"7 1.000000 0.135939 \n",
|
|||
|
|
"9 0.314685 -0.135836 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_fund_lag6_med corr_flow_rate_lag3_med \\\n",
|
|||
|
|
"cluster_k10 \n",
|
|||
|
|
"1 0.035668 -0.023551 \n",
|
|||
|
|
"8 0.000212 -0.053542 \n",
|
|||
|
|
"3 -0.003086 0.030811 \n",
|
|||
|
|
"2 0.036707 0.144550 \n",
|
|||
|
|
"4 0.120367 -0.145236 \n",
|
|||
|
|
"0 0.005462 0.008353 \n",
|
|||
|
|
"6 0.001471 0.005738 \n",
|
|||
|
|
"5 -0.062279 -0.241822 \n",
|
|||
|
|
"7 0.057736 -0.124601 \n",
|
|||
|
|
"9 0.145882 -0.113162 \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_rate_lag6_med corr_flow_bench_lag3_med \\\n",
|
|||
|
|
"cluster_k10 \n",
|
|||
|
|
"1 -0.044320 NaN \n",
|
|||
|
|
"8 -0.021419 NaN \n",
|
|||
|
|
"3 0.030144 NaN \n",
|
|||
|
|
"2 0.179594 NaN \n",
|
|||
|
|
"4 -0.105743 -0.009615 \n",
|
|||
|
|
"0 0.011572 NaN \n",
|
|||
|
|
"6 0.009084 NaN \n",
|
|||
|
|
"5 -0.208029 0.235343 \n",
|
|||
|
|
"7 -0.139955 -0.287599 \n",
|
|||
|
|
"9 -0.214312 NaN \n",
|
|||
|
|
"\n",
|
|||
|
|
" corr_flow_bench_lag6_med \n",
|
|||
|
|
"cluster_k10 \n",
|
|||
|
|
"1 NaN \n",
|
|||
|
|
"8 NaN \n",
|
|||
|
|
"3 NaN \n",
|
|||
|
|
"2 NaN \n",
|
|||
|
|
"4 0.012359 \n",
|
|||
|
|
"0 NaN \n",
|
|||
|
|
"6 NaN \n",
|
|||
|
|
"5 -0.239479 \n",
|
|||
|
|
"7 -0.015040 \n",
|
|||
|
|
"9 NaN "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"profile_vars = [\n",
|
|||
|
|
" \"aum_qty_mean\",\n",
|
|||
|
|
" \"gross_flow_to_aum\",\n",
|
|||
|
|
" \"flow_freq\",\n",
|
|||
|
|
" \"n_tx_total\",\n",
|
|||
|
|
" \"avg_n_isin_held\",\n",
|
|||
|
|
" \"n_isin_total\",\n",
|
|||
|
|
" \"avg_holding_months_per_isin\",\n",
|
|||
|
|
" \"exit_rate_per_isin\",\n",
|
|||
|
|
" \"flow_direction_balance\",\n",
|
|||
|
|
" \"redemption_bias\",\n",
|
|||
|
|
" \"aum_drawdown_last\",\n",
|
|||
|
|
" \"aum_final_to_peak\",\n",
|
|||
|
|
" \"corr_flow_fund_lag3\",\n",
|
|||
|
|
" \"corr_flow_fund_lag6\",\n",
|
|||
|
|
" \"corr_flow_rate_lag3\",\n",
|
|||
|
|
" \"corr_flow_rate_lag6\",\n",
|
|||
|
|
" \"corr_flow_bench_lag3\",\n",
|
|||
|
|
" \"corr_flow_bench_lag6\"\n",
|
|||
|
|
"]\n",
|
|||
|
|
"\n",
|
|||
|
|
"profile_vars = [c for c in profile_vars if c in dfc.columns]\n",
|
|||
|
|
"\n",
|
|||
|
|
"for k in [2, 5, 6, 10]:\n",
|
|||
|
|
" print(f\"\\n===== K={k} =====\")\n",
|
|||
|
|
" prof = (\n",
|
|||
|
|
" dfc.groupby(f\"cluster_k{k}\")\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" n_clients=(ID_COL, \"count\"),\n",
|
|||
|
|
" **{f\"{c}_med\": (c, \"median\") for c in profile_vars}\n",
|
|||
|
|
" )\n",
|
|||
|
|
" .sort_values(\"n_clients\", ascending=False)\n",
|
|||
|
|
" )\n",
|
|||
|
|
" display(prof)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 268,
|
|||
|
|
"id": "993b081d-fc75-4d70-bfba-0c4c027cf540",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAj8lJREFUeJzt3XdYk1cbB+BfEkDAyXKLG0SGgLgAQRGr4iq4FfxUrKNqFWur1qp1tNDWXbXuvavgBEfdAzcKKloRBaQOlqKAjOR8f9BEQgIkkEme+7q8JCdv3vfJIQlPzuQwxhgIIYQQQnQYV90BEEIIIYSoGyVEhBBCCNF5lBARQgghROdRQkQIIYQQnUcJESGEEEJ0HiVEhBBCCNF5lBARQgghROdRQkQIIYQQnUcJESGEEEJ0HiVEGiYgIAB9+vRR6Dmtra2xcOFChZ5TFqGhobC2tsbLly9Vfm0AmDVrFry8vNRybSLu5cuXsLa2RmhoqLpDURhlPScvLy/MmjVLoefU5OsqQ0BAAAICAtQdhkIcPnwYPXv2hK2tLVxcXNQdTqWmp+4ANFl0dDQOHz6MGzduIDk5GbVq1UKbNm0wbdo0NG3aVOzYgIAA3Lx5EwDA4XBgbGwMCwsLODg44Msvv4Sbm5s6ngIhMouLi0NERAR8fX3RsGHDcp3j2LFjSEtLw6hRoxQbXCVz9+5dXL16Ff/73/9Qo0YNdYdTYbt374aRkRH8/PzUHYpUb968wYEDB+Dt7Q0bGxt1hyOzZ8+eYfbs2ejcuTPGjRsHQ0NDdYdUqVFCVIpNmzbh7t276NmzJ6ytrZGSkoLdu3fDz88P+/fvh5WVldjxdevWxfTp0wEAOTk5SEhIwJkzZ3D06FH06tULv//+O/T19dXxVNSif//+6N27NwwMDNQdCpFBXFwcVq9ejfbt25c7ITp+/DiePn0qkRA1aNAA0dHR0NOjjxwAiIqKwurVq+Hr6yuREJ08eRIcDkflMVXkunv37oWJiYnGJESbN28Wu/327VusXr0aDRo00KqE6ObNmxAIBJgzZw4aN26s7nAqPfp0KsWoUaOwZMkSsT/oPj4+6Nu3LzZs2IAlS5aIHV+9enX0799frGzGjBlYvHgx9uzZgwYNGuC7775TSeyagMfjgcfjKex8OTk5MDIyUtj5iOpwOBxUqVJF3WGUSiAQID8/X+1xqusLRGX64lJZnktaWhqAwr8tpWGMITc3l1qQKojGEJXC2dlZ4o3VpEkTtGzZEvHx8TKdg8fj4ccff0SLFi2we/dufPjwQabHPXjwAEOHDoWDgwO8vLywd+9eiWPy8vKwatUqdO/eHXZ2dvD09MRvv/2GvLw8qef8+++/0adPH9jZ2aF37964dOmS2P3Jycn46aef0KNHDzg4OKBDhw745ptvxMYAxcTEwNraGmFhYRLnv3z5MqytrXH+/HkAJY8h2r17N3r37g07Ozu4u7tjwYIFyMzMFDtGOJbqwYMHGDFiBNq0aYNly5aJnse4cePg7u4OOzs7eHt7Y82aNeDz+TLUrKSYmBgEBgaiQ4cOovqePXu22DECgQDbtm1D7969YW9vD1dXV8ybNw/v37+XOO6PP/6Au7s72rRpg4CAAMTFxUmMzxDWze3bt7F48WJ07NgRLi4umDdvHvLy8pCZmYnvv/8e7dq1Q7t27fDbb7+BMVaumLy8vDB+/Hjcvn0bAwcOhL29Pbp164bDhw+LxTN16lQAwMiRI2FtbQ1ra2vcuHFD5joPCAjAhQsXkJycLHq8cAxXSeNtIiMjMXz4cDg6OsLFxQUTJ07Es2fPxI75448/YG1tjYSEBMyaNQsuLi5o27YtZs+ejZycHLFj09PT8ezZM4lyaYRj644ePSqqw8uXLwMo7GKZPXs2XF1dRe+XgwcPlnnOx48fY9asWejWrRvs7e3h5uaG2bNnIyMjQ+z5/PbbbwCAbt26iepK+D4p+lqR5/1WkbiLXxf4/B
q9c+cOgoOD0bFjRzg6OmLSpElIT08Xe9zTp09x8+ZN0XMpOn4nMzMTP//8Mzw9PWFnZ4fu3btjw4YNEAgEomOEr4/Nmzdj//798Pb2hp2dHQYMGIDo6GixOFNSUjB79mx4eHiIPkMmTpwo9jlTdAzRjRs3MHDgQADA7NmzRTGGhoZi1apVsLW1FXs+QnPnzoWLiwtyc3Ol1tfmzZthbW2N5ORkifuWLl0KOzs70XvxxYsXmDJlCtzc3GBvbw8PDw8EBQWV+vfAy8sLf/zxBwCgU6dOsLa2Ft0WvqcvX74MPz8/ODg4YN++fTLXt/C4WbNmoW3btnBxccHMmTMRGxsr8T4taTyWtDGaivxMKhrnL7/8Ai8vL9jZ2cHDwwPff/890tPTkZWVBUdHRyxevFjica9fv4aNjQ3Wr19fYh0XRy1EcmKMITU1FS1btpT5MTweD71798bKlStx584ddOnSpdTj379/j3HjxqFXr17o3bs3IiIi8NNPP0FfX1/0xhYIBJg4cSLu3LmDwYMHo3nz5vjnn3+wfft2vHjxAmvXrhU75507d3D69GkMHz4cVatWxc6dO/HNN9/g/PnzMDExAVD44RsVFYXevXujbt26SE5Oxt69ezFy5EicOHECRkZGsLe3R6NGjURjTYoKDw9HzZo14e7uXuJz++OPP7B69Wq4urpi2LBheP78Ofbu3YuYmBjs3btXrEvx3bt3+Oqrr9C7d2/069cPZmZmAICwsDAYGxtj9OjRMDY2xvXr17Fq1Sp8/PgRM2fOlPn3AhR+AwsMDISJiQnGjRuHGjVq4OXLlzhz5ozYcfPmzUNYWBj8/PwQEBCAly9fYvfu3Xj06JFY3EuXLsWmTZvQtWtXdO7cGY8fP0ZgYGCJH6qLFy+Gubk5pkyZgvv372P//v2oXr06oqKiUK9ePQQFBeHSpUvYvHkzrKys8OWXX8odEwAkJCRg6tSpGDhwIHx9fXHo0CHMmjULtra2aNmyJdq1a4eAgADs3LkTEyZMQLNmzQAAzZs3l7nOJ0yYgA8fPuD169eihLJq1aol1v21a9fw1VdfoWHDhpg8eTI+ffqEXbt2YdiwYQgNDZXotps2bRoaNmyI6dOn49GjR/jrr79gamoq1uq6e/durF69Gjt27ECHDh1K/d0DwPXr1xEREYERI0bAxMQEDRo0QGpqKgYPHgwOh4MRI0bA1NQUly5dwpw5c/Dx48dSx0ddu3YNSUlJ8PPzg4WFBZ4+fYoDBw4gLi4OBw4cAIfDQffu3fHixQscP34cs2fPFr3/TE1NJc4nz/utInGXZvHixahRowYmT56M5ORkbN++HQsXLsSKFSsAAD/88AMWLVoEY2NjTJgwAQBgbm4OoLBV19/fH2/evMHQoUNRr149REVFYdmyZUhJScGcOXPErnX8+HFkZWVhyJAh4HA42LRpE6ZMmYK///5b9HqeMmUK4uLi4O/vjwYNGiA9PR1Xr17Fq1evpHb1Nm/eHN988w1WrVqFIUOGoG3btgAKv/C2bdsWa9asQXh4OPz9/UWPycvLw6lTp/DFF1+U2GIoHAYRERGBsWPHit0XEREBNzc31KxZE3l5eQgMDEReXh78/f1hbm6ON2/e4MKFC8jMzCyx9eeHH37A4cOHcebMGfz0008wNjaGtbW16P7nz5/j22+/xZAhQzB48GA0bdpU5vpmjOHrr7/GnTt3MHToUDRv3hxnzpyR+/OzOEV+JgFAVlYWRowYgWfPnmHAgAFo3bo1MjIycO7cObx58wY2Njbw9vZGREQEZs+eLdYjcfz4cTDG0LdvX9mfACNyOXz4MLOysmJ//fWXWLm/vz/r3bt3iY87c+YMs7KyYtu3by/1/P7+/szKyopt2bJFVJabm8v69+/POnXqxPLy8kRxtGrVit26dUvs8Xv37mVWVlbszp07ojIrKytma2vLEhISRGWxsbHMysqK7dy5U1SWk5MjEU9UVBSzsrJiYWFhorKlS5cyW1
tb9u7dO7EYXVxc2OzZs0Vlhw4dYlZWViwpKYkxxlhaWhqztbVlY8aMYXw+X3Tcrl27mJWVFTt48KBEPezdu1ciJml
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# Analyse graphique des clusters\n",
|
|||
|
|
"\n",
|
|||
|
|
"thr_int = dfc[\"gross_flow_to_aum\"].median()\n",
|
|||
|
|
"thr_freq = dfc[\"flow_freq\"].median()\n",
|
|||
|
|
"\n",
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"for name, g in dfc[~dfc['cluster_k5'].isin([4.0])].groupby(\"cluster_k5\"):\n",
|
|||
|
|
" plt.scatter(g[\"flow_freq\"], g[\"gross_flow_to_aum\"], s=10, label=name)\n",
|
|||
|
|
"\n",
|
|||
|
|
"plt.yscale(\"log\")\n",
|
|||
|
|
"plt.axvline(thr_freq, linestyle=\"--\")\n",
|
|||
|
|
"plt.axhline(thr_int, linestyle=\"--\")\n",
|
|||
|
|
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
|
|||
|
|
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
|
|||
|
|
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
|
|||
|
|
"plt.legend(markerscale=2)\n",
|
|||
|
|
"plt.ylim(0.1,100)\n",
|
|||
|
|
"plt.show()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 270,
|
|||
|
|
"id": "fcc7e8fb-c99e-4c64-8d36-32923f997d91",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAnTNJREFUeJzs3XdYU9cbB/BvEoiAILLEibgYshUXWlDEuuoAV7Xozz2qVq22jta2jhbbaq1WrXtbR1Wc4Ki7ihsFBdzKEJWlKKJAcn5/0ERCEsiFTHg/z9On5uTm3vee3CQv9yweY4yBEEIIIaQS4+s6AEIIIYQQXaOEiBBCCCGVHiVEhBBCCKn0KCEihBBCSKVHCREhhBBCKj1KiAghhBBS6VFCRAghhJBKjxIiQgghhFR6lBARQgghpNKjhEjPDB48GJ988ola9+ns7Iy5c+eqdZ+q2Lt3L5ydnZGcnKz1YwPAjBkzEBgYqJNjE1nJyclwdnbG3r17dR2K2mjqnAIDAzFjxgy17lOfj6sJgwcPxuDBg3Udhlrs27cPXbp0gZubG3x9fXUdToVmpOsA9FlMTAz27duHS5cuISUlBdWrV4eXlxcmT56MBg0ayGw7ePBgXL58GQDA4/FgZmYGOzs7eHp6onfv3mjbtq0uToEQld2/fx+RkZEIDg5G3bp1y7SPgwcPIiMjA0OHDlVvcBXM9evXcf78efzvf/9DtWrVdB1OuW3btg2mpqYICQnRdSgKPX/+HLt27UJQUBBcXV11HY7KHjx4gJkzZ+Kjjz7C6NGjYWJiouuQKjRKiEqwdu1aXL9+HV26dIGzszPS0tKwbds2hISEYOfOnXBycpLZvmbNmvjyyy8BALm5uXjy5AmOHz+OAwcOoGvXrvj1119hbGysi1PRiV69eqF79+4QCoW6DoWo4P79+1i2bBlatmxZ5oTo0KFDuHfvnlxCVKdOHcTExMDIiL5yACA6OhrLli1DcHCwXEJ05MgR8Hg8rcdUnuNu374dVlZWepMQrVu3TubxixcvsGzZMtSpU8egEqLLly9DLBbjm2++Qf369XUdToVH304lGDp0KBYuXCjzg96tWzf06NEDq1evxsKFC2W2t7CwQK9evWTKpk2bhvnz5+Ovv/5CnTp18NVXX2kldn0gEAggEAjUtr/c3FyYmpqqbX9Ee3g8HqpUqaLrMEokFouRn5+v8zh19QdERfrDpaKcS0ZGBoDC35aSMMbw/v17uoNUTtSHqATNmjWT+2A5OjqiSZMmePjwoUr7EAgE+Pbbb9G4cWNs27YNr1+/Vul1t27dwqeffgpPT08EBgZi+/btctvk5eVh6dKl6NSpE9zd3REQEIBffvkFeXl5Cvf5zz//4JNPPoG7uzu6d++Os2fPyjyfkpKCH374AZ07d4anpydatWqFL774QqYPUGxsLJydnREeHi63/3PnzsHZ2RmnTp0CoLwP0bZt29C9e3e4u7ujXbt2mDNnDrKzs2W2kfSlunXrFj777DN4eXnht99+k57H6NGj0a5dO7i7uyMoKAjLly+HSCRSoWblxcbGYsSIEWjVqpW0vmfOnCmzjVgsxsaNG9G9e3d4eHjAz88P3333HV69eiW33R9//IF27drBy8sLgwcPxv379+X6Z0jq5urVq5g/fz5at24NX19ffPfdd8jLy0N2dja+/vprtGjRAi1atMAvv/wCxliZYgoMDMSYMWNw9epV9O3bFx4eHujYsSP27dsnE8+kSZMAAEOGDIGzszOcnZ1x6dIllet88ODBOH36NFJSUqSvl/ThUtbfJioqCoMGDYK3tzd8fX0xbtw4PHjwQGabP/74A87Oznjy5AlmzJgBX19fNG/eHDNnzkRubq7MtpmZmXjw4IFcuSKSvnUHDhyQ1uG5c+cAFDaxzJw5E35+ftLPy+7du0vdZ0JCAmbMmIGOHTvCw8MDbdu2xcyZM5GVlSVzPr/88gsAoGPHjtK6knxOil
4rXD5v5Ym7+HGBD9fotWvXEBYWhtatW8Pb2xvjx49HZmamzOvu3buHy5cvS8+laP+d7Oxs/PjjjwgICIC7uzs6deqE1atXQywWS7eRXB/r1q3Dzp07ERQUBHd3d/Tp0wcxMTEycaalpWHmzJnw9/eXfoeMGzdO5numaB+iS5cuoW/fvgCAmTNnSmPcu3cvli5dCjc3N5nzkZg9ezZ8fX3x/v17hfW1bt06ODs7IyUlRe65RYsWwd3dXfpZfPz4MSZOnIi2bdvCw8MD/v7+mDJlSom/B4GBgfjjjz8AAG3atIGzs7P0seQzfe7cOYSEhMDT0xM7duxQub4l282YMQPNmzeHr68vpk+fjvj4eLnPqbL+WIr6aKrzO6lonD/99BMCAwPh7u4Of39/fP3118jMzEROTg68vb0xf/58udc9e/YMrq6uWLVqldI6Lo7uEHHEGEN6ejqaNGmi8msEAgG6d++OJUuW4Nq1a2jfvn2J27969QqjR49G165d0b17d0RGRuKHH36AsbGx9IMtFosxbtw4XLt2Df3790ejRo1w9+5dbNq0CY8fP8aKFStk9nnt2jUcO3YMgwYNQtWqVbFlyxZ88cUXOHXqFKysrAAUfvlGR0eje/fuqFmzJlJSUrB9+3YMGTIEhw8fhqmpKTw8PFCvXj1pX5OiIiIiYGlpiXbt2ik9tz/++APLli2Dn58fBg4ciEePHmH79u2IjY3F9u3bZZoUX758iVGjRqF79+7o2bMnbGxsAADh4eEwMzPDsGHDYGZmhosXL2Lp0qV48+YNpk+frvL7AhT+BTZixAhYWVlh9OjRqFatGpKTk3H8+HGZ7b777juEh4cjJCQEgwcPRnJyMrZt24a4uDiZuBctWoS1a9eiQ4cO+Oijj5CQkIARI0Yo/VKdP38+bG1tMXHiRNy8eRM7d+6EhYUFoqOjUatWLUyZMgVnz57FunXr4OTkhN69e3OOCQCePHmCSZMmoW/fvggODsaePXswY8YMuLm5oUmTJmjRogUGDx6MLVu2YOzYsWjYsCEAoFGjRirX+dixY/H69Ws8e/ZMmlBWrVpVad1fuHABo0aNQt26dTFhwgS8e/cOW7duxcCBA7F37165ZrvJkyejbt26+PLLLxEXF4e///4b1tbWMnddt23bhmXLlmHz5s1o1apVie89AFy8eBGRkZH47LPPYGVlhTp16iA9PR39+/cHj8fDZ599Bmtra5w9exbffPMN3rx5U2L/qAsXLiApKQkhISGws7PDvXv3sGvXLty/fx+7du0Cj8dDp06d8PjxYxw6dAgzZ86Ufv6sra3l9sfl81aeuEsyf/58VKtWDRMmTEBKSgo2bdqEuXPn4vfffwcAzJo1C/PmzYOZmRnGjh0LALC1tQVQeFc3NDQUz58/x6effopatWohOjoav/32G9LS0vDNN9/IHOvQoUPIycnBgAEDwOPxsHbtWkycOBH//POP9HqeOHEi7t+/j9DQUNSpUweZmZk4f/48UlNTFTb1NmrUCF988QWWLl2KAQMGoHnz5gAK/+Bt3rw5li9fjoiICISGhkpfk5eXh6NHj+Ljjz9WesdQ0g0iMjISI0eOlHkuMjISbdu2haWlJfLy8jBixAjk5eUhNDQUtra2eP78OU6fPo3s7Gyld39mzZqFffv24fjx4/jhhx9gZmYGZ2dn6fOPHj3C1KlTMWDAAPTv3x8NGjRQub4ZY/j8889x7do1fPrpp2jUqBGOHz/O+fuzOHV+JwFATk4OPvvsMzx48AB9+vRB06ZNkZWVhZMnT+L58+dwdXVFUFAQIiMjMXPmTJkWiUOHDoExhh49eqh+Aoxwsm/fPubk5MT+/vtvmfLQ0FDWvXt3pa87fvw4c3JyYps2bSpx/6GhoczJyYmtX79eWvb+/XvWq1cv1qZNG5aXlyeNw8XFhV25ckXm9du3b2dOTk7s2rVr0jInJyfm5ubGnjx5Ii2Lj49nTk5ObMuWLdKy3NxcuXiio6
OZk5MTCw8Pl5YtWrSIubm5sZcvX8rE6Ovry2bOnCkt27NnD3NycmJJSUmMMcYyMjKYm5sbGz58OBOJRNLttm7dypy
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"thr_churn = dfc[\"aum_final_to_peak\"].median()\n",
|
|||
|
|
"thr_freq = dfc[\"flow_freq\"].median()\n",
|
|||
|
|
"\n",
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"for name, g in dfc[~dfc['cluster_k5'].isin([3.0])].groupby(\"cluster_k5\"):\n",
|
|||
|
|
" plt.scatter(g[\"flow_freq\"], g[\"aum_final_to_peak\"], s=10, label=name)\n",
|
|||
|
|
"\n",
|
|||
|
|
"plt.yscale(\"log\")\n",
|
|||
|
|
"plt.axvline(thr_freq, linestyle=\"--\")\n",
|
|||
|
|
"plt.axhline(thr_churn, linestyle=\"--\")\n",
|
|||
|
|
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
|
|||
|
|
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
|
|||
|
|
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
|
|||
|
|
"plt.legend(markerscale=2)\n",
|
|||
|
|
"plt.ylim(0.01,100)\n",
|
|||
|
|
"plt.show()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 304,
|
|||
|
|
"id": "4eb9cef0-c016-40c4-bb25-ebdaa941697c",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABNgAAAGGCAYAAACpCjxcAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAslJJREFUeJzs3XlYzen/P/DnabOlqCypbKGQpVS2BlP2ZYSxDbKEsY/sZW1IliyRkSwlM3ZChMGMsUwq2bIbW1k/yRKK6nR+f/TrfB0VqXN6d97n+biurqvuc+o8byenc17nvl+3RCaTyUBEREREREREREQFoiV0ACIiIiIiIiIiInXGAhsREREREREREVEhsMBGRERERERERERUCCywERERERERERERFQILbERERERERERERIXAAhsREREREREREVEhsMBGRERERERERERUCCywERERERERERERFQILbERERERERERERIXAAhsREVEx5uzsjBkzZggdo0jt3bsXVlZWePTokdBRiIiIiIjyhQU2IiIiAcTHx2POnDlwcXFBgwYNYGdnh379+mHz5s348OFDkWRITU3F6tWrERUVVSS3p67Cw8MREhIidIxix8rKCr/++muO8cDAQFhZWcHT0xOZmZkF/vl3797FkiVL0L17d9ja2sLJyQkjR45EXFxcYWITERERqQQLbEREREXs5MmT6NatGw4fPozvv/8es2fPxuTJk1GlShUsXboUPj4+RZIjNTUVAQEBiI6OLpLby6/u3bvjypUrMDMzEzoKAODgwYMIDQ0VOoZaCAoKwooVK9CjRw/4+PhAS6vgTzV3796NXbt2wcbGBjNmzMCQIUNw//599O3bF//++68SUxMREREVno7QAYiIiDRJQkICPDw8UKVKFWzevBkVK1aUXzZgwAA8fPgQJ0+eFC6gEqSkpKB06dIF/n5tbW1oa2srMVHxlJqailKlSgkdQ2k2bNiAZcuWwdXVFQsXLixUcQ0AunTpgnHjxqFMmTLysV69eqFz585YvXo1WrRoUdjIRERERErDFWxERERFaMOGDUhJSYGPj49CcS1btWrVMHjw4Dy/f/Xq1bCyssoxnlvfsri4OLi7u6Np06Zo2LAhnJ2d4enpCQB49OgRmjdvDgAICAiAlZUVrKyssHr1avn33717FxMmTICjoyMaNGiAnj174sSJE7nebnR0NObNm4fmzZujdevWX/w32LJlC7p06YJGjRrBwcEBPXv2RHh4+BfnkpmZidWrV8PJyQmNGjXCoEGD8N9//+XoUZf9vbGxsfD19UWzZs3QuHFjjB07Fi9fvlTIcfz4cYwcORJOTk6wsbFB27ZtsWbNGkilUvl1Bg0ahJMnT+Lx48fyfyNnZ+c8cwJAVFQUrKysFLbeDho0CF27dsXVq1cxYMAANGrUCMuXLwcApKWlYdWqVWjXrh1sbGzQunVrLFmyBGlpaQo/9+zZs+jfvz/s7e1ha2uLDh06yH+G0IKDg7F06VL88MMP8PX1LXRxDQBsbGwUimsAUL58edjb2+PevXuF/vlEREREysQVbEREREXo77//hoWFBezs7FR6O0lJSXB3d0f58uUxcuRIGBgY4NGjRzh27BgAwMjICPPmzcO8efPQrl07tGvXDgDkxbs7d+6gf//+qFSpEkaMGIHSpUvj8OHDGDt2LFavXi2/fjZvb28YGRlh7NixSElJyTPXzp07sWDBAnTo0AFubm74+PEjbt26hcuXL6Nbt255ft+yZcuwYcMGfP/99/juu+9w8+ZNuLu74+PHj7lef8GCBTAwMMC4cePw+PFjbN68Gb/++itWrlwpv05YWBhKly6NoUOHonTp0jh37hxWrVqFd+/eYfr06QCAUaNG4e3bt3j27Jm8OPl50Se/Xr9+jREjRqBLly744YcfYGxsjMzMTIwePRqxsbHo06cPLC0tcfv2bWzevBkPHjzAb7/9BiDr/vj5559hZWWFCR
MmQE9PDw8fPsSFCxcKlEWZNm/ejEWLFqFr165YtGhRrsW1z4ubedHX14eent4Xr5OYmIhy5coVJCoRERGRyrDARkREVETevXuH58+fw8XFReW3dfHiRbx58wYbN25EgwYN5OMeHh4AgNKlS6NDhw6YN28erKys0L17d4Xv9/HxgampKfbs2SMvePz000/o378//Pz8chTYDA0NERIS8tWtnSdPnkTt2rWxatWqfM/lxYsXCAkJka8wyxYQEKCw4u5T5cqVw6ZNmyCRSABkrYDbsmUL3r59i7JlywLIKtqVLFlS/j39+/fHnDlzsG3bNnh4eEBPTw8tW7ZEaGgokpOTc/wbfavExER4e3ujX79+8rH9+/fj33//xZYtW2Bvby8fr127NubOnYsLFy7Azs4OZ8+eRXp6OtavXw8jI6NC5VCm7NV9Xbt2xZIlS/K8/7NXS36Nr68vevbsmefl58+fx6VLlzB69OgC5SUiIiJSFRbYiIiIisi7d+8AFHwF1LfILiKdPHkS1tbW0NXVzff3vn79GufOncOECRPkmbM5OTlh9erVeP78OSpVqiQf79OnT776phkYGODZs2e4cuUKGjZsmK88kZGRyMjIwE8//aQwPnDgwDwLbH369JEX1wDA3t4eISEhePz4MaytrQFAobj27t07pKWlwd7eHjt27MC9e/fk11MWPT29HMWjI0eOwNLSEjVr1lRY5dWsWTMAWdtN7ezsYGBgAAA4ceIEevXqpZQtmMrw4sULAIC5ufkX7//g4OB8/bxatWrleVlSUhImT54Mc3NzDB8+/NuCEhEREakYC2xERERFRF9fHwDw/v17ld+Wo6MjOnTogICAAISEhMDR0RFt27ZFt27dvroFLz4+HjKZDP7+/vD398/1OklJSQoFNnNz83zlGjFiBP7991/07t0b1apVQ8uWLdG1a1c0adIkz+958uQJAKBq1aoK4+XKlYOhoWGu31OlShWFr7MLVMnJyfKxO3fuYOXKlTh37lyOQuLbt2/zNZ9vUalSpRz/9g8fPsTdu3fzXOGVlJQEAOjcuTN27dqFWbNmYdmyZWjevDnatWuHjh07frHY9vr1a6Snpxcor6Gh4Vd/V1xdXfG///0PgYGBKF++PIYMGZLr9Qp7IEFKSgp+/vlnvH//Hlu3bi2SIjURERHRt2CBjYiIqIjo6+ujYsWKuHPnToF/xqersj71aWP+7OutWrUKly5dwt9//43Tp0/Dy8sLwcHB2LFjxxcLFJmZmQCAYcOG4bvvvsv1Op8Xu0qUKJGv/JaWljhy5AhOnjyJ06dP488//8TWrVsxduxYTJgwIV8/Iz/yKjrJZDIAWYW2gQMHQl9fHxMmTEDVqlVRokQJXLt2DX5+fvJ/gy/J677I63s/XTH36XXr1Kkj7+/2ucqVK8u/948//kBUVJT83y4iIgI7duzApk2b8lw9Nn78eERHR391LrkJDQ1F06ZNv3gdHR0d+Pv7Y/jw4Vi0aBHKli2LXr165bheYmJivm6zbNmyOf6d0tLSMH78eNy6dQsbN25EnTp18j8JIiIioiLCAhsREVER+v7777Fjxw5cvHgRtra23/z9n67Eyv4c+L9VXp9r3LgxGjduDA8PD4SHh2PKlCmIiIhA79698ywQWVhYAAB0dXULvfIoN6VLl0bnzp3RuXNnefEkMDAQP//8c66FuuzVaPHx8fJsAPDq1Su8efOmQBmio6Px+vVrBAQEwMHBQT7++YmgQN6FtOx//89Xuz1+/DjfOapWrYqbN2+iefPmed5ONi0tLTRv3hzNmzeHp6cnAgMDsWLFCkRFReV5P02fPl1h1d63yO8W2RIlSmDt2rVwc3PD7NmzYWBgkKNHn5OTU75+1uc92DIzMzF9+nRERkZi5cqVcHR0zP8EiIiIiIoQC2xERERFaPjw4QgPD8esWbOwefNmmJiYKFweHx+Pv//+G4MHD871+7NXjsXExMgPS0hJScG+ffsUrvfmzR
sYGBgoFG3q1q0LIGtFEACUKlUKAHIUYIyNjeHo6IgdO3Zg4MCBqFixosLlL1++LHCj/VevXqF8+fLyr/X09GBpaYl
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1400x400 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMsAAAGGCAYAAABlkwa3AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAs1FJREFUeJzs3XlcTfn/B/DXbbOHrG22UPZKRTSYYuyEsQ2yhLGP7MVEY8mSJTKSpTCDbCHrYMZYhkq2LGGQyvYlSyiq2/390XR/rorUvfd0z309H48ejzrndu/r46bufZ/P5/2RyGQyGYiIiIiIiIiIiAg6QgcgIiIiIiIiIiIqKlgsIyIiIiIiIiIi+g+LZURERERERERERP9hsYyIiIiIiIiIiOg/LJYRERERERERERH9h8UyIiIiIiIiIiKi/7BYRkRERERERERE9B8Wy4iIiIiIiIiIiP7DYhkREREREREREdF/WCwjIiIqwpydnTFjxgyhY6jVnj17YGlpicTERKGjEBEREZEWYrGMiIhIAPHx8fD29oaLiwsaNWoEW1tb9OvXD5s2bcL79+/VkiE1NRWrVq1CRESEWh5PU4WHhyMkJEToGEWOpaUlfvnllxzHAwMDYWlpCU9PT2RmZhb4/hMTE2FpaZnrx8GDBwsTnYiIiOiz9IQOQEREpG1OnjyJn376CQYGBujevTvq1q2L9PR0REdHY8mSJfj3338xd+5cledITU1FQEAAxo0bh2bNmqn88fKre/fu6Ny5MwwMDISOAgA4cOAA7ty5gyFDhggdpcgLCgrC8uXL0aNHD8yfPx86OoW/LtulSxe0atVK4Zi1tXWh75eIiIgoLyyWERERqVFCQgI8PDxgYmKCTZs2oXLlyvJzAwYMwIMHD3Dy5EnhAipBSkoKSpYsWeDv19XVha6urhITFU2pqakoUaKE0DGUZv369Vi6dClcXV2xYMECpRTKAKB+/fro3r27Uu6LiIiIKD+4DJOIiEiN1q9fj5SUFMyfP1+hUJatevXqGDx4cJ7fv2rVKlhaWuY4nlufr5iYGLi7u6NZs2Zo3LgxnJ2d4enpCSBriZujoyMAICAgQL68bdWqVfLvv3v3LiZMmAAHBwc0atQIPXv2xIkTJ3J93MjISMyZMweOjo5o3br1Z/8NtmzZgs6dO6NJkyawt7dHz549ER4e/tmxZGZmYtWqVXByckKTJk0waNAg/Pvvvzl6umV/b3R0NHx9fdG8eXNYW1tj7NixePHihUKO48ePY+TIkXByckLDhg3Rtm1brF69GlKpVH6bQYMG4eTJk3j48KH838jZ2TnPnAAQEREBS0tLheWtgwYNQpcuXXDt2jUMGDAATZo0wbJlywAAaWlpWLlyJdq1a4eGDRuidevWWLx4MdLS0hTu9+zZs+jfvz/s7OxgY2OD9u3by+9DaMHBwViyZAm6desGX19fpRXKsqWkpOT49yAiIiJSFc4sIyIiUqO//voL5ubmsLW1VenjJCUlwd3dHeXLl8fIkSNhaGiIxMREHDt2DABgZGSEOXPmYM6cOWjXrh3atWsHAPJC3J07d9C/f39UqVIFI0aMQMmSJXH48GGMHTsWq1atkt8+m4+PD4yMjDB27FikpKTkmWvHjh2YN28e2rdvDzc3N3z48AG3bt3ClStX0LVr1zy/b+nSpVi/fj2+/fZbfPPNN4iNjYW7uzs+fPiQ6+3nzZsHQ0NDjBs3Dg8fPsSmTZvwyy+/YMWKFfLbhIWFoWTJkhg6dChKliyJ8+fPY+XKlXj79i2mT58OABg1ahTevHmDJ0+eyAuNpUqV+sK/fu5evXqFESNGoHPnzujWrRsqVKiAzMxMjB49GtHR0ejTpw8sLCxw+/ZtbNq0CXFxcfj1118BZD0fP/74IywtLTFhwgQYGBjgwYMHuHjxYoGyKNOmTZuwcOFCdOnSBQsXLsy1UPZpoTIvpUuXzrH8NiAgAIsXL4ZEIkGDBg3g4e
EBJycnpWQnIiIiyg2LZURERGry9u1bPH36FC4uLip/rEuXLuH169fYsGEDGjVqJD/u4eEBAChZsiTat2+POXPmwNLSMscyt/nz58PY2Bi7d++WFy9++OEH9O/fH35+fjmKZWXLlkVISMgXl0+ePHkSderUwcqVK/M9lufPnyMkJEQ+8ytbQECAwky4j5UrVw4bN26ERCIBkDUzbcuWLXjz5g3KlCkDIKsAV7x4cfn39O/fH97e3ti2bRs8PDxgYGCAli1bYvPmzUhOTi70UsBnz57Bx8cH/fr1kx/bt28f/vnnH2zZsgV2dnby43Xq1MHs2bNx8eJF2Nra4uzZs0hPT8e6detgZGRUqBzKlD3rrkuXLli8eHGez3/2LMYv8fX1Rc+ePQEAOjo6cHJyQtu2bVGlShUkJCQgJCQEI0aMwJo1a9CmTRtlDYOIiIhIAYtlREREavL27VsABZ+Z9DWyC0InT56ElZUV9PX18/29r169wvnz5zFhwgR55mxOTk5YtWoVnj59iipVqsiP9+nTJ199xgwNDfHkyRNcvXoVjRs3zleec+fOISMjAz/88IPC8YEDB+ZZLOvTp4+8UAYAdnZ2CAkJwcOHD2FlZQUACoWyt2/fIi0tDXZ2dggNDcW9e/fkt1MWAwMDeSEo25EjR2BhYYFatWopzL5q3rw5gKwlnba2tjA0NAQAnDhxAr169VL6MseCev78OQDAzMzss89/cHBwvu6vdu3a8s9NTEywYcMGhfPZmz8sXLiQxTIiIiJSGRbLiIiI1KR06dIAgHfv3qn8sRwcHNC+fXsEBAQgJCQEDg4OaNu2Lbp27frFXSbj4+Mhk8ng7+8Pf3//XG+TlJSkUCwzMzPLV64RI0bgn3/+Qe/evVG9enW0bNkSXbp0QdOmTfP8nkePHgEAqlWrpnC8XLlyKFu2bK7fY2JiovB1drEpOTlZfuzOnTtYsWIFzp8/n6Mo+ObNm3yN52tUqVIlx7/9gwcPcPfu3TxnXiUlJQEAOnXqhJ07d2LWrFlYunQpHB0d0a5dO3To0OGzhbNXr14hPT29QHnLli37xZ8VV1dX/O9//0NgYCDKly+f546hLVq0KFCGT5UrVw49e/ZEUFAQnjx5gqpVqyrlfomIiIg+xmIZERGRmpQuXRqVK1fGnTt3CnwfH8+W+tjHTemzb7dy5UpcvnwZf/31F06fPg0vLy8EBwcjNDT0s7PbMjMzAQDDhg3DN998k+ttPi1cFStWLF/5LSwscOTIEZw8eRKnT5/GH3/8ga1bt2Ls2LGYMGFCvu4jP/IqIMlkMgBZRbOBAweidOnSmDBhAqpVq4ZixYrh+vXr8PPzk/8bfE5ez0Ve3/vxTLaPb1u3bl15P7RPZReDihcvjt9//x0RERHyf7tDhw4hNDQUGzduzHNW1/jx4xEZGfnFseRm8+bNaNas2Wdvo6enB39/fwwfPhwLFy5EmTJl0KtXrxy3e/bsWb4es0yZMrn+O30s+9/k1atXLJYRERGRSrBYRkREpEbffvstQkNDcenSJdjY2Hz19388Qyr7c+D/Z199ytraGtbW1vDw8EB4eDimTJmCQ4cOoXfv3nkWe8zNzQEA+vr6SpsR9LGSJUuiU6dO6NSpE9LS0jB+/HgEBgbixx9/zLXolj1LLD4+Xp4NAF6+fInXr18XKENkZCRevXqFgIAA2Nvby49/urMlkHdRLPvf/9NZaA8fPsx3jmrVqiE2NhaOjo55Pk42HR0dODo6wtHREZ6enggMDMTy5csRERGR5/M0ffp0hdl0XyO/y1CLFSuGNWvWwM3NDT///DMMDQ1z9LTLb0P+j3uW5SX7OSpKvduIiIhIXFgsIyIiUqPhw4cjPDwcs2bNwqZNm1CxYkWF8/Hx8fjrr78wePDgXL8/e0ZXVFSUfKOAlJQU7N27V+F2r1+/hqGhoUIBpl69egCAtLQ0AECJEiUAIEcxpUKFCnBwcEBoaCgGDhyIypUrK5x/8eJFgQ
sVL1++RPny5eVfGxgYwMLCAqdOnUJ6enquxTJHR0fo6elh27ZtaNmypfz477//XqAMwP/PPMueaQZk/bts3bo1x21
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1400x400 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMsAAAGGCAYAAABlkwa3AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAvTJJREFUeJzs3XlcTfn/B/DXLWUPlaXNFsoSlYpMlinGPsJgDLKEsc9kGYpJjci+hoSRjCVb9mXGNpYvlSWyG6IiRnaK6nZ/fzTdn6ui5d57uue+no9Hj0edc7vn9XFT977v5/P+SGQymQxEREREREREREQEHaEDEBERERERERERFRcslhEREREREREREf2HxTIiIiIiIiIiIqL/sFhGRERERERERET0HxbLiIiIiIiIiIiI/sNiGRERERERERER0X9YLCMiIiIiIiIiIvoPi2VERERERERERET/YbGMiIiIiIiIiIjoPyyWERERFROurq6YMmWK0DHUaufOnbCyskJiYqLQUYiIiIiIALBYRkREpHLx8fHw9fWFm5sbbGxsYG9vj++//x7r16/H+/fv1ZIhNTUVy5YtQ2RkpFqup6n27t2L0NBQoWMUO1ZWVvjtt99yHA8ODoaVlRW8vb2RmZlZpGusXLkSI0aMQIsWLWBlZYVly5bledsnT57gp59+goODA+zt7TFy5EgkJCQU6fpERERE2VgsIyIiUqETJ06ga9euOHjwIL7++mv8+uuvmDBhAkxNTTFv3jzMnDlTLTlSU1MRFBSEqKgotVwvv7p164YrV67AzMxM6CgAgH379iEsLEzoGBohJCQEixYtQvfu3TFz5kzo6BTtaeXixYtx9epV1K9f/7O3e/fuHTw8PBAdHY0ff/wR48aNw40bN9C/f3+8ePGiSBmIiIiIAKCE0AGIiIjEKiEhAV5eXjA1NcX69etRpUoV+bl+/frhwYMHOHHihHABlSAlJQVlypQp9Pfr6upCV1dXiYmKp9TUVJQuXVroGEqzZs0aLFiwAO7u7pg1a1aRC2UAcPToUZibm+P58+dwdnbO83abNm3C/fv3sW3bNjRu3BgA0LJlS3Tt2hXr1q3D+PHji5yFiIiItBtnlhEREanImjVrkJKSgpkzZyoUyrLVqFEDAwcOzPP7ly1bBisrqxzHc+vzFRsbC09PTzRr1gyNGzeGq6srvL29AQCJiYny4kNQUBCsrKxyLHO7e/cuxo0bBycnJ9jY2KBHjx44evRorteNioqCn58fnJ2d0bp168/+G2zYsAGdO3dGkyZN4OjoiB49emDv3r2fHUtmZiaWLVsGFxcXNGnSBAMGDMA///yTo6db9vdeuHABgYGBaN68OWxtbTF69Gg8f/5cIceRI0cwfPhwuLi4oFGjRmjbti2WL18OqVQqv82AAQNw4sQJPHz4UP5v5OrqmmdOAIiMjISVlZXC8tYBAwagS5cuuHr1Kvr164cmTZpg4cKFAIC0tDQsXboU7dq1Q6NGjdC6dWvMnTsXaWlpCvd75swZ9O3bFw4ODrCzs0P79u3l9yG0devWYd68efj2228RGBiolEIZAJibm+frdocPH4aNjY28UAYAlpaWcHZ2xsGDB5WShYiIiLQbZ5YRERGpyPHjx2FhYQF7e3uVXufZs2fw9PREpUqVMHz4cBgYGCAxMRF//fUXAMDQ0BB+fn7w8/NDu3bt0K5dOwCQF+Lu3LmDvn37omrVqhg2bBjKlCmDgwcPYvTo0Vi2bJn89tn8/f1haGiI0aNHIyUlJc9cW7duRUBAANq3bw8PDw98+PABt27dwuXLl9G1a9c8v2/BggVYs2YNvv76a7Rs2RI3b96Ep6cnPnz4kOvtAwICYGBggDFjxuDhw4dYv349fvvtNyxevFh+m4iICJQpUwaDBw9GmTJlcO7cOSxduhRv377F5MmTAQAjRozAmzdv8PjxY3mhsWzZsl/418/dy5cvMWzYMHTu3BnffvstjI
yMkJmZiZEjR+LChQvo3bs3LC0tcfv2baxfvx7379/HihUrAGQ9Hj/++COsrKwwbtw46Ovr48GDB7h48WKhsijT+vXrMXv2bHTp0gWzZ8/OtVD2aaEyL+XKlYO+vn6Brp+ZmYlbt26hZ8+eOc7Z2Njg9OnTePv2LcqVK1eg+yUiIiL6GItlREREKvD27Vs8efIEbm5uKr/WpUuX8OrVK6xduxY2Njby415eXgCAMmXKoH379vDz84OVlRW6deum8P0zZ86EiYkJduzYIS9e/PDDD+jbty/mz5+fo1hWoUIFhIaGfnH55IkTJ1C3bl0sXbo032NJTk5GaGiofOZXtqCgoDwbvlesWBG///47JBIJgKyCyoYNG/DmzRuUL18eQFYBrlSpUvLv6du3L3x9fbF582Z4eXlBX18fX331FcLCwvD69esc/0YF9fTpU/j7++P777+XH9u9ezf+97//YcOGDXBwcJAfr1u3LqZPn46LFy/C3t4eZ86cQXp6OlavXg1DQ8Mi5VCm7Fl3Xbp0wdy5c/N8/D+3hPJjgYGB6NGjR4EyvHz5EmlpaahcuXKOc9nH/v33XxbLiIiIqEhYLCMiIlKBt2/fAij8zKSCyC4InThxAtbW1tDT08v39758+RLnzp3DuHHj5Jmzubi4YNmyZXjy5AmqVq0qP967d+989RkzMDDA48ePceXKFYUlc59z9uxZZGRk4IcfflA43r9//zyLZb1795YXygDAwcEBoaGhePjwIaytrQFAoVD29u1bpKWlwcHBAeHh4bh37578dsqir6+foxB06NAhWFpaonbt2gqzr5o3bw4ga0mnvb09DAwMAGT18OrZs6fSljkWVXJyMoCs5ZKfe/zXrVuXr/urU6dOgTNkzy7MbUZayZIlFW5DREREVFgslhEREalA9syWd+/eqfxaTk5OaN++PYKCghAaGgonJye0bdsWXbt2/eIyt/j4eMhkMixZsgRLlizJ9TbPnj1TKJblt7fUsGHD8L///Q+9evVCjRo18NVXX6FLly5o2rRpnt/z6NEjAED16tUVjlesWBEVKlTI9XtMTU0Vvs4uNr1+/Vp+7M6dO1i8eDHOnTuXoyj45s2bfI2nIKpWrZrj3/7Bgwe4e/dunjOvnj17BgDo1KkTtm3bhmnTpmHBggVwdnZGu3bt0KFDh88Wzl6+fIn09PRC5a1QocIXf1bc3d3x77//Ijg4GJUqVcKgQYNyvV2LFi0KlSE/sgtin/Z4A/6/SJZ9GyIiIqLCYrGMiIhIBcqVK4cqVargzp07hb6Pj2dLfezjpvTZt1u6dCliYmJw/PhxnDp1Cj4+Pli3bh3Cw8M/O7stMzMTADBkyBC0bNky19t8WrjKbzHC0tIShw4dwokTJ3Dq1Cn8+eef2LRpE0aPHo1x48bl6z7yI68CkkwmA5BVNOvfvz/KlSuHcePGoXr16ihZsiSuXbuG+fPny/8NPievxyKv7/14JtvHt61Xr568H9qnqlWrJv/ejRs3IjIyUv5vd+DAAYSHh+P333/Pc1bX2LFjERUV9cWx5CYsLAzNmjX77G1KlCiBJUuWYOjQoZg9ezbKly+fa++wp0+f5uua5cuXz/Xf6XMqVqwIfX39XK+RfSy3zTSIiIiICoLFMiIiIhX5+uuvER4ejkuXLsHOzq7A3//xDKnsz4H/n331KVtbW9ja2sLLywt79+7FxIkTceDAAfTq1SvPYo+FhQUAQE9PTyUzgsqUKYNOnTqhU6dOSEtLw9ixYxEcHIwff/wx16Jb9iyx+Ph4eTYAePHiBV69elWoDFFRUXj58iWCgoLg6OgoP/7pzpZA3kWx7H//T2ehPXz4MN85qlevjps3b8LZ2TnP62TT0dGBs7MznJ2d4e3tjeDgYCxatAiRkZF5Pk6TJ09WmE1XEPldhlqyZEmsXLkSHh4e+PXXX2FgYJCjp52Li0u+7qswPct0dHRQr149XL16Nce5K1euwMLCgv3KiIiIqMhYLC
MiIlKRoUOHYu/evZg2bRrWr18PY2NjhfPx8fE4fvw4Bg4cmOv3Z8/oio6Olm8UkJKSgl27dinc7tWrVzAwMFAowNS
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1400x400 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"#heatmap\n",
|
|||
|
|
"def robust_zscore_col(s):\n",
|
|||
|
|
" med = np.nanmedian(s)\n",
|
|||
|
|
" mad = np.nanmedian(np.abs(s - med))\n",
|
|||
|
|
" if mad == 0 or np.isnan(mad):\n",
|
|||
|
|
" return np.zeros(len(s))\n",
|
|||
|
|
" return (s - med) / (1.4826 * mad)\n",
|
|||
|
|
"\n",
|
|||
|
|
"for k in [2, 5, 10]:\n",
"    # Median of every profiling variable within each cluster\n",
"    prof = dfc.groupby(f\"cluster_k{k}\")[profile_vars].median()\n",
"    prof_z = prof.copy()\n",
"\n",
"    # Standardise each variable across clusters so all columns share one colour scale.\n",
"    # NOTE: with K=2 a column has only two rows, and (x - mean) / std (ddof=1) is then\n",
"    # always +/-0.707 when the two medians differ: that heatmap shows only which\n",
"    # cluster is higher, not by how much.\n",
"    for c in prof.columns:\n",
"        # Robust alternative (median / MAD), currently disabled:\n",
"        # prof_z[c] = robust_zscore_col(prof[c].values)\n",
"        # 1e-12 keeps the division finite when a variable is identical in all clusters\n",
"        prof_z[c] = (prof[c] - prof[c].mean()) / (prof[c].std() + 1e-12)\n",
"\n",
"    plt.figure(figsize=(14, 4))\n",
"    sns.heatmap(prof_z, cmap=\"RdBu_r\", center=0)\n",
"    plt.title(f\"Cluster signatures — K={k}\")\n",
"    plt.tight_layout()\n",
"    plt.show()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 307,
|
|||
|
|
"id": "ba4c495b-95d6-49d8-84a9-8d3fd674c975",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAuEAAAKyCAYAAAB7WgDLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsvXm0bVdVJv6tZjdnn+b2793Xpn+PkJdACAhBBKRoCspSCKKioAMiIhhBEA2diELRCqVJaFJVgGJoLAXFUkQUm59oDNJGEAOkf+19tzv3dLtZze+PtfZc56aBBAJJdM8xMnLfOfvsvfZq5pprzm9+k1lrLRpppJFGGmmkkUYaaaSR75nwe7sBjTTSSCONNNJII4008p9NGiO8kUYaaaSRRhpppJFGvsfSGOGNNNJII4000kgjjTTyPZbGCG+kkUYaaaSRRhpppJHvsTRGeCONNNJII4000kgjjXyPpTHCG2mkkUYaaaSRRhpp5HssjRHeSCONNNJII4000kgj32NpjPBGGmmkkUYaaaSRRhr5HktjhDfSSCON/CeTf//3f8fll1+OY8eO3dtNaaSRRhr5TyuNEd5II/cTufzyy3Hw4MF7uxmNfBN5+ctfjsc97nH3ahu+1TwZDAa45JJL0O/3sWvXru9hyxpppJFGGpmWxghvpJF7QT760Y/i4MGD9N+5556LRz3qUbj44ovx/ve/H8Ph8B55zokTJ3D55Zfjq1/96j1yv/8sMplMcPnll+Oaa665V9vx7ne/G3/91399j97zFa94BR74wAfila985T163++WXHPNNTh48CA+8YlPbPu8LEs8//nPxwMe8AD80R/90Xf0jKuvvhqveMUr8KQnPQkPetCD8F/+y3/Bq171KqysrHxH922kkUYa+WbSGOGNNHIvyote9CK85S1vwWtf+1o8+9nPBgC84Q1vwA//8A/j3//937dd+4IXvADXXnvt3br/ysoKrrjiisYIv5symUxwxRVX4DOf+czd+t3rXve62xmL34lceeWVd9sI/2bz5PDhwzh06BDe+ta3gvP7r/qvqgovetGL8Pd///d43etehx/90R/9ju731re+FZ/5zGfw+Mc/Hq9+9avx3/7bf8Nf/MVf4GlPexpOnjx5D7W6kUYaaWS7yHu7AY008p9ZHv3oR+Pcc8+lfz//+c/H1VdfjZ//+Z/HC1/4Qnz84x9HmqYAACklpGyW7H1RxuMxsixDFEX3ehu+2TzZu3cvfv7nf/573LJ7Vqqqwi/90i/h7/7u7/Cbv/mbeMYznvEd3/MVr3gFLrjggm0Hkx/4gR/As571LFx11VV4yUte8h0/o5FGGmnktnL/dYU00sh/ULnwwgvxwhe+EEeOHMGf/umf0ud3hPX9x3/8Rzzzmc/EQx/6UJx//vl40pOehLe//e0AXBi/9hC+4hWvIOjLRz/6UQDAZz/7WbzoRS/CYx/7WBw6dAiPecxj8IY3vAF5nm97xstf/nKcf/75OHHiBF74whfi/PPPxyMe8Qi8+c1vhtZ627XGGPze7/0e/vt//+8499xz8YhHPAIXX3wx/vVf/3XbdR/72Mdw0UUX4bzzzsP3fd/34SUvecldShKs++DGG2/Ey172MlxwwQV4xCMegd/+7d+GtRbHjh3DC17wAjzkIQ/B93//9+O9733vtt+XZYnf+Z3fwUUXXYQLLrgAD37wg/GTP/mT+Od//me65vDhw7jwwgsBAFdccQX12+WXX76tP2655RY873nPw/nnn4+Xvexl9N00Jvyyyy7DAx7wAFx99dXb2vFrv/ZrOHTo0O2iHdNy8OBBjMdj/PEf/zG14eUvf/m2fvjGN76BX/7lX8bDHvYw/ORP/uS2724rd7XPv/SlL+Hiiy/GBRdcgAc96EF41rOehc997nN3PijfQ1FK4aUvfSk+9alP4bWvfS1+7Md+7B6578Me9rDbRQYe9rCHYXZ2FjfccMM98oxGGmmkkd
tK41ZrpJH7oPzIj/wI3v72t+PTn/70nRoaX//61/H85z8fBw8exIte9CLEcYybb74Zn//85wEAZ5xxBl70ohfhsssuw4//+I/jggsuAAA85CEPAQB84hOfQJ7neOYzn4nZ2Vlce+21uOqqq3D8+HFcdtll256ltcbFF1+M8847D7/6q7+Kq6++Gu9973uxb98+Mv4A4FWvehU++tGP4tGPfjR+9Ed/FFprfPazn8WXvvQl8vi/613vwu/8zu/gyU9+Mn70R38U6+vruOqqq/BTP/VT+JM/+RP0er1v2T8veclLcMYZZ+CXf/mX8fd///d417vehdnZWXz4wx/GIx7xCLzsZS/D//t//w9vfvObce655+JhD3sYAGA4HOIP//AP8UM/9EN4xjOegdFohD/6oz/Cz/7sz+IP//APcfbZZ2N+fh6vfe1r8drXvhZPeMIT8IQnPAEAthm2SikyVC+99FKKVtxWXvCCF+Bv//Zv8apXvQp/+qd/ik6ng3/4h3/A//2//xcvfvGL8YAHPOBO3/Etb3kLXv3qV+O8886jObB///5t17z4xS/GKaecgpe85CWw1t7pve5qn1999dV43vOeh0OHDuGSSy4BYwwf/ehH8TM/8zP44Ac/iPPOO+9bjs13S7TWeOlLX4q/+qu/wmte8xr8xE/8xO2uqaoKg8HgLt1vdnb2m0JyRqMRRqMR5ubmvu02N9JII418U7GNNNLI91w+8pGP2AMHDthrr732Tq+54IIL7FOf+lT692WXXWYPHDhA/37f+95nDxw4YNfW1u70Htdee609cOCA/chHPnK77yaTye0+u/LKK+3BgwftkSNH6LNLL73UHjhwwF5xxRXbrn3qU59qn/a0p9G/r776anvgwAH7ute97nb3NcZYa609fPiwPfvss+273vWubd9fd9119oEPfODtPr+t1H3wa7/2a/SZUso++tGPtgcPHrRXXnklfd7v9+15551nL7300m3XFkWx7Z79ft8+8pGPtK94xSvos7W1NXvgwAF72WWX3a4NdX/81m/91h1+94M/+IO3e7dzzjnHvupVr7L9ft/+wA/8gL3oootsVVXf9F2ttfbBD37wtvbfth9e+tKX3ul3tdzVPjfG2Cc+8Yn2uc99Lo2XtW6ePO5xj7PPec5zvmV7vxvyz//8z/bAgQP2B3/wB+2BAwfsVVdd9S2vvSv/3Xrrrd/0ue94xzvsgQMH7D/90z/d06/USCONNGKttbbxhDfSyH1UsizDaDS60+9r7+WnPvUpPP3pT7/biXbT3tvxeIw8z3H++efDWot/+7d/w+7du7dd/8xnPnPbvy+44IJtcJlPfvKTYIzhkksuud2zGGMAgL/6q7+CMQZPfvKTsb6+Tt8vLi7ilFNOwTXXXHOXMMvTiXhCCBw6dAjHjx/f9nmv18Npp52GW2+9ddu1QggADjqztbUFYwwOHTqEf/u3f/uWz52W2/bHncmBAwfwohe9CG9729tw3XXXYWNjA+9973vvEXz/HXmDbyt3tc+/+tWv4qabbsILXvACbGxsbLvHhRdeiI997GMwxtxrCZ2rq6uQUmLv3r13es0DHvAAvO9977tL91taWrrT7/7lX/4F73jHO/DkJz+ZoEmNNNJII/e0NEZ4I43cR2U8HmNhYeFOv3/KU56CP/zDP8SrX/1qvO1tb8OFF16IJzzhCfiv//W/3iVD6ejRo7jsssvwN3/zN+j3+9u+uy1FYpIkmJ+f3/bZzMzMtt/dcsst2LFjB2ZnZ+/0mTfddBOstXjiE594h9/fVcP0tgeEbrd7h23sdrvY3Nzc9tkf//Ef473vfS9uvPFGVFVFn38z4+6O2rm8vHyXr7/44ovx53/+57j22mvx0pe+FGeeeeZd/u03k7vS5rva5zfddBMA4NJLL73Tew0GA8zMzNzhd98Ji8g3M4hr+ZVf+RX83u/9Hl784hfjPe95D8GrpmVmZgaPfOQjv+12AMD111+PSy65BG
eddRZe//rXf0f3aqSRRhr5ZtIY4Y00ch+U48ePYzAY3A4DPC1pmuIDH/gArrnmGvzd3/0d/uEf/gEf//jH8Qd/8Ad
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 800x700 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAuEAAAKyCAYAAAB7WgDLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs/XfYbVdVL45/Zlll97edmpOTfkIgCYaAEEBALuCFRwWjCCjoAxERiDSjCYiIwqUooCah5HcvYAnFqzSvcimicoVvDBcvnRha2klyynvetutaa5bfH3POMddOIQmknMgaz3Oes9+9V5l9jjnGZ3wGs9ZaNNJII4000kgjjTTSSCP3mvD7ugCNNNJII4000kgjjTTyoyaNEt5II4000kgjjTTSSCP3sjRKeCONNNJII4000kgjjdzL0ijhjTTSSCONNNJII400ci9Lo4Q30kgjjTTSSCONNNLIvSyNEt5II4000kgjjTTSSCP3sjRKeCONNNJII4000kgjjdzL0ijhjTTSSCONNNJII400ci9Lo4Q30kgjjfyIyX/8x3/gkksuwc0333xfF6WRRhpp5EdWGiW8kUbuJ3LJJZfg1FNPva+L0cj3kYsuugiPf/zj79My3NE4GQ6HOP/887G5uYldu3bdiyVrpJFGGmmkLo0S3kgj94F8+MMfxqmnnkr/zjjjDDz60Y/Geeedh7/8y7/EaDS6W95z8OBBXHLJJbjqqqvuluf9qMh0OsUll1yCK6+88j4tx7ve9S784z/+4936zFe+8pV44AMfiFe96lV363PvKbnyyitx6qmn4hOf+MTc92VZ4gUveAEe8IAH4G//9m9/qHfccj7W/x0+fPiHenYjjTTSyO2JvK8L0EgjP8rykpe8BHv27IFSCqurq/jCF76AN7zhDfjzP/9zvOMd78ADHvAAuvaFL3whfv3Xf/0uPf/QoUO49NJLccwxx+C00067u4v/n1am0ykuvfRSnH/++Xj4wx9+p+973eteB2vt3VaOyy67DD/1Uz+FJzzhCXf6nu83Tvbv34/TTz8dz33uc8H5/dcGU1UVXvKSl+Czn/0sXve61+EXfuEX7pbnhvlYl36/f7c8u5FGGmnkltIo4Y00ch/KYx7zGJxxxhn09wte8AJcccUV+I3f+A286EUvwsc//nHkeQ4AkFJCymbKHo0ymUzQbreRJMl9XobvN0727NmD3/iN37iXS3b3SlVVeNnLXoZ/+Zd/wR/+4R/i6U9/+t327FvOx0YaaaSRe1Luv6aQRhr5TyrnnHMOXvSiF+HGG2/E3/3d39H3t4X1/fznP49nPetZeOhDH4qzzjoLP/VTP4W3ve1tAJwbP1gIX/nKV5J7/cMf/jAA4Itf/CJe8pKX4HGPexxOP/10PPaxj8Ub3vAGzGazuXdcdNFFOOuss3Dw4EG86EUvwllnnYVHPOIRePOb3wyt9dy1xhj8xV/8BX7mZ34GZ5xxBh7xiEfgvPPOw9e+9rW56z72sY/h3HPPxZlnnokf//Efx8tf/vI7FSQY2uCaa67BBRdcgLPPPhuPeMQj8Kd/+qew1uLmm2/GC1/4QjzkIQ/Box71KLznPe+Zu78sS/zZn/0Zzj33XJx99tn4sR/7MfzSL/0S/u3f/o2u2b9/P8455xwAwKWXXkrtdskll8y1x/XXX4/nP//5OOuss3DBBRfQb3VM+MUXX4wHPOABuOKKK+bK8Xu/93s4/fTT8R//8R+3W9dTTz0Vk8kEH/nIR6gMF1100Vw7fOc738Fv/dZv4WEPexh+6Zd+ae63W8qdbfOvfOUrOO+883D22WfjwQ9+MJ797Gfj3//932+/U+5FUUrhFa94BT7zmc/gta99LX7xF3/xbn/HaDS61bhupJFGGrknpDGrNdLIUShPfepT8ba3vQ2f+9znblfR+Pa3v40XvOAFOPXUU/GSl7wEaZriuuuuw//7f/8PAHDSSSfhJS95CS6++GI84x
nPwNlnnw0AeMhDHgIA+MQnPoHZbIZnPetZWFhYwFe/+lVcfvnlOHDgAC6++OK5d2mtcd555+HMM8/E7/zO7+CKK67Ae97zHhx77LGk/AHA7/7u7+LDH/4wHvOYx+AXfuEXoLXGF7/4RXzlK18hC+M73/lO/Nmf/Rme/OQn4xd+4RewtraGyy+/HL/8y7+Mj370o3fK/f/yl78cJ510En7rt34Ln/3sZ/HOd74TCwsL+OAHP4hHPOIRuOCCC/C//tf/wpvf/GacccYZeNjDHgbAKVh/8zd/g5/+6Z/G05/+dIzHY/zt3/4tfu3Xfg1/8zd/g9NOOw1LS0t47Wtfi9e+9rV44hOfiCc+8YkAMKfYKqVIUb3wwgvJW3FLeeELX4h//ud/xu/+7u/i7/7u79DtdvGv//qv+J//83/ipS996Rzc6JbyR3/0R3j1q1+NM888k8bA3r1756556UtfiuOOOw4vf/nLvy8M5s62+RVXXIHnP//5OP3003H++eeDMYYPf/jD+NVf/VW8//3vx5lnnnmHfXNPidYar3jFK/DpT38ar3nNa/DMZz7zVtdUVYXhcHinnrewsHArSM6v/MqvYDKZIEkSPPrRj8ZFF12E448//u4ofiONNNLIrcU20kgj97p86EMfsvv27bNf/epXb/eas88+2z7taU+jvy+++GK7b98++vu9732v3bdvnz1y5MjtPuOrX/2q3bdvn/3Qhz50q9+m0+mtvrvsssvsqaeeam+88Ub67sILL7T79u2zl1566dy1T3va0+zP/dzP0d9XXHGF3bdvn33d6153q+caY6y11u7fv9+edtpp9p3vfOfc71dffbV94AMfeKvvbymhDX7v936PvlNK2cc85jH21FNPtZdddhl9v7m5ac8880x74YUXzl1bFMXcMzc3N+0jH/lI+8pXvpK+O3LkiN23b5+9+OKLb1WG0B5vectbbvO3n/zJn7xV3R70oAfZ3/3d37Wbm5v2J37iJ+y5555rq6r6vnW11tof+7Efmyv/LdvhFa94xe3+FuTOtrkxxj7pSU+yz3ve86i/rHXj5PGPf7x97nOfe4flvSfk3/7t3+y+ffvsT/7kT9p9+/bZyy+//A6vvTP/brjhBrrvH/7hH+xFF11kP/KRj9hPf/rT9k/+5E/sgx/8YPvwhz/c3nTTTfdGNRtppJEfQWks4Y00cpRKu93GeDy+3d+D9fIzn/kMfv7nf/4uB9rVrbeTyQSz2QxnnXUWrLX45je/id27d89d/6xnPWvu77PPPnsOLvOpT30KjDGcf/75t3oXYwwA8OlPfxrGGDz5yU/G2toa/b6ysoLjjjsOV1555Z3CLNcD8YQQOP3003HgwIG57/v9Pk444QTccMMNc9cKIQA46MzW1haMMTj99NPxzW9+8w7fW5dbtsftyb59+/CSl7wEb33rW3H11VdjfX0d73nPe+4WfP9tWYNvKXe2za+66ipce+21eOELX4j19fW5Z5xzzjn42Mc+BmPMfRbQubq6CinlrQIn6/KABzwA733ve+/U87Zt20afn/KUp+ApT3kK/f2EJzwBj370o/HsZz8b73znO/GHf/iHP3jBG2mkkUZuRxolvJFGjlKZTCZYXl6+3d+f8pSn4G/+5m/w6le/Gm9961txzjnn4IlPfCL+63/9r3dKUbrppptw8cUX45/+6Z+wubk599stKRKzLMPS0tLcd4PBYO6+66+/Htu3b8fCwsLtvvPaa6+FtRZPetKTbvP3O6uY3vKA0Ov1brOMvV4PGxsbc9995CMfwXve8x5cc801qKqKvv9+yt1tlXPnzp13+vrzzjsP//AP/4CvfvWreMUrXoGTTz75Tt/7/eTOlPnOtvm1114LALjwwgtv91nD4RCDweA2f/thqPzqCvHtyW//9m/jL/7iL/DSl74U7373uwleVZfBYIBHPvKRP3A56vLQhz4UD37wg2+F52+kkUYaubukUcIbaeQolAMHDmA4HN4KA1
yXPM/xvve9D1deeSX+5V/+Bf/6r/+Kj3/84/jrv/5rvOc97yGL722J1hrPfe5zsbm5iV/7tV/DiSeeiHa7jYMHD+K
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 800x700 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAuEAAAKyCAYAAAB7WgDLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs/XfYpldVL45/drnL0942JTPJpJFkhpBJIEQkAQTkCyj8EDCCgIIKERGIoWoCIqJwCCBwNAkllweiENpRmkeRIpYjGqIivQQQSJlMf+ctT7vL3vv3x957rfuZmZAEUo/3uq655nmf5y6777XX+qzPEs45h1ZaaaWVVlpppZVWWmnlLhN5dxeglVZaaaWVVlpppZVW/rtJq4S30korrbTSSiuttNLKXSytEt5KK6200korrbTSSit3sbRKeCuttNJKK6200korrdzF0irhrbTSSiuttNJKK620chdLq4S30korrbTSSiuttNLKXSytEt5KK6200korrbTSSit3sbRKeCuttNJKK6200korrdzF0irhrbTSSiv/j8q3vvUtXH755di9e/fdXZRWWmmllVYOk1YJb6WVe5hcfvnl2LFjx91djFZ+iFxyySV41KMedbeW4dbGyfr6Oi688EKsrq5i69atd2HJWmmllVZauS3SKuGttHInykc+8hHs2LGD/p155pl42MMehgsuuADvec97MBwO75D37N27F5dffjm++c1v3iHP++8ik8kEl19+Oa699tq7tRzvfOc78Xd/93d36DNf8YpX4H73ux9e+cpX3qHPvbPk2muvxY4dO/DJT35y5vuyLPG85z0P973vffGXf/mXP9Y79u3bhze/+c141rOehbPPPhs7duz4oX3/n//5n3jGM56B+9///njoQx+K173udRiNRj9WGVpppZVWorRKeCut3AVy0UUX4U1vehNe85rX4FnPehYA4PWvfz2e+MQn4lvf+tbMtc9//vPxla985XY9f9++fbjiiitaJfx2ymQywRVXXIF/+7d/u133vfa1rz1CWfxx5Morr7zdSvgPGyc33XQTdu7ciT/6oz+ClPfeZb6qKlx00UX4p3/6J7z2ta/FU57ylB/red///vfxp3/6p9i3b9+tepu++c1v4td+7dcwnU5xySWX4ClPeQo+9KEP4UUvetGPVYZWWmmllSj67i5AK638d5CHP/zhOPPMM+nv5z3vebjmmmvwm7/5m3jBC16AT3ziE8jzHACgtYbW7dS8J8p4PEa320WSJHd7GX7YONm2bRt+8zd/8y4u2R0rVVXhxS9+Mf7xH/8Rf/iHf4inPvWpP/YzzzjjDFx77bVYWFjAJz/5SXzxi1+8xWvf+ta3Ym5uDu9973vR7/cB+HZ91atehc997nN42MMe9mOXp5VWWvnvLfdeE0krrdzL5bzzzsMLXvAC7Nq1C3/1V39F3x8N6/sv//IveMYznoGf+ImfwNlnn42f+ZmfwVvf+lYA3o0fLYSveMUrCPrykY98BADwH//xH7jooovwyEc+Ejt37sQjHvEIvP71r8d0Op15xyWXXIKzzz4be/fuxQte8AKcffbZOPfcc/HGN74RxpiZa621+PM//3P83M/9HM4880yce+65uOCCC/DVr3515rqPf/zjOP/883HWWWfhJ3/yJ/GSl7zkNgUJxjb4/ve/j5e//OU455xzcO655+KP//iP4ZzD7t278fznPx8PfOAD8dCHPhTvfve7Z+4vyxJ/8id/gvPPPx/nnHMOHvCAB+CXfumX8PnPf56uuemmm3DeeecBAK644gpqt8svv3ymPW644QY897nPxdlnn42Xv/zl9FsTE37ZZZfhvve9L6655pqZcvze7/0edu7ceYS3oyk7duzAeDzGRz/6USrDJZdcMtMO3/3ud/Gyl70MD3rQg/BLv/RLM78dLre1zb/85S/jggsuwDnnnIP73//+eOYzn4kvfOELt9wpd6HUdY2Xvv
Sl+OxnP4vXvOY1+MVf/MU75Ln9fh8LCwu3et1wOMS//uu/4olPfCIp4ADwpCc9Cd1uF3/7t397h5SnlVZa+e8trbmtlVbuRnnSk56Et771rfjc5z53i4rGd77zHTzvec/Djh07cNFFFyFNU1x//fX4z//8TwDAKaecgosuugiXXXYZnva0p+Gcc84BADzwgQ8EAHzyk5/EdDrFM57xDCwsLOArX/kKrr76auzZsweXXXbZzLuMMbjgggtw1lln4Xd+53dwzTXX4N3vfjeOP/54Uv4A4Hd/93fxkY98BA9/+MPxlKc8BcYY/Md//Ae+/OUvk8X/He94B/7kT/4Ej3vc4/CUpzwFy8vLuPrqq/HLv/zL+NjHPoa5ublbbZ+XvOQlOOWUU/Cyl70M//RP/4R3vOMdWFhYwAc/+EGce+65ePnLX47/83/+D974xjfizDPPxIMe9CAAXon6i7/4CzzhCU/AU5/6VIxGI/zlX/4lfv3Xfx1/8Rd/gdNPPx1LS0t4zWteg9e85jV4zGMeg8c85jEAMKPY1nVNiurFF19M3orD5fnPfz7+4R/+Ab/7u7+Lv/qrv0K/38c///M/43//7/+NF73oRbjvfe97i3V805vehFe96lU466yzaAyccMIJM9e86EUvwoknnoiXvOQlcM7d4rNua5tfc801eO5zn4udO3fiwgsvhBACH/nIR/Crv/qreP/734+zzjrrVvvmzhJjDF760pfiM5/5DF796lfj6U9/+hHXVFWF9fX12/S8hYWF2w3Jue6661DXNXbu3DnzfZqmOP3001vYVyuttHLHiGullVbuNPnwhz/stm/f7r7yla/c4jXnnHOOe/KTn0x/X3bZZW779u3091VXXeW2b9/uDh48eIvP+MpXvuK2b9/uPvzhDx/x22QyOeK7K6+80u3YscPt2rWLvrv44ovd9u3b3RVXXDFz7ZOf/GT38z//8/T3Nddc47Zv3+5e+9rXHvFca61zzrmbbrrJnX766e4d73jHzO/XXXedu9/97nfE94dLbIPf+73fo+/qunYPf/jD3Y4dO9yVV15J36+urrqzzjrLXXzxxTPXFkUx88zV1VX3kIc8xL3iFa+g7w4ePOi2b9/uLrvssiPKENvjzW9+81F/++mf/ukj6nbGGWe43/3d33Wrq6vup37qp9z555/vqqr6oXV1zrkHPOABM+U/vB1e+tKX3uJvUW5rm1tr3WMf+1j3nOc8h/rLOT9OHvWoR7lnP/vZt1reO0M+//nPu+3bt7uf/umfdtu3b3dXX331rV57W/7deOONR33G3/7t37rt27e7z3/+87f427//+78f8dtFF13kHvrQh/7oFW2llVZaCdJawltp5W6Wbrf7QxkXovXys5/9LH7hF37hdlv1mtbb8XiM6XSKs88+G845fOMb38Cxxx47c/0znvGMmb/POeecGbjMpz/9aQghcOGFFx7xLiEEAOAzn/kMrLV43OMeh+XlZfp948aNOPHEE3HttdfeJsxyMxBPKYWdO3diz549M9/Pzc3h5JNPxo033jhzrVIKgIfOrK2twVqLnTt34hvf+Matvrcph7fHLcn27dtx0UUX4S1veQuuu+46HDp0CO9+97vvEHz/0azBh8ttbfNvfvOb+MEPfoDnP//5OHTo0MwzzjvvPHz84x+HtfZuC+g8cOAAtNbYtm3bLV5z3/veF1ddddVtet6mTZtudxkiVCtN0yN+y7LsCChXK6200sqPIq0S3kord7OMx2Ns2LDhFn9//OMfj7/4i7/Aq171KrzlLW/Beeedh8c85jH42Z/92dukKN1888247LLL8Pd///dYXV2d+e1wisQsy7C0tDTz3fz8/Mx9N9xwAzZv3vxDsbU/+MEP4JzDYx/72KP+flsV08MPCIPB4KhlHAwGWFlZmfnuox/9KN797nfj+9//Pqqqou9/mHJ3tHJu2bLlNl9/wQUX4G/+5m/wla98BS996Utx6qmn3uZ7f5jclj
Lf1jb/wQ9+AAC4+OKLb/FZ6+vrmJ+fP+pv+/fvv9Wy3JLcFoX4t3/7t/Hnf/7neNGLXoR3vetdBK9qyvz8PB7ykIf
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 800x700 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"import numpy as np\n",
|
|||
|
|
"import matplotlib.pyplot as plt\n",
|
|||
|
|
"import seaborn as sns\n",
|
|||
|
|
"from sklearn.metrics import pairwise_distances\n",
|
|||
|
|
"\n",
|
|||
|
|
"def plot_distance_matrix_sorted(X_scaled, labels, max_points=400, title=\"Distance matrix\"):\n",
"    \"\"\"\n",
"    Plot the pairwise-distance matrix with rows/columns grouped by cluster.\n",
"\n",
"    Red lines are drawn at the boundaries between consecutive clusters.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X_scaled : np.ndarray or pd.DataFrame\n",
"        Standardised numeric data (n_samples x n_features). A DataFrame is\n",
"        row-selected by position.\n",
"    labels : array-like\n",
"        Cluster label of each row of `X_scaled`.\n",
"    max_points : int, optional\n",
"        Maximum number of rows plotted; larger inputs are randomly\n",
"        subsampled without replacement (fixed seed 42).\n",
"    title : str, optional\n",
"        Figure title.\n",
"    \"\"\"\n",
"    n = X_scaled.shape[0]\n",
"    idx = np.arange(n)\n",
"\n",
"    # Subsample when the input is too large to display\n",
"    if n > max_points:\n",
"        rng = np.random.default_rng(42)\n",
"        idx = rng.choice(idx, size=max_points, replace=False)\n",
"\n",
"    # Order the kept rows by cluster; the stable sort keeps the sampled order\n",
"    # inside each cluster\n",
"    labels_sub = np.asarray(labels)[idx]\n",
"    order = np.argsort(labels_sub, kind=\"stable\")\n",
"    idx = idx[order]\n",
"    labels_sub = labels_sub[order]\n",
"\n",
"    # `X_scaled[idx]` would select *columns* on a DataFrame, so use .iloc there\n",
"    X_sub = X_scaled.iloc[idx] if hasattr(X_scaled, \"iloc\") else X_scaled[idx]\n",
"\n",
"    # Distance matrix\n",
"    D = pairwise_distances(X_sub)\n",
"\n",
"    # Figure\n",
"    plt.figure(figsize=(8, 7))\n",
"    sns.heatmap(D, cmap=\"viridis\")\n",
"\n",
"    # Separator lines between clusters\n",
"    _, counts = np.unique(labels_sub, return_counts=True)\n",
"    boundaries = np.cumsum(counts)\n",
"    for b in boundaries[:-1]:  # the last boundary is the edge of the matrix\n",
"        plt.axhline(b, color='red', linewidth=2)\n",
"        plt.axvline(b, color='red', linewidth=2)\n",
"\n",
"    plt.title(title)\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
|
|||
|
|
"\n",
|
|||
|
|
"for k in [2, 5, 10]:\n",
"    plot_distance_matrix_sorted(\n",
"        X_scaled,\n",
"        dfc[f\"cluster_k{k}\"].values,\n",
"        title=f\"Distance matrix triée — K={k}\"\n",
"    )\n",
"\n",
"# Observation: the first 2 clusters are fairly far apart"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 309,
|
|||
|
|
"id": "3ceef678-6be4-40df-bbd5-c8b0f8b2d075",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"churn_hard 0.033333\n",
|
|||
|
|
"churn_soft 0.204762\n",
|
|||
|
|
"churn_warning 0.252381\n",
|
|||
|
|
"dtype: float64\n",
|
|||
|
|
"\n",
|
|||
|
|
"===== CHURN PAR CLUSTER K=2 =====\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>n_clients</th>\n",
|
|||
|
|
" <th>churn_hard_rate</th>\n",
|
|||
|
|
" <th>churn_soft_rate</th>\n",
|
|||
|
|
" <th>churn_warning_rate</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>cluster_k2</th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>302</td>\n",
|
|||
|
|
" <td>0.026490</td>\n",
|
|||
|
|
" <td>0.182119</td>\n",
|
|||
|
|
" <td>0.208609</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>118</td>\n",
|
|||
|
|
" <td>0.050847</td>\n",
|
|||
|
|
" <td>0.262712</td>\n",
|
|||
|
|
" <td>0.364407</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" n_clients churn_hard_rate churn_soft_rate churn_warning_rate\n",
|
|||
|
|
"cluster_k2 \n",
|
|||
|
|
"0 302 0.026490 0.182119 0.208609\n",
|
|||
|
|
"1 118 0.050847 0.262712 0.364407"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"\n",
|
|||
|
|
"===== CHURN PAR CLUSTER K=5 =====\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>n_clients</th>\n",
|
|||
|
|
" <th>churn_hard_rate</th>\n",
|
|||
|
|
" <th>churn_soft_rate</th>\n",
|
|||
|
|
" <th>churn_warning_rate</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>cluster_k5</th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>252</td>\n",
|
|||
|
|
" <td>0.031746</td>\n",
|
|||
|
|
" <td>0.238095</td>\n",
|
|||
|
|
" <td>0.289683</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>84</td>\n",
|
|||
|
|
" <td>0.011905</td>\n",
|
|||
|
|
" <td>0.083333</td>\n",
|
|||
|
|
" <td>0.083333</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>73</td>\n",
|
|||
|
|
" <td>0.068493</td>\n",
|
|||
|
|
" <td>0.232877</td>\n",
|
|||
|
|
" <td>0.301370</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>10</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.200000</td>\n",
|
|||
|
|
" <td>0.400000</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" n_clients churn_hard_rate churn_soft_rate churn_warning_rate\n",
|
|||
|
|
"cluster_k5 \n",
|
|||
|
|
"2 252 0.031746 0.238095 0.289683\n",
|
|||
|
|
"0 84 0.011905 0.083333 0.083333\n",
|
|||
|
|
"1 73 0.068493 0.232877 0.301370\n",
|
|||
|
|
"4 10 0.000000 0.200000 0.400000\n",
|
|||
|
|
"3 1 0.000000 0.000000 0.000000"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# Churn analysis\n",
"# NOTE: comparisons against NaN are False, so rows with a missing feature get flag 0.\n",
"\n",
"# Hard churn: aum_final_to_peak below 0.10\n",
"# (presumably final AUM as a fraction of peak AUM -- confirm where the feature is built)\n",
"dfc[\"churn_hard\"] = (dfc[\"aum_final_to_peak\"] < 0.10).astype(int)\n",
"\n",
"# Soft churn: aum_final_to_peak below 0.40 AND aum_drawdown_last above 0.40\n",
"dfc[\"churn_soft\"] = (\n",
"    (dfc[\"aum_final_to_peak\"] < 0.40) &\n",
"    (dfc[\"aum_drawdown_last\"] > 0.40)\n",
").astype(int)\n",
"\n",
"# Warning: negative flow_direction_balance AND aum_drawdown_last above 0.20\n",
"dfc[\"churn_warning\"] = (\n",
"    (dfc[\"flow_direction_balance\"] < 0) &\n",
"    (dfc[\"aum_drawdown_last\"] > 0.20)\n",
").astype(int)\n",
"\n",
"# Overall share of rows carrying each flag\n",
"print(dfc[[\"churn_hard\", \"churn_soft\", \"churn_warning\"]].mean())\n",
"\n",
"# Flag rates per cluster, largest cluster first\n",
"for k in [2, 5]:\n",
"    out = (\n",
"        dfc.groupby(f\"cluster_k{k}\")\n",
"        .agg(\n",
"            n_clients=(ID_COL, \"count\"),\n",
"            churn_hard_rate=(\"churn_hard\", \"mean\"),\n",
"            churn_soft_rate=(\"churn_soft\", \"mean\"),\n",
"            churn_warning_rate=(\"churn_warning\", \"mean\")\n",
"        )\n",
"        .sort_values(\"n_clients\", ascending=False)\n",
"    )\n",
"    print(f\"\\n===== CHURN PAR CLUSTER K={k} =====\")\n",
"    display(out)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 310,
|
|||
|
|
"id": "d41ca763-0c29-4dad-9749-44a3773ad75a",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAS49JREFUeJzt3XlYFeX///EXHEFwRxYzFzRNQMF9JUlzKcPMxDRLKhW11EzNEkxzqQxzyYLKVHAjKzGV6pNZaVlWqH1Nk/y4paaYGgiaCyh44PeHP86nE6DAAEfh+bgursszc8897znNNZ3Xmfs+Y5ednZ0tAAAAADDA3tYFAAAAALj1ESwAAAAAGEawAAAAAGAYwQIAAACAYQQLAAAAAIYRLAAAAAAYRrAAAAAAYBjBAgAAAIBhBAsAAAAAhhEsAKAIvLy89PLLL9u6jDIlLCxM3bp1s3UZAIAiIlgAwD8cP35c06ZNU/fu3eXn56fWrVtr0KBBWrFihS5fvmzr8mDAqlWrtG7dOluXUSzyC7bvvfeevLy8NHnyZGVlZRW5/8OHD2vOnDnq27evWrVqpc6dO2vkyJFKSEgwUjaAMq6CrQsAgJvFli1bNG7cODk6Oqpv375q0qSJMjMztXPnTs2dO1e///67XnnlFVuXiSL68MMP5eLioqCgIFuXUiIWL16sBQsWqF+/fpo1a5bs7Yv+3eHHH3+sjz/+WPfee68ee+wxXbhwQatXr9YjjzyiqKgo+fv7F2PlAMoKggUASEpMTNSECRN0++23a8WKFfLw8LCsGzx4sI4dO6YtW7aUak1ZWVnKzMxUxYoVS3W/RXEr1Vqcrl69qqysLDk6Otq0jqioKM2fP18PPfSQXnvtNUOhQpJ69+6tZ555RpUrV7Ys69+/vwIDAxUZGUmwAJAnhkIBgK59MEtLS9OsWbOsQkUOT09PPfnkk7mWb9q0SQ888IB8fX3Vu3dvff/991br85s3EBkZKS8vL6tlOcNbPv30U/Xu3Vt+fn7aunWr1q1bJy8vL+3cuVPh4eHq2LGjWrZsqTFjxig1NfWGxxYWFqZWrVopMTFRISEhatmypTp37qy3335b2dnZVm2jo6M1aNAgdejQQc2bN1dQUJA2btyYq8/8ar2e7777TsHBwWrVqpVat26t/v3767PPPsu3/fbt2+Xl5aXt27dbLT9x4oS8vLyshjUlJydr8uTJuvvuu+Xr66vOnTtr1KhROnHihCSpW7duOnTokHbs2CEvLy95eXnp8ccft2x//vx5zZo1S126dJGvr6969uypxYsXWw0nytlvdHS0li9frh49esjPz0+HDx++7nGXtGXLlmnu3Ll68MEHFR4ebjhUSJKvr69VqJAkFxcXtW3bVkeOHDHcP4CyiTsWACDp22+/Vb169dS6desCb7Nz50599dVXeuyxx1S5cmXFxMTo2Wef1bfffisXF5ci1bFt2zZ98cUXGjx4sFxcXFSnTh2dP39ekvTqq6+qWrVqeuaZZ/Tnn39qxYoVevnll/Xmm2/esF+z2azhw4erRYsWeuGFF7R161ZFRkbKbDZr3LhxlnYrV65Ut27d1KdPH2VmZurzzz/XuHHjtGjRInXt2vWGteZn3bp1evHFF3XnnXfqqaeeUtWqVbVv3z5t3bpVffr0KdJ79U9jx47V77//ruDgYNWpU0epqan68ccfderUKdWtW1cvvviiXnnlFVWqVElPP/20JMnNzU2SlJ6eruDgYP31118aNGiQateurV27dumNN95QcnKypkyZkutYrly5ooEDB8rR0VHVq1c3XH9RrVixQrNnz9YDDzyg2bNn5xkqChI+JalKlSo3vPOSnJysGjVqFKVUAOUAwQJAuXfx4kX99ddf6t69e6G2O3z4sDZs2KD69etLkjp06KC+ffvq888/V3BwcJFqOXr0qD777DM1btzYsmzfvn2SpBo1amjp0qWys7OTdG34UUxMjC5cuKCqVa
tet98rV64oICBAU6dOlSQ99thjevrpp7VkyRI9/vjjqlmzpiTpyy+/lJOTk2W7wYMHKygoSMuWLcsVLPKqNS8XLlzQq6++qubNmysmJsZquNS/75gUxfnz57Vr1y5NmjRJISEhluVPPfWU5d89evTQm2++KRcXF/Xt29dq+2XLlikxMVHr169XgwYNJEmDBg2Sh4eHoqOjNWzYMNWuXdvS/vTp0/r6668t75mtbNmyRX/++aceeOABzZkzRyaTKc92nTp1KlB/4eHh151/8n//93/avXu3Ro0aVaR6AZR9BAsA5d7FixclKdfQjxvx9/e3hApJ8vb2VpUqVZSYmFjkWtq1a5fvB/WBAwdaQoUktW3bVsuXL9eff/4pb2/vG/Y9ePBgy7/t7Ow0ePBgbdmyRfHx8erdu7ckWYWKv//+W2azWW3atNHnn39eqFr/6ccff9SlS5c0cuTIXHMw/nk8ReXk5CQHBwft2LFDDz/8cKHvIGzcuFFt2rRRtWrVrL7d9/f31+LFi/Xzzz/rwQcftCy/9957bR4qJOnMmTOSpLp16+YbKqRrwakgrvffMiUlRRMnTlTdunU1fPjwwhUKoNwgWAAo96pUqSJJunTpUqG2++e32DmqV69uGbpUFHXr1s133e233271ulq1apJUoP3Z29urXr16VssaNmwoSfrzzz8ty7799lstXLhQ+/btU0ZGhmV5XgHgerX+0/HjxyVJd955Z4HaF5ajo6Oef/55vf7667rrrrvUokULde3aVQ899JDc3d1vuP2xY8d04MCBfL/Z//dQooIe97lz55SZmVmgtv9WvXr1Gw5Leuihh5SUlKT33ntPLi4uGjJkSJ7tjE60TktL01NPPaVLly7pgw8+KHQAB1B+ECwAlHtVqlSRh4eHDh06VKjt8vuW+J/De/L7Rt5sNue5/J93DP4tv0m5xTGcSLo21GXUqFFq166dpk+fLnd3dzk4OGjt2rX6z3/+U6hai0N+711ez2cYMmSIunXrpk2bNumHH37QW2+9pcWLF2vFihVq2rTpdfeTlZWlu+66K99v4nOGR+Uo6HGPHTtWO3bsKFDbf1u5cqU6dOhw3TYVKlTQW2+9peHDh2v27NmqWrWq+vfvn6tdcnJygfZZtWrVXMeWkZGhsWPH6sCBA4qOjlaTJk0KfhAAyh2CBQBIuueee7R69Wrt2rVLrVq1KrZ+q1WrlucdhZMnTxbbPgoiKytLiYmJlrsU0rU5EpIsk66//PJLVaxYUdHR0Vbflq9du9bQvnOGix06dEienp4F3i7njsyFCxeslv/zDsu/9zNs2DANGzZMf/zxhx566CEtXbpU8+bNk5R/UKlfv77S0tKK/SdUQ0NDi3z3qiBD2ySpYsWKWrhwoZ544gm99NJLqlatmnr27GnVpnPnzgXq699zLLKyshQaGqr4+Hi9+eabat++fcEPAEC5RLAAAEnDhw/XZ599pqlTp2rFihWWXwzKcfz4cX377bd5/uTs9dSvX18XLlzQ/v37LR8Wk5KS9PXXXxdb7QW1atUqy+Tt7OxsrVq1Sg4ODpYhQCaTSXZ2dlZ3U06cOKHNmzcb2m/nzp1VuXJlLVq0SAEBAbkmb+f3gb9OnToymUz6+eef1aNHD8vyDz/80Kpdenq67O3trfqtX7++KleubDWcy9nZOc8P+vfff78iIyO1detWBQQEWK07f/68KlWqpAoVCv+/S19f30JvUxRVqlRRVFSUHnvsMT333HNavHix1bCuos6xeOWVV7Rhwwa9/PLLuvfee4u1ZgBlE8ECAHTtg+i8efM0YcIEBQYGWp68nZGRoV27dmnjxo1FemJzYGCg5s2bp2eeeUaPP/64Ll++rA8//FANGzbU3r17S+BI8laxYkVt3bpVoaGhat68ubZu3aotW7bo6aeftkxE7tKli5YtW6bhw4frgQceUEpKij744APVr19fBw4cKPK+q1SposmTJ2vq1Kl6+OGH9cADD6hatW
rav3+/Ll++rNdffz3P7apWrapevXrp/fffl52dnerVq6ctW7YoJSXFqt0ff/yhIUOGqFevXmrcuLFMJpM2bdqkM2f
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 800x400 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAU8xJREFUeJzt3XlclOX+//H3zAiCO7KYueCWAyruK2l1DMvQSjE9mlQqmqmZmeXS4lJ5cC+xk6YoCnlKPS6njmZlq51Q+xomeVxKTTE1EDQXUHDg94c/5jQBCjMDw/J6Ph48dO77uq/7c99eDvOeezPk5OTkCAAAAAAcYHR1AQAAAADKPoIFAAAAAIcRLAAAAAA4jGABAAAAwGEECwAAAAAOI1gAAAAAcBjBAgAAAIDDCBYAAAAAHEawAAAAAOAwggUA2MFsNuvVV191dRnlytSpU9WzZ09XlwEAsBPBAgD+4OTJk5o+fbruvfdeBQUFqX379ho8eLDWrFmjq1evuro8OGDt2rXatGmTq8twioKC7bJly2Q2mzVt2jRlZ2fb3f+pU6dkNpvz/dm6dasjpQMoxyq5ugAAKC2+/PJLTZgwQe7u7nr44YfVvHlzZWVlae/evZo/f75+/vlnvfbaa64uE3Z677335OXlpbCwMFeXUiyWL1+uN954Q/3799fs2bNlNDr+3WHfvn1111132Uxr27atw/0CKJ8IFgAgKSkpSRMnTtTtt9+uNWvWyM/Pzzpv6NChOnHihL788ssSrSk7O1tZWVmqXLlyia7XHmWpVme6fv26srOz5e7u7tI6oqOjtXDhQvXr109/+9vfnBIqJKlFixZ6+OGHndIXgPKPU6EAQDc+mKWnp2v27Nk2oSKXv7+/nnjiiTzTd+zYob59+6pVq1bq06ePvv76a5v5BV03sGTJEpnNZptpuae3fPDBB+rTp4+CgoK0c+dObdq0SWazWXv37lVkZKS6du2qtm3baty4cUpLS7vltk2dOlXt2rVTUlKSIiIi1LZtW3Xv3l1vvfWWcnJybNquXLlSgwcPVpcuXdS6dWuFhYVp+/btefosqNab+eqrrxQeHq527dqpffv2GjBggD788MMC2+/evVtms1m7d++2mZ57ms4fT2tKSUnRtGnTdNddd6lVq1bq3r27xowZo1OnTkmSevbsqZ9++kl79uyxntLz2GOPWZe/ePGiZs+erbvvvlutWrVSr169tHz5cpvTiXLXu3LlSq1evVohISEKCgrS0aNHb7rdxS0mJkbz58/XQw89pMjISKeFilzp6enKzMx0ap8AyieOWACApC+++EINGjRQ+/btC73M3r179cknn+jRRx9V1apVFRcXp2eeeUZffPGFvLy87Kpj165d+uijjzR06FB5eXmpXr16unjxoiTp9ddfV40aNfT000/r119/1Zo1a/Tqq6/qzTffvGW/FotFI0eOVJs2bfTCCy9o586dWrJkiSwWiyZMmGBtFxsbq549e+rBBx9UVlaWtm7dqgkTJuidd97RPffcc8taC7Jp0ya9+OKLuuOOOzR69GhVr15dBw8e1M6dO/Xggw/ata/+aPz48fr5558VHh6uevXqKS0tTf/5z3905swZ1a9fXy+++KJee+01ValSRU899ZQkycfHR5KUkZGh8PBw/fbbbxo8eLDq1q2rhIQELVq0SCkpKXrppZfybMu1a9c0aNAgubu7q2bNmg7Xb681a9Zozpw56tu3r+bMmZNvqChM+JSkatWq5Tny8tZbb2nevHkyGAxq2bKlJk6cqO7duzuldgDlD8ECQIV3+fJl/fbbb7r33nuLtNzRo0e1bds2NWzYUJLUpUsXPfzww9q6davCw8PtquX48eP68MMP1axZM+u0gwcPSpJq1aqlVatWyWAwSLpx+lFcXJwuXbqk6tWr37Tfa9euqUePHnr55ZclSY8++qieeuoprVixQo899phq164tSfr444/l4eFhXW7o0K
EKCwtTTExMnmCRX635uXTpkl5//XW1bt1acXFxNqdL/fmIiT0uXryohIQETZ48WREREdbpo0ePtv49JCREb775pry8vPKc2hMTE6OkpCRt3rxZjRo1kiQNHjxYfn5+WrlypUaMGKG6deta2589e1affvqpdZ+5ypdffqlff/1Vffv21bx582QymfJt161bt0L1FxkZab3+xGg0qnv37goJCVGdOnWUlJSk1atXa9SoUVq6dGmesQAAEsECAHT58mVJUtWqVYu0XHBwsDVUSFJAQICqVaumpKQku2vp1KlTgR/UBw0aZA0VktSxY0etXr1av/76qwICAm7Z99ChQ61/NxgMGjp0qL788kvFx8erT58+kmQTKn7//XdZLBZ16NAh3zsB3azWP/rPf/6jK1eu6Mknn8xzDcYft8deHh4ecnNz0549e/TII48U+QjC9u3b1aFDB9WoUcPm2/3g4GAtX75c3333nR566CHr9Pvuu8/loUKSzp07J0mqX79+gaFCuhGcCuOP/5a33367Vq5caTP/4YcfVp8+fTRnzhyCBYB8ESwAVHjVqlWTJF25cqVIy/3xW+xcNWvWtJ66ZI/69esXOO/222+3eV2jRg1JKtT6jEajGjRoYDOtcePGkqRff/3VOu2LL77Q0qVLdfDgQZvz6vMLADer9Y9OnjwpSbrjjjsK1b6o3N3d9fzzz2vu3Lm688471aZNG91zzz3q16+ffH19b7n8iRMndPjw4QK/2f/zqUSF3e4LFy4oKyurUG3/rGbNmre8ILxfv35KTk7WsmXL5OXlpWHDhuXbLjg42K4a/qxWrVoKCwvT8uXLdfbsWd12221O6RdA+UGwAFDhVatWTX5+fvrpp5+KtFxB3xL/8fSegr6Rt1gs+U7/4xGDPyvoolxnnE4kSf/3f/+nMWPGqFOnTpoxY4Z8fX3l5uamjRs36t///neRanWGgvZdfs9nGDZsmHr27KkdO3bom2++0eLFi7V8+XKtWbNGLVq0uOl6srOzdeedd2rkyJH5zs89PSpXYbd7/Pjx2rNnT6Ha/llsbKy6dOly0zaVKlXS4sWLNXLkSM2ZM0fVq1fXgAED8rRLSUkp1DqrV69+y23LDRMXLlwgWADIg2ABAJL+8pe/aN26dUpISFC7du2c1m+NGjXyPaJw+vRpp62jMLKzs5WUlGQ9SiHduEZCkvWi648//liVK1fWypUrbb4t37hxo0Przj1d7KeffpK/v3+hl8s9InPp0iWb6X88wvLn9YwYMUIjRozQL7/8on79+mnVqlVasGCBpIKDSsOGDZWenu60b/ZzTZkyxe6jV4U5tU2SKleurKVLl+rxxx/XK6+8oho1aqhXr142bQp7sfUfr7EoSO5dtkrDqWAASh+CBQBIGjlypD788EO9/PLLWrNmjfWOQblOnjypL774It9bzt5Mw4YNdenSJR06dMj6YTE5OVmffvqp02ovrLVr11ov3s7JydHatWvl5uZmPQXIZDLJYDDYHE05deqUPvvsM4fW2717d1WtWlXvvPOOevTokefi7YI+8NerV08mk0nfffedQkJCrNPfe+89m3YZGRkyGo02/TZs2FBVq1a1OZ3L09Mz3w/6DzzwgJYsWaKdO3eqR48eNvMuXryoKlWqqFKlov+6bNWqVZGXsUe1atUUHR2tRx99VM8995yWL19uc1qXPddYpKWl5QkPv/32mzZu3Ciz2ZzvLZkBgGABALrxQXTBggWaOHGiQkNDrU/ezszMVEJCgrZv327XE5tDQ0O1YMECPf3003rsscd09epVvffee2rcuLEOHDhQDFuSv8qVK2vnzp2aMmWKWrdurZ07d+rLL7/UU089Zf0AeffddysmJkYjR45U3759lZqaqn/84x9q2LChDh8+bPe6q1WrpmnTpunll1/WI488or59+6pGjRo6dOiQrl69qrlz5+a7XPXq1dW7d2+9++67MhgMatCggb788kulpqbatPvll180bN
gw9e7dW82aNZPJZNKOHTt07tw560XpktSyZUu99957evvtt+Xv76/atWurW7duioiI0Oeff66nnnpK/fv3V8uWLZW
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 800x400 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"for k in [2, 5]:\n",
|
|||
|
|
" tmp = (\n",
|
|||
|
|
" dfc.groupby(f\"cluster_k{k}\")\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" churn_hard=(\"churn_hard\", \"mean\"),\n",
|
|||
|
|
" churn_soft=(\"churn_soft\", \"mean\"),\n",
|
|||
|
|
" churn_warning=(\"churn_warning\", \"mean\")\n",
|
|||
|
|
" )\n",
|
|||
|
|
" )\n",
|
|||
|
|
"\n",
|
|||
|
|
" tmp.plot(kind=\"bar\", figsize=(8, 4))\n",
|
|||
|
|
" plt.title(f\"Churn par cluster — K={k}\")\n",
|
|||
|
|
" plt.ylabel(\"Rate\")\n",
|
|||
|
|
" plt.tight_layout()\n",
|
|||
|
|
" plt.show()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": null,
|
|||
|
|
"id": "1a302b4c-b358-4fba-a53a-3933d8799e86",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": []
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"metadata": {
|
|||
|
|
"kernelspec": {
|
|||
|
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
|
"language": "python",
|
|||
|
|
"name": "python3"
|
|||
|
|
},
|
|||
|
|
"language_info": {
|
|||
|
|
"codemirror_mode": {
|
|||
|
|
"name": "ipython",
|
|||
|
|
"version": 3
|
|||
|
|
},
|
|||
|
|
"file_extension": ".py",
|
|||
|
|
"mimetype": "text/x-python",
|
|||
|
|
"name": "python",
|
|||
|
|
"nbconvert_exporter": "python",
|
|||
|
|
"pygments_lexer": "ipython3",
|
|||
|
|
"version": "3.13.12"
|
|||
|
|
}
|
|||
|
|
},
|
|||
|
|
"nbformat": 4,
|
|||
|
|
"nbformat_minor": 5
|
|||
|
|
}
|