3783 lines
497 KiB
Plaintext
3783 lines
497 KiB
Plaintext
|
|
{
|
|||
|
|
"cells": [
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "ee3b8d29-20bd-4c03-b366-0d16c21054c8",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"# Clustering"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 1,
|
|||
|
|
"id": "1161613a-7665-407a-9d2e-956947114767",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"import pandas as pd\n",
|
|||
|
|
"import numpy as np\n",
|
|||
|
|
"import matplotlib.pyplot as plt"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 2,
|
|||
|
|
"id": "ae8183d7-1ba0-443c-a5cc-14f43df82b42",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stderr",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"/tmp/ipykernel_9087/3355046467.py:2: DtypeWarning: Columns (1,2,3,4) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
|||
|
|
" df_flows = pd.read_csv(\"flows.csv\")\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Unnamed: 0</th>\n",
|
|||
|
|
" <th>Agreement - Code</th>\n",
|
|||
|
|
" <th>Company - Id</th>\n",
|
|||
|
|
" <th>Company - Ultimate Parent Id</th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>Registrar Account - Region</th>\n",
|
|||
|
|
" <th>RegistrarAccount - Country</th>\n",
|
|||
|
|
" <th>Product - Asset Type</th>\n",
|
|||
|
|
" <th>Product - Strategy</th>\n",
|
|||
|
|
" <th>Product - Legal Status</th>\n",
|
|||
|
|
" <th>...</th>\n",
|
|||
|
|
" <th>Centralisation Date</th>\n",
|
|||
|
|
" <th>Quantity - Subscription</th>\n",
|
|||
|
|
" <th>Quantity - Redemption</th>\n",
|
|||
|
|
" <th>Quantity - NetFlows</th>\n",
|
|||
|
|
" <th>Value Ccy - Subscription</th>\n",
|
|||
|
|
" <th>Value Ccy - Redemption</th>\n",
|
|||
|
|
" <th>Value Ccy - NetFlows</th>\n",
|
|||
|
|
" <th>Value € - Subscription</th>\n",
|
|||
|
|
" <th>Value € - Redemption</th>\n",
|
|||
|
|
" <th>Value € - NetFlows</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>003</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>200127202</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>EQUITY</td>\n",
|
|||
|
|
" <td>INVESTISSEMENT</td>\n",
|
|||
|
|
" <td>SICAV</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2020-11-05</td>\n",
|
|||
|
|
" <td>1636.000</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>1636.000</td>\n",
|
|||
|
|
" <td>280983.00</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>280983.00</td>\n",
|
|||
|
|
" <td>280983.00</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>280983.00</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>003</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>406533</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2015-03-09</td>\n",
|
|||
|
|
" <td>144.690</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>144.690</td>\n",
|
|||
|
|
" <td>99985.13</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>99985.13</td>\n",
|
|||
|
|
" <td>99985.13</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>99985.13</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>2</td>\n",
|
|||
|
|
" <td>003</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>406533</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>EQUITY</td>\n",
|
|||
|
|
" <td>INVESTISSEMENT</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2016-10-26</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>-8.321</td>\n",
|
|||
|
|
" <td>-8.321</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-9384.76</td>\n",
|
|||
|
|
" <td>-9384.76</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-9384.76</td>\n",
|
|||
|
|
" <td>-9384.76</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>003</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>406533</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>EQUITY</td>\n",
|
|||
|
|
" <td>INVESTISSEMENT</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2018-10-18</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>-22.083</td>\n",
|
|||
|
|
" <td>-22.083</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-25227.40</td>\n",
|
|||
|
|
" <td>-25227.40</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-25227.40</td>\n",
|
|||
|
|
" <td>-25227.40</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>003</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>166</td>\n",
|
|||
|
|
" <td>406533</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>EQUITY</td>\n",
|
|||
|
|
" <td>INVESTISSEMENT</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2019-04-08</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>-465.992</td>\n",
|
|||
|
|
" <td>-465.992</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-563775.76</td>\n",
|
|||
|
|
" <td>-563775.76</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-563775.76</td>\n",
|
|||
|
|
" <td>-563775.76</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>...</th>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2574456</th>\n",
|
|||
|
|
" <td>2574456</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>LUXEMBOURG</td>\n",
|
|||
|
|
" <td>LUXEMBOURG</td>\n",
|
|||
|
|
" <td>FIXED INCOME</td>\n",
|
|||
|
|
" <td>SÉCURITÉ</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2015-06-12</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>-20.000</td>\n",
|
|||
|
|
" <td>-20.000</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-34294.40</td>\n",
|
|||
|
|
" <td>-34294.40</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-34294.40</td>\n",
|
|||
|
|
" <td>-34294.40</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2574457</th>\n",
|
|||
|
|
" <td>2574457</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>LUXEMBOURG</td>\n",
|
|||
|
|
" <td>LUXEMBOURG</td>\n",
|
|||
|
|
" <td>FIXED INCOME</td>\n",
|
|||
|
|
" <td>SÉCURITÉ</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2015-09-18</td>\n",
|
|||
|
|
" <td>328.726</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>328.726</td>\n",
|
|||
|
|
" <td>564028.07</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>564028.07</td>\n",
|
|||
|
|
" <td>564028.07</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>564028.07</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2574458</th>\n",
|
|||
|
|
" <td>2574458</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>LUXEMBOURG</td>\n",
|
|||
|
|
" <td>LUXEMBOURG</td>\n",
|
|||
|
|
" <td>FIXED INCOME</td>\n",
|
|||
|
|
" <td>SÉCURITÉ</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2015-09-25</td>\n",
|
|||
|
|
" <td>4.443</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>4.443</td>\n",
|
|||
|
|
" <td>7603.66</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>7603.66</td>\n",
|
|||
|
|
" <td>7603.66</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>7603.66</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2574459</th>\n",
|
|||
|
|
" <td>2574459</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>LUXEMBOURG</td>\n",
|
|||
|
|
" <td>LUXEMBOURG</td>\n",
|
|||
|
|
" <td>FIXED INCOME</td>\n",
|
|||
|
|
" <td>SÉCURITÉ</td>\n",
|
|||
|
|
" <td>FCP</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2015-11-09</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>-440.000</td>\n",
|
|||
|
|
" <td>-440.000</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-754696.80</td>\n",
|
|||
|
|
" <td>-754696.80</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>-754696.80</td>\n",
|
|||
|
|
" <td>-754696.80</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2574460</th>\n",
|
|||
|
|
" <td>2574460</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>LUXEMBOURG</td>\n",
|
|||
|
|
" <td>LUXEMBOURG</td>\n",
|
|||
|
|
" <td>FIXED INCOME</td>\n",
|
|||
|
|
" <td>SÉCURITÉ</td>\n",
|
|||
|
|
" <td>SICAV</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>2016-01-11</td>\n",
|
|||
|
|
" <td>3595.000</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>3595.000</td>\n",
|
|||
|
|
" <td>358385.55</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>358385.55</td>\n",
|
|||
|
|
" <td>358385.55</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>358385.55</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"<p>2574461 rows × 25 columns</p>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Unnamed: 0 Agreement - Code Company - Id \\\n",
|
|||
|
|
"0 0 003 166 \n",
|
|||
|
|
"1 1 003 166 \n",
|
|||
|
|
"2 2 003 166 \n",
|
|||
|
|
"3 3 003 166 \n",
|
|||
|
|
"4 4 003 166 \n",
|
|||
|
|
"... ... ... ... \n",
|
|||
|
|
"2574456 2574456 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"2574457 2574457 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"2574458 2574458 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"2574459 2574459 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"2574460 2574460 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"\n",
|
|||
|
|
" Company - Ultimate Parent Id Registrar Account - ID \\\n",
|
|||
|
|
"0 166 200127202 \n",
|
|||
|
|
"1 166 406533 \n",
|
|||
|
|
"2 166 406533 \n",
|
|||
|
|
"3 166 406533 \n",
|
|||
|
|
"4 166 406533 \n",
|
|||
|
|
"... ... ... \n",
|
|||
|
|
"2574456 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"2574457 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"2574458 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"2574459 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"2574460 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"\n",
|
|||
|
|
" Registrar Account - Region RegistrarAccount - Country \\\n",
|
|||
|
|
"0 FRANCE FRANCE \n",
|
|||
|
|
"1 FRANCE FRANCE \n",
|
|||
|
|
"2 FRANCE FRANCE \n",
|
|||
|
|
"3 FRANCE FRANCE \n",
|
|||
|
|
"4 FRANCE FRANCE \n",
|
|||
|
|
"... ... ... \n",
|
|||
|
|
"2574456 LUXEMBOURG LUXEMBOURG \n",
|
|||
|
|
"2574457 LUXEMBOURG LUXEMBOURG \n",
|
|||
|
|
"2574458 LUXEMBOURG LUXEMBOURG \n",
|
|||
|
|
"2574459 LUXEMBOURG LUXEMBOURG \n",
|
|||
|
|
"2574460 LUXEMBOURG LUXEMBOURG \n",
|
|||
|
|
"\n",
|
|||
|
|
" Product - Asset Type Product - Strategy Product - Legal Status ... \\\n",
|
|||
|
|
"0 EQUITY INVESTISSEMENT SICAV ... \n",
|
|||
|
|
"1 DIVERSIFIED PATRIMOINE FCP ... \n",
|
|||
|
|
"2 EQUITY INVESTISSEMENT FCP ... \n",
|
|||
|
|
"3 EQUITY INVESTISSEMENT FCP ... \n",
|
|||
|
|
"4 EQUITY INVESTISSEMENT FCP ... \n",
|
|||
|
|
"... ... ... ... ... \n",
|
|||
|
|
"2574456 FIXED INCOME SÉCURITÉ FCP ... \n",
|
|||
|
|
"2574457 FIXED INCOME SÉCURITÉ FCP ... \n",
|
|||
|
|
"2574458 FIXED INCOME SÉCURITÉ FCP ... \n",
|
|||
|
|
"2574459 FIXED INCOME SÉCURITÉ FCP ... \n",
|
|||
|
|
"2574460 FIXED INCOME SÉCURITÉ SICAV ... \n",
|
|||
|
|
"\n",
|
|||
|
|
" Centralisation Date Quantity - Subscription Quantity - Redemption \\\n",
|
|||
|
|
"0 2020-11-05 1636.000 0.000 \n",
|
|||
|
|
"1 2015-03-09 144.690 0.000 \n",
|
|||
|
|
"2 2016-10-26 0.000 -8.321 \n",
|
|||
|
|
"3 2018-10-18 0.000 -22.083 \n",
|
|||
|
|
"4 2019-04-08 0.000 -465.992 \n",
|
|||
|
|
"... ... ... ... \n",
|
|||
|
|
"2574456 2015-06-12 0.000 -20.000 \n",
|
|||
|
|
"2574457 2015-09-18 328.726 0.000 \n",
|
|||
|
|
"2574458 2015-09-25 4.443 0.000 \n",
|
|||
|
|
"2574459 2015-11-09 0.000 -440.000 \n",
|
|||
|
|
"2574460 2016-01-11 3595.000 0.000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Quantity - NetFlows Value Ccy - Subscription Value Ccy - Redemption \\\n",
|
|||
|
|
"0 1636.000 280983.00 0.00 \n",
|
|||
|
|
"1 144.690 99985.13 0.00 \n",
|
|||
|
|
"2 -8.321 0.00 -9384.76 \n",
|
|||
|
|
"3 -22.083 0.00 -25227.40 \n",
|
|||
|
|
"4 -465.992 0.00 -563775.76 \n",
|
|||
|
|
"... ... ... ... \n",
|
|||
|
|
"2574456 -20.000 0.00 -34294.40 \n",
|
|||
|
|
"2574457 328.726 564028.07 0.00 \n",
|
|||
|
|
"2574458 4.443 7603.66 0.00 \n",
|
|||
|
|
"2574459 -440.000 0.00 -754696.80 \n",
|
|||
|
|
"2574460 3595.000 358385.55 0.00 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Value Ccy - NetFlows Value € - Subscription Value € - Redemption \\\n",
|
|||
|
|
"0 280983.00 280983.00 0.00 \n",
|
|||
|
|
"1 99985.13 99985.13 0.00 \n",
|
|||
|
|
"2 -9384.76 0.00 -9384.76 \n",
|
|||
|
|
"3 -25227.40 0.00 -25227.40 \n",
|
|||
|
|
"4 -563775.76 0.00 -563775.76 \n",
|
|||
|
|
"... ... ... ... \n",
|
|||
|
|
"2574456 -34294.40 0.00 -34294.40 \n",
|
|||
|
|
"2574457 564028.07 564028.07 0.00 \n",
|
|||
|
|
"2574458 7603.66 7603.66 0.00 \n",
|
|||
|
|
"2574459 -754696.80 0.00 -754696.80 \n",
|
|||
|
|
"2574460 358385.55 358385.55 0.00 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Value € - NetFlows \n",
|
|||
|
|
"0 280983.00 \n",
|
|||
|
|
"1 99985.13 \n",
|
|||
|
|
"2 -9384.76 \n",
|
|||
|
|
"3 -25227.40 \n",
|
|||
|
|
"4 -563775.76 \n",
|
|||
|
|
"... ... \n",
|
|||
|
|
"2574456 -34294.40 \n",
|
|||
|
|
"2574457 564028.07 \n",
|
|||
|
|
"2574458 7603.66 \n",
|
|||
|
|
"2574459 -754696.80 \n",
|
|||
|
|
"2574460 358385.55 \n",
|
|||
|
|
"\n",
|
|||
|
|
"[2574461 rows x 25 columns]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 2,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"import pandas as pd \n",
|
|||
|
|
"df_flows = pd.read_csv(\"flows.csv\")\n",
|
|||
|
|
"df_flows"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 3,
|
|||
|
|
"id": "0a614aff-b7e1-4032-94d1-cb87a3cd8806",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stderr",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"/tmp/ipykernel_9087/223063792.py:1: DtypeWarning: Columns (2,3,4,5) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
|||
|
|
" df_aum = pd.read_csv(\"AUM_repaired.csv\")\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Unnamed: 0.1</th>\n",
|
|||
|
|
" <th>Unnamed: 0</th>\n",
|
|||
|
|
" <th>Agreement - Code</th>\n",
|
|||
|
|
" <th>Company - Id</th>\n",
|
|||
|
|
" <th>Company - Ultimate Parent Id</th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>Registrar Account - Region</th>\n",
|
|||
|
|
" <th>RegistrarAccount - Country</th>\n",
|
|||
|
|
" <th>Product - Asset Type</th>\n",
|
|||
|
|
" <th>Product - Strategy</th>\n",
|
|||
|
|
" <th>...</th>\n",
|
|||
|
|
" <th>Product - Is Dedie ?</th>\n",
|
|||
|
|
" <th>Product - Fund</th>\n",
|
|||
|
|
" <th>Product - Shareclass Type</th>\n",
|
|||
|
|
" <th>Product - Shareclass Currency</th>\n",
|
|||
|
|
" <th>Product - Isin</th>\n",
|
|||
|
|
" <th>Centralisation Date</th>\n",
|
|||
|
|
" <th>Quantity - AUM</th>\n",
|
|||
|
|
" <th>Value - AUM CCY</th>\n",
|
|||
|
|
" <th>Value - AUM €</th>\n",
|
|||
|
|
" <th>repair_flag</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-03-31</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>24648.6666</td>\n",
|
|||
|
|
" <td>24648.6666</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-11-30</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>22413.0553</td>\n",
|
|||
|
|
" <td>22413.0553</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>2</td>\n",
|
|||
|
|
" <td>2</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-12-31</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>22051.2406</td>\n",
|
|||
|
|
" <td>22051.2406</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2016-03-31</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>21626.1173</td>\n",
|
|||
|
|
" <td>21626.1173</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2016-11-30</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>22489.4502</td>\n",
|
|||
|
|
" <td>22489.4502</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>...</th>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>5516128</th>\n",
|
|||
|
|
" <td>5516128</td>\n",
|
|||
|
|
" <td>4880294</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>SWITZERLAND</td>\n",
|
|||
|
|
" <td>SWITZERLAND</td>\n",
|
|||
|
|
" <td>FIXED INCOME</td>\n",
|
|||
|
|
" <td>SÉCURITÉ</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PORTFOLIO SÉCURITÉ</td>\n",
|
|||
|
|
" <td>AW & AW-R</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>LU1299306321</td>\n",
|
|||
|
|
" <td>2020-10-31</td>\n",
|
|||
|
|
" <td>3099.000</td>\n",
|
|||
|
|
" <td>318422.2500</td>\n",
|
|||
|
|
" <td>318422.2500</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>5516129</th>\n",
|
|||
|
|
" <td>5516129</td>\n",
|
|||
|
|
" <td>4880294</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>SWITZERLAND</td>\n",
|
|||
|
|
" <td>SWITZERLAND</td>\n",
|
|||
|
|
" <td>FIXED INCOME</td>\n",
|
|||
|
|
" <td>SÉCURITÉ</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PORTFOLIO SÉCURITÉ</td>\n",
|
|||
|
|
" <td>AW & AW-R</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>LU1299306321</td>\n",
|
|||
|
|
" <td>2020-10-31</td>\n",
|
|||
|
|
" <td>3099.000</td>\n",
|
|||
|
|
" <td>318422.2500</td>\n",
|
|||
|
|
" <td>318422.2500</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>5516130</th>\n",
|
|||
|
|
" <td>5516130</td>\n",
|
|||
|
|
" <td>4880295</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>SWITZERLAND</td>\n",
|
|||
|
|
" <td>SWITZERLAND</td>\n",
|
|||
|
|
" <td>FIXED INCOME</td>\n",
|
|||
|
|
" <td>SÉCURITÉ</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PORTFOLIO SÉCURITÉ</td>\n",
|
|||
|
|
" <td>AW & AW-R</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>LU1299306321</td>\n",
|
|||
|
|
" <td>2021-07-31</td>\n",
|
|||
|
|
" <td>2835.000</td>\n",
|
|||
|
|
" <td>297618.3000</td>\n",
|
|||
|
|
" <td>297618.3000</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>5516131</th>\n",
|
|||
|
|
" <td>5516131</td>\n",
|
|||
|
|
" <td>4880295</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>SWITZERLAND</td>\n",
|
|||
|
|
" <td>SWITZERLAND</td>\n",
|
|||
|
|
" <td>FIXED INCOME</td>\n",
|
|||
|
|
" <td>SÉCURITÉ</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PORTFOLIO SÉCURITÉ</td>\n",
|
|||
|
|
" <td>AW & AW-R</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>LU1299306321</td>\n",
|
|||
|
|
" <td>2021-07-31</td>\n",
|
|||
|
|
" <td>2835.000</td>\n",
|
|||
|
|
" <td>297618.3000</td>\n",
|
|||
|
|
" <td>297618.3000</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>5516132</th>\n",
|
|||
|
|
" <td>5516132</td>\n",
|
|||
|
|
" <td>4880296</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>SWITZERLAND</td>\n",
|
|||
|
|
" <td>SWITZERLAND</td>\n",
|
|||
|
|
" <td>FIXED INCOME</td>\n",
|
|||
|
|
" <td>SÉCURITÉ</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PORTFOLIO SÉCURITÉ</td>\n",
|
|||
|
|
" <td>FW & FW-R</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>LU1792391911</td>\n",
|
|||
|
|
" <td>2020-07-31</td>\n",
|
|||
|
|
" <td>2916.394</td>\n",
|
|||
|
|
" <td>287410.6287</td>\n",
|
|||
|
|
" <td>287410.6287</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"<p>5516133 rows × 21 columns</p>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Unnamed: 0.1 Unnamed: 0 Agreement - Code Company - Id \\\n",
|
|||
|
|
"0 0 0 3 166.0 \n",
|
|||
|
|
"1 1 1 3 166.0 \n",
|
|||
|
|
"2 2 2 3 166.0 \n",
|
|||
|
|
"3 3 3 3 166.0 \n",
|
|||
|
|
"4 4 4 3 166.0 \n",
|
|||
|
|
"... ... ... ... ... \n",
|
|||
|
|
"5516128 5516128 4880294 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"5516129 5516129 4880294 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"5516130 5516130 4880295 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"5516131 5516131 4880295 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"5516132 5516132 4880296 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"\n",
|
|||
|
|
" Company - Ultimate Parent Id Registrar Account - ID \\\n",
|
|||
|
|
"0 166.0 200000647 \n",
|
|||
|
|
"1 166.0 200000647 \n",
|
|||
|
|
"2 166.0 200000647 \n",
|
|||
|
|
"3 166.0 200000647 \n",
|
|||
|
|
"4 166.0 200000647 \n",
|
|||
|
|
"... ... ... \n",
|
|||
|
|
"5516128 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"5516129 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"5516130 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"5516131 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"5516132 PRIVATE CLIENT PRIVATE CLIENT \n",
|
|||
|
|
"\n",
|
|||
|
|
" Registrar Account - Region RegistrarAccount - Country \\\n",
|
|||
|
|
"0 FRANCE FRANCE \n",
|
|||
|
|
"1 FRANCE FRANCE \n",
|
|||
|
|
"2 FRANCE FRANCE \n",
|
|||
|
|
"3 FRANCE FRANCE \n",
|
|||
|
|
"4 FRANCE FRANCE \n",
|
|||
|
|
"... ... ... \n",
|
|||
|
|
"5516128 SWITZERLAND SWITZERLAND \n",
|
|||
|
|
"5516129 SWITZERLAND SWITZERLAND \n",
|
|||
|
|
"5516130 SWITZERLAND SWITZERLAND \n",
|
|||
|
|
"5516131 SWITZERLAND SWITZERLAND \n",
|
|||
|
|
"5516132 SWITZERLAND SWITZERLAND \n",
|
|||
|
|
"\n",
|
|||
|
|
" Product - Asset Type Product - Strategy ... Product - Is Dedie ? \\\n",
|
|||
|
|
"0 DIVERSIFIED PATRIMOINE ... NO \n",
|
|||
|
|
"1 DIVERSIFIED PATRIMOINE ... NO \n",
|
|||
|
|
"2 DIVERSIFIED PATRIMOINE ... NO \n",
|
|||
|
|
"3 DIVERSIFIED PATRIMOINE ... NO \n",
|
|||
|
|
"4 DIVERSIFIED PATRIMOINE ... NO \n",
|
|||
|
|
"... ... ... ... ... \n",
|
|||
|
|
"5516128 FIXED INCOME SÉCURITÉ ... NO \n",
|
|||
|
|
"5516129 FIXED INCOME SÉCURITÉ ... NO \n",
|
|||
|
|
"5516130 FIXED INCOME SÉCURITÉ ... NO \n",
|
|||
|
|
"5516131 FIXED INCOME SÉCURITÉ ... NO \n",
|
|||
|
|
"5516132 FIXED INCOME SÉCURITÉ ... NO \n",
|
|||
|
|
"\n",
|
|||
|
|
" Product - Fund Product - Shareclass Type \\\n",
|
|||
|
|
"0 CARMIGNAC PATRIMOINE A \n",
|
|||
|
|
"1 CARMIGNAC PATRIMOINE A \n",
|
|||
|
|
"2 CARMIGNAC PATRIMOINE A \n",
|
|||
|
|
"3 CARMIGNAC PATRIMOINE A \n",
|
|||
|
|
"4 CARMIGNAC PATRIMOINE A \n",
|
|||
|
|
"... ... ... \n",
|
|||
|
|
"5516128 CARMIGNAC PORTFOLIO SÉCURITÉ AW & AW-R \n",
|
|||
|
|
"5516129 CARMIGNAC PORTFOLIO SÉCURITÉ AW & AW-R \n",
|
|||
|
|
"5516130 CARMIGNAC PORTFOLIO SÉCURITÉ AW & AW-R \n",
|
|||
|
|
"5516131 CARMIGNAC PORTFOLIO SÉCURITÉ AW & AW-R \n",
|
|||
|
|
"5516132 CARMIGNAC PORTFOLIO SÉCURITÉ FW & FW-R \n",
|
|||
|
|
"\n",
|
|||
|
|
" Product - Shareclass Currency Product - Isin Centralisation Date \\\n",
|
|||
|
|
"0 EUR FR0010135103 2015-03-31 \n",
|
|||
|
|
"1 EUR FR0010135103 2015-11-30 \n",
|
|||
|
|
"2 EUR FR0010135103 2015-12-31 \n",
|
|||
|
|
"3 EUR FR0010135103 2016-03-31 \n",
|
|||
|
|
"4 EUR FR0010135103 2016-11-30 \n",
|
|||
|
|
"... ... ... ... \n",
|
|||
|
|
"5516128 EUR LU1299306321 2020-10-31 \n",
|
|||
|
|
"5516129 EUR LU1299306321 2020-10-31 \n",
|
|||
|
|
"5516130 EUR LU1299306321 2021-07-31 \n",
|
|||
|
|
"5516131 EUR LU1299306321 2021-07-31 \n",
|
|||
|
|
"5516132 EUR LU1792391911 2020-07-31 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Quantity - AUM Value - AUM CCY Value - AUM € repair_flag \n",
|
|||
|
|
"0 35.368 24648.6666 24648.6666 False \n",
|
|||
|
|
"1 35.368 22413.0553 22413.0553 False \n",
|
|||
|
|
"2 35.368 22051.2406 22051.2406 False \n",
|
|||
|
|
"3 35.368 21626.1173 21626.1173 False \n",
|
|||
|
|
"4 35.368 22489.4502 22489.4502 False \n",
|
|||
|
|
"... ... ... ... ... \n",
|
|||
|
|
"5516128 3099.000 318422.2500 318422.2500 False \n",
|
|||
|
|
"5516129 3099.000 318422.2500 318422.2500 False \n",
|
|||
|
|
"5516130 2835.000 297618.3000 297618.3000 False \n",
|
|||
|
|
"5516131 2835.000 297618.3000 297618.3000 False \n",
|
|||
|
|
"5516132 2916.394 287410.6287 287410.6287 False \n",
|
|||
|
|
"\n",
|
|||
|
|
"[5516133 rows x 21 columns]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 3,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"df_aum = pd.read_csv(\"AUM_repaired.csv\")\n",
|
|||
|
|
"df_aum"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 4,
|
|||
|
|
"id": "c4fd5fc2-8977-4f22-bae3-5acb60561042",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>n_days</th>\n",
|
|||
|
|
" <th>n_transactions</th>\n",
|
|||
|
|
" <th>total_netflows</th>\n",
|
|||
|
|
" <th>mean_flow</th>\n",
|
|||
|
|
" <th>std_flow</th>\n",
|
|||
|
|
" <th>total_subscription</th>\n",
|
|||
|
|
" <th>total_redemption</th>\n",
|
|||
|
|
" <th>churn_ratio</th>\n",
|
|||
|
|
" <th>churn_flag</th>\n",
|
|||
|
|
" <th>activity_score</th>\n",
|
|||
|
|
" <th>flow_volatility</th>\n",
|
|||
|
|
" <th>inertia_ratio</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>100000028</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>-1.092380e+02</td>\n",
|
|||
|
|
" <td>-36.412667</td>\n",
|
|||
|
|
" <td>49.280511</td>\n",
|
|||
|
|
" <td>0.000000e+00</td>\n",
|
|||
|
|
" <td>-1.092380e+02</td>\n",
|
|||
|
|
" <td>-1.092380e+08</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>1.386294</td>\n",
|
|||
|
|
" <td>49.280511</td>\n",
|
|||
|
|
" <td>0.998921</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>100000042</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>-6.601150e+02</td>\n",
|
|||
|
|
" <td>-660.115000</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>0.000000e+00</td>\n",
|
|||
|
|
" <td>-6.601150e+02</td>\n",
|
|||
|
|
" <td>-6.601150e+08</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.693147</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.999640</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>100000065</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>-1.746460e+02</td>\n",
|
|||
|
|
" <td>-174.646000</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>0.000000e+00</td>\n",
|
|||
|
|
" <td>-1.746460e+02</td>\n",
|
|||
|
|
" <td>-1.746460e+08</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.693147</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.999640</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>100000069</td>\n",
|
|||
|
|
" <td>65</td>\n",
|
|||
|
|
" <td>73</td>\n",
|
|||
|
|
" <td>-7.479755e+03</td>\n",
|
|||
|
|
" <td>-102.462397</td>\n",
|
|||
|
|
" <td>2168.971331</td>\n",
|
|||
|
|
" <td>3.332040e+04</td>\n",
|
|||
|
|
" <td>-4.080016e+04</td>\n",
|
|||
|
|
" <td>-1.224480e+00</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>4.304065</td>\n",
|
|||
|
|
" <td>2168.971331</td>\n",
|
|||
|
|
" <td>0.976619</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>100000073</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>-1.334020e+02</td>\n",
|
|||
|
|
" <td>-133.402000</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>0.000000e+00</td>\n",
|
|||
|
|
" <td>-1.334020e+02</td>\n",
|
|||
|
|
" <td>-1.334020e+08</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.693147</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.999640</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>...</th>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6837</th>\n",
|
|||
|
|
" <td>422905</td>\n",
|
|||
|
|
" <td>208</td>\n",
|
|||
|
|
" <td>212</td>\n",
|
|||
|
|
" <td>-4.329218e+03</td>\n",
|
|||
|
|
" <td>-20.420840</td>\n",
|
|||
|
|
" <td>331.677297</td>\n",
|
|||
|
|
" <td>9.699140e+03</td>\n",
|
|||
|
|
" <td>-1.402836e+04</td>\n",
|
|||
|
|
" <td>-1.446351e+00</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>5.361292</td>\n",
|
|||
|
|
" <td>331.677297</td>\n",
|
|||
|
|
" <td>0.925180</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6838</th>\n",
|
|||
|
|
" <td>422906</td>\n",
|
|||
|
|
" <td>1146</td>\n",
|
|||
|
|
" <td>1556</td>\n",
|
|||
|
|
" <td>4.455099e+03</td>\n",
|
|||
|
|
" <td>2.863174</td>\n",
|
|||
|
|
" <td>201.071555</td>\n",
|
|||
|
|
" <td>6.078686e+04</td>\n",
|
|||
|
|
" <td>-5.633177e+04</td>\n",
|
|||
|
|
" <td>-9.267095e-01</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>7.350516</td>\n",
|
|||
|
|
" <td>201.071555</td>\n",
|
|||
|
|
" <td>0.587770</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6839</th>\n",
|
|||
|
|
" <td>8307</td>\n",
|
|||
|
|
" <td>204</td>\n",
|
|||
|
|
" <td>221</td>\n",
|
|||
|
|
" <td>2.168303e+04</td>\n",
|
|||
|
|
" <td>98.113249</td>\n",
|
|||
|
|
" <td>2217.940406</td>\n",
|
|||
|
|
" <td>1.204399e+05</td>\n",
|
|||
|
|
" <td>-9.875688e+04</td>\n",
|
|||
|
|
" <td>-8.199681e-01</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>5.402677</td>\n",
|
|||
|
|
" <td>2217.940406</td>\n",
|
|||
|
|
" <td>0.926619</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6840</th>\n",
|
|||
|
|
" <td>OFF DISTRIBUTION</td>\n",
|
|||
|
|
" <td>2656</td>\n",
|
|||
|
|
" <td>27679</td>\n",
|
|||
|
|
" <td>1.319043e+08</td>\n",
|
|||
|
|
" <td>4765.499704</td>\n",
|
|||
|
|
" <td>391347.475503</td>\n",
|
|||
|
|
" <td>3.388942e+08</td>\n",
|
|||
|
|
" <td>-2.069900e+08</td>\n",
|
|||
|
|
" <td>-6.107804e-01</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>10.228465</td>\n",
|
|||
|
|
" <td>391347.475503</td>\n",
|
|||
|
|
" <td>0.044604</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6841</th>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>2690</td>\n",
|
|||
|
|
" <td>32363</td>\n",
|
|||
|
|
" <td>-4.181221e+05</td>\n",
|
|||
|
|
" <td>-12.919758</td>\n",
|
|||
|
|
" <td>7572.830139</td>\n",
|
|||
|
|
" <td>1.354277e+07</td>\n",
|
|||
|
|
" <td>-1.396089e+07</td>\n",
|
|||
|
|
" <td>-1.030874e+00</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>10.384802</td>\n",
|
|||
|
|
" <td>7572.830139</td>\n",
|
|||
|
|
" <td>0.032374</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"<p>6842 rows × 13 columns</p>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID n_days n_transactions total_netflows \\\n",
|
|||
|
|
"0 100000028 3 3 -1.092380e+02 \n",
|
|||
|
|
"1 100000042 1 1 -6.601150e+02 \n",
|
|||
|
|
"2 100000065 1 1 -1.746460e+02 \n",
|
|||
|
|
"3 100000069 65 73 -7.479755e+03 \n",
|
|||
|
|
"4 100000073 1 1 -1.334020e+02 \n",
|
|||
|
|
"... ... ... ... ... \n",
|
|||
|
|
"6837 422905 208 212 -4.329218e+03 \n",
|
|||
|
|
"6838 422906 1146 1556 4.455099e+03 \n",
|
|||
|
|
"6839 8307 204 221 2.168303e+04 \n",
|
|||
|
|
"6840 OFF DISTRIBUTION 2656 27679 1.319043e+08 \n",
|
|||
|
|
"6841 PRIVATE CLIENT 2690 32363 -4.181221e+05 \n",
|
|||
|
|
"\n",
|
|||
|
|
" mean_flow std_flow total_subscription total_redemption \\\n",
|
|||
|
|
"0 -36.412667 49.280511 0.000000e+00 -1.092380e+02 \n",
|
|||
|
|
"1 -660.115000 NaN 0.000000e+00 -6.601150e+02 \n",
|
|||
|
|
"2 -174.646000 NaN 0.000000e+00 -1.746460e+02 \n",
|
|||
|
|
"3 -102.462397 2168.971331 3.332040e+04 -4.080016e+04 \n",
|
|||
|
|
"4 -133.402000 NaN 0.000000e+00 -1.334020e+02 \n",
|
|||
|
|
"... ... ... ... ... \n",
|
|||
|
|
"6837 -20.420840 331.677297 9.699140e+03 -1.402836e+04 \n",
|
|||
|
|
"6838 2.863174 201.071555 6.078686e+04 -5.633177e+04 \n",
|
|||
|
|
"6839 98.113249 2217.940406 1.204399e+05 -9.875688e+04 \n",
|
|||
|
|
"6840 4765.499704 391347.475503 3.388942e+08 -2.069900e+08 \n",
|
|||
|
|
"6841 -12.919758 7572.830139 1.354277e+07 -1.396089e+07 \n",
|
|||
|
|
"\n",
|
|||
|
|
" churn_ratio churn_flag activity_score flow_volatility inertia_ratio \n",
|
|||
|
|
"0 -1.092380e+08 0 1.386294 49.280511 0.998921 \n",
|
|||
|
|
"1 -6.601150e+08 0 0.693147 0.000000 0.999640 \n",
|
|||
|
|
"2 -1.746460e+08 0 0.693147 0.000000 0.999640 \n",
|
|||
|
|
"3 -1.224480e+00 0 4.304065 2168.971331 0.976619 \n",
|
|||
|
|
"4 -1.334020e+08 0 0.693147 0.000000 0.999640 \n",
|
|||
|
|
"... ... ... ... ... ... \n",
|
|||
|
|
"6837 -1.446351e+00 0 5.361292 331.677297 0.925180 \n",
|
|||
|
|
"6838 -9.267095e-01 0 7.350516 201.071555 0.587770 \n",
|
|||
|
|
"6839 -8.199681e-01 0 5.402677 2217.940406 0.926619 \n",
|
|||
|
|
"6840 -6.107804e-01 0 10.228465 391347.475503 0.044604 \n",
|
|||
|
|
"6841 -1.030874e+00 0 10.384802 7572.830139 0.032374 \n",
|
|||
|
|
"\n",
|
|||
|
|
"[6842 rows x 13 columns]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 4,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"df_client = pd.read_csv(\"client_behavior_clean.csv\")\n",
|
|||
|
|
"df_client"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 6,
|
|||
|
|
"id": "e2233e16-958e-4973-aed4-6c9f2cbe8397",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Flows - AUM: (2574461, 25) (5516133, 21)\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"print (\"Flows - AUM:\", df_flows.shape , df_aum.shape)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 5,
|
|||
|
|
"id": "9bca9076-a961-4004-9b5c-7b55191e2c6e",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Flows - AUM - Client shape: (2574461, 25) (5516133, 21) (6842, 13)\n",
|
|||
|
|
"Flows Columns: Index(['Unnamed: 0', 'Agreement - Code', 'Company - Id',\n",
|
|||
|
|
" 'Company - Ultimate Parent Id', 'Registrar Account - ID',\n",
|
|||
|
|
" 'Registrar Account - Region', 'RegistrarAccount - Country',\n",
|
|||
|
|
" 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status',\n",
|
|||
|
|
" 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type',\n",
|
|||
|
|
" 'Product - Shareclass Currency', 'Product - Isin',\n",
|
|||
|
|
" 'Centralisation Date', 'Quantity - Subscription',\n",
|
|||
|
|
" 'Quantity - Redemption', 'Quantity - NetFlows',\n",
|
|||
|
|
" 'Value Ccy - Subscription', 'Value Ccy - Redemption',\n",
|
|||
|
|
" 'Value Ccy - NetFlows', 'Value € - Subscription',\n",
|
|||
|
|
" 'Value € - Redemption', 'Value € - NetFlows'],\n",
|
|||
|
|
" dtype='object')\n",
|
|||
|
|
"AUM Columns: Index(['Unnamed: 0.1', 'Unnamed: 0', 'Agreement - Code', 'Company - Id',\n",
|
|||
|
|
" 'Company - Ultimate Parent Id', 'Registrar Account - ID',\n",
|
|||
|
|
" 'Registrar Account - Region', 'RegistrarAccount - Country',\n",
|
|||
|
|
" 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status',\n",
|
|||
|
|
" 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type',\n",
|
|||
|
|
" 'Product - Shareclass Currency', 'Product - Isin',\n",
|
|||
|
|
" 'Centralisation Date', 'Quantity - AUM', 'Value - AUM CCY',\n",
|
|||
|
|
" 'Value - AUM €', 'repair_flag'],\n",
|
|||
|
|
" dtype='object')\n",
|
|||
|
|
"Client Columns: Index(['Registrar Account - ID', 'n_days', 'n_transactions', 'total_netflows',\n",
|
|||
|
|
" 'mean_flow', 'std_flow', 'total_subscription', 'total_redemption',\n",
|
|||
|
|
" 'churn_ratio', 'churn_flag', 'activity_score', 'flow_volatility',\n",
|
|||
|
|
" 'inertia_ratio'],\n",
|
|||
|
|
" dtype='object')\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"print (\"Flows - AUM - Client shape:\", df_flows.shape , df_aum.shape, df_client.shape)\n",
|
|||
|
|
"print (\"Flows Columns:\" , df_flows.columns )\n",
|
|||
|
|
"print (\"AUM Columns:\" , df_aum.columns )\n",
|
|||
|
|
"print (\"Client Columns:\" , df_client.columns )"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 6,
|
|||
|
|
"id": "c2a1a52f-b0b9-4ecd-bf9f-2f9759e0cd24",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Unnamed: 0.1</th>\n",
|
|||
|
|
" <th>Unnamed: 0</th>\n",
|
|||
|
|
" <th>Agreement - Code</th>\n",
|
|||
|
|
" <th>Company - Id</th>\n",
|
|||
|
|
" <th>Company - Ultimate Parent Id</th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>Registrar Account - Region</th>\n",
|
|||
|
|
" <th>RegistrarAccount - Country</th>\n",
|
|||
|
|
" <th>Product - Asset Type</th>\n",
|
|||
|
|
" <th>Product - Strategy</th>\n",
|
|||
|
|
" <th>...</th>\n",
|
|||
|
|
" <th>Product - Is Dedie ?</th>\n",
|
|||
|
|
" <th>Product - Fund</th>\n",
|
|||
|
|
" <th>Product - Shareclass Type</th>\n",
|
|||
|
|
" <th>Product - Shareclass Currency</th>\n",
|
|||
|
|
" <th>Product - Isin</th>\n",
|
|||
|
|
" <th>Centralisation Date</th>\n",
|
|||
|
|
" <th>Quantity - AUM</th>\n",
|
|||
|
|
" <th>Value - AUM CCY</th>\n",
|
|||
|
|
" <th>Value - AUM €</th>\n",
|
|||
|
|
" <th>repair_flag</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-03-31</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>24648.6666</td>\n",
|
|||
|
|
" <td>24648.6666</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-11-30</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>22413.0553</td>\n",
|
|||
|
|
" <td>22413.0553</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>2</td>\n",
|
|||
|
|
" <td>2</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2015-12-31</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>22051.2406</td>\n",
|
|||
|
|
" <td>22051.2406</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2016-03-31</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>21626.1173</td>\n",
|
|||
|
|
" <td>21626.1173</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2016-11-30</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>22489.4502</td>\n",
|
|||
|
|
" <td>22489.4502</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>5</th>\n",
|
|||
|
|
" <td>5</td>\n",
|
|||
|
|
" <td>5</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2017-06-30</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>23225.4582</td>\n",
|
|||
|
|
" <td>23225.4582</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6</th>\n",
|
|||
|
|
" <td>6</td>\n",
|
|||
|
|
" <td>6</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2017-08-31</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>22964.0887</td>\n",
|
|||
|
|
" <td>22964.0887</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>7</th>\n",
|
|||
|
|
" <td>7</td>\n",
|
|||
|
|
" <td>7</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2018-03-31</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>22692.4625</td>\n",
|
|||
|
|
" <td>22692.4625</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>8</th>\n",
|
|||
|
|
" <td>8</td>\n",
|
|||
|
|
" <td>8</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2018-05-31</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>22749.7586</td>\n",
|
|||
|
|
" <td>22749.7586</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>9</th>\n",
|
|||
|
|
" <td>9</td>\n",
|
|||
|
|
" <td>9</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>166.0</td>\n",
|
|||
|
|
" <td>200000647</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>FRANCE</td>\n",
|
|||
|
|
" <td>DIVERSIFIED</td>\n",
|
|||
|
|
" <td>PATRIMOINE</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>NO</td>\n",
|
|||
|
|
" <td>CARMIGNAC PATRIMOINE</td>\n",
|
|||
|
|
" <td>A</td>\n",
|
|||
|
|
" <td>EUR</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>2019-02-28</td>\n",
|
|||
|
|
" <td>35.368</td>\n",
|
|||
|
|
" <td>20875.9620</td>\n",
|
|||
|
|
" <td>20875.9620</td>\n",
|
|||
|
|
" <td>False</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"<p>10 rows × 21 columns</p>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Unnamed: 0.1 Unnamed: 0 Agreement - Code Company - Id \\\n",
|
|||
|
|
"0 0 0 3 166.0 \n",
|
|||
|
|
"1 1 1 3 166.0 \n",
|
|||
|
|
"2 2 2 3 166.0 \n",
|
|||
|
|
"3 3 3 3 166.0 \n",
|
|||
|
|
"4 4 4 3 166.0 \n",
|
|||
|
|
"5 5 5 3 166.0 \n",
|
|||
|
|
"6 6 6 3 166.0 \n",
|
|||
|
|
"7 7 7 3 166.0 \n",
|
|||
|
|
"8 8 8 3 166.0 \n",
|
|||
|
|
"9 9 9 3 166.0 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Company - Ultimate Parent Id Registrar Account - ID \\\n",
|
|||
|
|
"0 166.0 200000647 \n",
|
|||
|
|
"1 166.0 200000647 \n",
|
|||
|
|
"2 166.0 200000647 \n",
|
|||
|
|
"3 166.0 200000647 \n",
|
|||
|
|
"4 166.0 200000647 \n",
|
|||
|
|
"5 166.0 200000647 \n",
|
|||
|
|
"6 166.0 200000647 \n",
|
|||
|
|
"7 166.0 200000647 \n",
|
|||
|
|
"8 166.0 200000647 \n",
|
|||
|
|
"9 166.0 200000647 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Registrar Account - Region RegistrarAccount - Country Product - Asset Type \\\n",
|
|||
|
|
"0 FRANCE FRANCE DIVERSIFIED \n",
|
|||
|
|
"1 FRANCE FRANCE DIVERSIFIED \n",
|
|||
|
|
"2 FRANCE FRANCE DIVERSIFIED \n",
|
|||
|
|
"3 FRANCE FRANCE DIVERSIFIED \n",
|
|||
|
|
"4 FRANCE FRANCE DIVERSIFIED \n",
|
|||
|
|
"5 FRANCE FRANCE DIVERSIFIED \n",
|
|||
|
|
"6 FRANCE FRANCE DIVERSIFIED \n",
|
|||
|
|
"7 FRANCE FRANCE DIVERSIFIED \n",
|
|||
|
|
"8 FRANCE FRANCE DIVERSIFIED \n",
|
|||
|
|
"9 FRANCE FRANCE DIVERSIFIED \n",
|
|||
|
|
"\n",
|
|||
|
|
" Product - Strategy ... Product - Is Dedie ? Product - Fund \\\n",
|
|||
|
|
"0 PATRIMOINE ... NO CARMIGNAC PATRIMOINE \n",
|
|||
|
|
"1 PATRIMOINE ... NO CARMIGNAC PATRIMOINE \n",
|
|||
|
|
"2 PATRIMOINE ... NO CARMIGNAC PATRIMOINE \n",
|
|||
|
|
"3 PATRIMOINE ... NO CARMIGNAC PATRIMOINE \n",
|
|||
|
|
"4 PATRIMOINE ... NO CARMIGNAC PATRIMOINE \n",
|
|||
|
|
"5 PATRIMOINE ... NO CARMIGNAC PATRIMOINE \n",
|
|||
|
|
"6 PATRIMOINE ... NO CARMIGNAC PATRIMOINE \n",
|
|||
|
|
"7 PATRIMOINE ... NO CARMIGNAC PATRIMOINE \n",
|
|||
|
|
"8 PATRIMOINE ... NO CARMIGNAC PATRIMOINE \n",
|
|||
|
|
"9 PATRIMOINE ... NO CARMIGNAC PATRIMOINE \n",
|
|||
|
|
"\n",
|
|||
|
|
" Product - Shareclass Type Product - Shareclass Currency Product - Isin \\\n",
|
|||
|
|
"0 A EUR FR0010135103 \n",
|
|||
|
|
"1 A EUR FR0010135103 \n",
|
|||
|
|
"2 A EUR FR0010135103 \n",
|
|||
|
|
"3 A EUR FR0010135103 \n",
|
|||
|
|
"4 A EUR FR0010135103 \n",
|
|||
|
|
"5 A EUR FR0010135103 \n",
|
|||
|
|
"6 A EUR FR0010135103 \n",
|
|||
|
|
"7 A EUR FR0010135103 \n",
|
|||
|
|
"8 A EUR FR0010135103 \n",
|
|||
|
|
"9 A EUR FR0010135103 \n",
|
|||
|
|
"\n",
|
|||
|
|
" Centralisation Date Quantity - AUM Value - AUM CCY Value - AUM € \\\n",
|
|||
|
|
"0 2015-03-31 35.368 24648.6666 24648.6666 \n",
|
|||
|
|
"1 2015-11-30 35.368 22413.0553 22413.0553 \n",
|
|||
|
|
"2 2015-12-31 35.368 22051.2406 22051.2406 \n",
|
|||
|
|
"3 2016-03-31 35.368 21626.1173 21626.1173 \n",
|
|||
|
|
"4 2016-11-30 35.368 22489.4502 22489.4502 \n",
|
|||
|
|
"5 2017-06-30 35.368 23225.4582 23225.4582 \n",
|
|||
|
|
"6 2017-08-31 35.368 22964.0887 22964.0887 \n",
|
|||
|
|
"7 2018-03-31 35.368 22692.4625 22692.4625 \n",
|
|||
|
|
"8 2018-05-31 35.368 22749.7586 22749.7586 \n",
|
|||
|
|
"9 2019-02-28 35.368 20875.9620 20875.9620 \n",
|
|||
|
|
"\n",
|
|||
|
|
" repair_flag \n",
|
|||
|
|
"0 False \n",
|
|||
|
|
"1 False \n",
|
|||
|
|
"2 False \n",
|
|||
|
|
"3 False \n",
|
|||
|
|
"4 False \n",
|
|||
|
|
"5 False \n",
|
|||
|
|
"6 False \n",
|
|||
|
|
"7 False \n",
|
|||
|
|
"8 False \n",
|
|||
|
|
"9 False \n",
|
|||
|
|
"\n",
|
|||
|
|
"[10 rows x 21 columns]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 6,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"df_aum.head(10)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 7,
|
|||
|
|
"id": "fc435518-e8a1-4ad5-bb72-e482e3cc6d75",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>n_days</th>\n",
|
|||
|
|
" <th>n_transactions</th>\n",
|
|||
|
|
" <th>total_netflows</th>\n",
|
|||
|
|
" <th>mean_flow</th>\n",
|
|||
|
|
" <th>std_flow</th>\n",
|
|||
|
|
" <th>total_subscription</th>\n",
|
|||
|
|
" <th>total_redemption</th>\n",
|
|||
|
|
" <th>churn_ratio</th>\n",
|
|||
|
|
" <th>churn_flag</th>\n",
|
|||
|
|
" <th>activity_score</th>\n",
|
|||
|
|
" <th>flow_volatility</th>\n",
|
|||
|
|
" <th>inertia_ratio</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6832</th>\n",
|
|||
|
|
" <td>422876</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>-1.229300e+01</td>\n",
|
|||
|
|
" <td>-12.293000</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>0.000000e+00</td>\n",
|
|||
|
|
" <td>-1.229300e+01</td>\n",
|
|||
|
|
" <td>-1.229300e+07</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.693147</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.999640</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6833</th>\n",
|
|||
|
|
" <td>422877</td>\n",
|
|||
|
|
" <td>9</td>\n",
|
|||
|
|
" <td>10</td>\n",
|
|||
|
|
" <td>-5.860000e+02</td>\n",
|
|||
|
|
" <td>-58.600000</td>\n",
|
|||
|
|
" <td>34788.810510</td>\n",
|
|||
|
|
" <td>1.096470e+05</td>\n",
|
|||
|
|
" <td>-1.102330e+05</td>\n",
|
|||
|
|
" <td>-1.005344e+00</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>2.397895</td>\n",
|
|||
|
|
" <td>34788.810510</td>\n",
|
|||
|
|
" <td>0.996763</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6834</th>\n",
|
|||
|
|
" <td>422884</td>\n",
|
|||
|
|
" <td>8</td>\n",
|
|||
|
|
" <td>8</td>\n",
|
|||
|
|
" <td>1.807000e+04</td>\n",
|
|||
|
|
" <td>2258.750000</td>\n",
|
|||
|
|
" <td>7296.392333</td>\n",
|
|||
|
|
" <td>2.044500e+04</td>\n",
|
|||
|
|
" <td>-2.375000e+03</td>\n",
|
|||
|
|
" <td>-1.161653e-01</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>2.197225</td>\n",
|
|||
|
|
" <td>7296.392333</td>\n",
|
|||
|
|
" <td>0.997122</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6835</th>\n",
|
|||
|
|
" <td>422891</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>-5.358000e+01</td>\n",
|
|||
|
|
" <td>-53.580000</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>0.000000e+00</td>\n",
|
|||
|
|
" <td>-5.358000e+01</td>\n",
|
|||
|
|
" <td>-5.358000e+07</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.693147</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.999640</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6836</th>\n",
|
|||
|
|
" <td>422895</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>-1.811520e+02</td>\n",
|
|||
|
|
" <td>-181.152000</td>\n",
|
|||
|
|
" <td>NaN</td>\n",
|
|||
|
|
" <td>0.000000e+00</td>\n",
|
|||
|
|
" <td>-1.811520e+02</td>\n",
|
|||
|
|
" <td>-1.811520e+08</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.693147</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.999640</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6837</th>\n",
|
|||
|
|
" <td>422905</td>\n",
|
|||
|
|
" <td>208</td>\n",
|
|||
|
|
" <td>212</td>\n",
|
|||
|
|
" <td>-4.329218e+03</td>\n",
|
|||
|
|
" <td>-20.420840</td>\n",
|
|||
|
|
" <td>331.677297</td>\n",
|
|||
|
|
" <td>9.699140e+03</td>\n",
|
|||
|
|
" <td>-1.402836e+04</td>\n",
|
|||
|
|
" <td>-1.446351e+00</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>5.361292</td>\n",
|
|||
|
|
" <td>331.677297</td>\n",
|
|||
|
|
" <td>0.925180</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6838</th>\n",
|
|||
|
|
" <td>422906</td>\n",
|
|||
|
|
" <td>1146</td>\n",
|
|||
|
|
" <td>1556</td>\n",
|
|||
|
|
" <td>4.455099e+03</td>\n",
|
|||
|
|
" <td>2.863174</td>\n",
|
|||
|
|
" <td>201.071555</td>\n",
|
|||
|
|
" <td>6.078686e+04</td>\n",
|
|||
|
|
" <td>-5.633177e+04</td>\n",
|
|||
|
|
" <td>-9.267095e-01</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>7.350516</td>\n",
|
|||
|
|
" <td>201.071555</td>\n",
|
|||
|
|
" <td>0.587770</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6839</th>\n",
|
|||
|
|
" <td>8307</td>\n",
|
|||
|
|
" <td>204</td>\n",
|
|||
|
|
" <td>221</td>\n",
|
|||
|
|
" <td>2.168303e+04</td>\n",
|
|||
|
|
" <td>98.113249</td>\n",
|
|||
|
|
" <td>2217.940406</td>\n",
|
|||
|
|
" <td>1.204399e+05</td>\n",
|
|||
|
|
" <td>-9.875688e+04</td>\n",
|
|||
|
|
" <td>-8.199681e-01</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>5.402677</td>\n",
|
|||
|
|
" <td>2217.940406</td>\n",
|
|||
|
|
" <td>0.926619</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6840</th>\n",
|
|||
|
|
" <td>OFF DISTRIBUTION</td>\n",
|
|||
|
|
" <td>2656</td>\n",
|
|||
|
|
" <td>27679</td>\n",
|
|||
|
|
" <td>1.319043e+08</td>\n",
|
|||
|
|
" <td>4765.499704</td>\n",
|
|||
|
|
" <td>391347.475503</td>\n",
|
|||
|
|
" <td>3.388942e+08</td>\n",
|
|||
|
|
" <td>-2.069900e+08</td>\n",
|
|||
|
|
" <td>-6.107804e-01</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>10.228465</td>\n",
|
|||
|
|
" <td>391347.475503</td>\n",
|
|||
|
|
" <td>0.044604</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6841</th>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>2690</td>\n",
|
|||
|
|
" <td>32363</td>\n",
|
|||
|
|
" <td>-4.181221e+05</td>\n",
|
|||
|
|
" <td>-12.919758</td>\n",
|
|||
|
|
" <td>7572.830139</td>\n",
|
|||
|
|
" <td>1.354277e+07</td>\n",
|
|||
|
|
" <td>-1.396089e+07</td>\n",
|
|||
|
|
" <td>-1.030874e+00</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>10.384802</td>\n",
|
|||
|
|
" <td>7572.830139</td>\n",
|
|||
|
|
" <td>0.032374</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID n_days n_transactions total_netflows \\\n",
|
|||
|
|
"6832 422876 1 1 -1.229300e+01 \n",
|
|||
|
|
"6833 422877 9 10 -5.860000e+02 \n",
|
|||
|
|
"6834 422884 8 8 1.807000e+04 \n",
|
|||
|
|
"6835 422891 1 1 -5.358000e+01 \n",
|
|||
|
|
"6836 422895 1 1 -1.811520e+02 \n",
|
|||
|
|
"6837 422905 208 212 -4.329218e+03 \n",
|
|||
|
|
"6838 422906 1146 1556 4.455099e+03 \n",
|
|||
|
|
"6839 8307 204 221 2.168303e+04 \n",
|
|||
|
|
"6840 OFF DISTRIBUTION 2656 27679 1.319043e+08 \n",
|
|||
|
|
"6841 PRIVATE CLIENT 2690 32363 -4.181221e+05 \n",
|
|||
|
|
"\n",
|
|||
|
|
" mean_flow std_flow total_subscription total_redemption \\\n",
|
|||
|
|
"6832 -12.293000 NaN 0.000000e+00 -1.229300e+01 \n",
|
|||
|
|
"6833 -58.600000 34788.810510 1.096470e+05 -1.102330e+05 \n",
|
|||
|
|
"6834 2258.750000 7296.392333 2.044500e+04 -2.375000e+03 \n",
|
|||
|
|
"6835 -53.580000 NaN 0.000000e+00 -5.358000e+01 \n",
|
|||
|
|
"6836 -181.152000 NaN 0.000000e+00 -1.811520e+02 \n",
|
|||
|
|
"6837 -20.420840 331.677297 9.699140e+03 -1.402836e+04 \n",
|
|||
|
|
"6838 2.863174 201.071555 6.078686e+04 -5.633177e+04 \n",
|
|||
|
|
"6839 98.113249 2217.940406 1.204399e+05 -9.875688e+04 \n",
|
|||
|
|
"6840 4765.499704 391347.475503 3.388942e+08 -2.069900e+08 \n",
|
|||
|
|
"6841 -12.919758 7572.830139 1.354277e+07 -1.396089e+07 \n",
|
|||
|
|
"\n",
|
|||
|
|
" churn_ratio churn_flag activity_score flow_volatility inertia_ratio \n",
|
|||
|
|
"6832 -1.229300e+07 0 0.693147 0.000000 0.999640 \n",
|
|||
|
|
"6833 -1.005344e+00 0 2.397895 34788.810510 0.996763 \n",
|
|||
|
|
"6834 -1.161653e-01 0 2.197225 7296.392333 0.997122 \n",
|
|||
|
|
"6835 -5.358000e+07 0 0.693147 0.000000 0.999640 \n",
|
|||
|
|
"6836 -1.811520e+08 0 0.693147 0.000000 0.999640 \n",
|
|||
|
|
"6837 -1.446351e+00 0 5.361292 331.677297 0.925180 \n",
|
|||
|
|
"6838 -9.267095e-01 0 7.350516 201.071555 0.587770 \n",
|
|||
|
|
"6839 -8.199681e-01 0 5.402677 2217.940406 0.926619 \n",
|
|||
|
|
"6840 -6.107804e-01 0 10.228465 391347.475503 0.044604 \n",
|
|||
|
|
"6841 -1.030874e+00 0 10.384802 7572.830139 0.032374 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 7,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"df_client.tail(10)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 10,
|
|||
|
|
"id": "c61af497-1950-4a94-9b6d-51548466e133",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/plain": [
|
|||
|
|
"491"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 10,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"df_aum['Product - Isin'].nunique()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 12,
|
|||
|
|
"id": "7f0dd0a7-5800-46b1-a10c-682d85377385",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/plain": [
|
|||
|
|
"Registrar Account - ID 15491\n",
|
|||
|
|
"Product - Isin 491\n",
|
|||
|
|
"dtype: int64"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 12,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"df_aum[['Registrar Account - ID','Product - Isin']].nunique()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 13,
|
|||
|
|
"id": "446e199c-b6a0-47e3-ac3e-50791549353e",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>Product - Isin</th>\n",
|
|||
|
|
" <th>n_snapshots</th>\n",
|
|||
|
|
" <th>first_date</th>\n",
|
|||
|
|
" <th>last_date</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>7905</td>\n",
|
|||
|
|
" <td>FR0010135103</td>\n",
|
|||
|
|
" <td>80</td>\n",
|
|||
|
|
" <td>2015-01-31</td>\n",
|
|||
|
|
" <td>2021-08-31</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>7905</td>\n",
|
|||
|
|
" <td>FR0010147603</td>\n",
|
|||
|
|
" <td>80</td>\n",
|
|||
|
|
" <td>2015-01-31</td>\n",
|
|||
|
|
" <td>2021-08-31</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>7905</td>\n",
|
|||
|
|
" <td>FR0010148981</td>\n",
|
|||
|
|
" <td>80</td>\n",
|
|||
|
|
" <td>2015-01-31</td>\n",
|
|||
|
|
" <td>2021-08-31</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>7905</td>\n",
|
|||
|
|
" <td>FR0010148999</td>\n",
|
|||
|
|
" <td>80</td>\n",
|
|||
|
|
" <td>2015-01-31</td>\n",
|
|||
|
|
" <td>2021-08-31</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>7905</td>\n",
|
|||
|
|
" <td>FR0010149096</td>\n",
|
|||
|
|
" <td>80</td>\n",
|
|||
|
|
" <td>2015-01-31</td>\n",
|
|||
|
|
" <td>2021-08-31</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>...</th>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" <td>...</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>79654</th>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>LU2715954330</td>\n",
|
|||
|
|
" <td>3</td>\n",
|
|||
|
|
" <td>2025-08-31</td>\n",
|
|||
|
|
" <td>2025-10-31</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>79655</th>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>LU2715954504</td>\n",
|
|||
|
|
" <td>4</td>\n",
|
|||
|
|
" <td>2025-07-31</td>\n",
|
|||
|
|
" <td>2025-10-31</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>79656</th>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>LU2799473124</td>\n",
|
|||
|
|
" <td>13</td>\n",
|
|||
|
|
" <td>2024-10-31</td>\n",
|
|||
|
|
" <td>2025-10-31</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>79657</th>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>LU2809794220</td>\n",
|
|||
|
|
" <td>12</td>\n",
|
|||
|
|
" <td>2024-11-30</td>\n",
|
|||
|
|
" <td>2025-10-31</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>79658</th>\n",
|
|||
|
|
" <td>PRIVATE CLIENT</td>\n",
|
|||
|
|
" <td>LU2809794576</td>\n",
|
|||
|
|
" <td>2</td>\n",
|
|||
|
|
" <td>2025-09-30</td>\n",
|
|||
|
|
" <td>2025-10-31</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"<p>79659 rows × 5 columns</p>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID Product - Isin n_snapshots first_date \\\n",
|
|||
|
|
"0 7905 FR0010135103 80 2015-01-31 \n",
|
|||
|
|
"1 7905 FR0010147603 80 2015-01-31 \n",
|
|||
|
|
"2 7905 FR0010148981 80 2015-01-31 \n",
|
|||
|
|
"3 7905 FR0010148999 80 2015-01-31 \n",
|
|||
|
|
"4 7905 FR0010149096 80 2015-01-31 \n",
|
|||
|
|
"... ... ... ... ... \n",
|
|||
|
|
"79654 PRIVATE CLIENT LU2715954330 3 2025-08-31 \n",
|
|||
|
|
"79655 PRIVATE CLIENT LU2715954504 4 2025-07-31 \n",
|
|||
|
|
"79656 PRIVATE CLIENT LU2799473124 13 2024-10-31 \n",
|
|||
|
|
"79657 PRIVATE CLIENT LU2809794220 12 2024-11-30 \n",
|
|||
|
|
"79658 PRIVATE CLIENT LU2809794576 2 2025-09-30 \n",
|
|||
|
|
"\n",
|
|||
|
|
" last_date \n",
|
|||
|
|
"0 2021-08-31 \n",
|
|||
|
|
"1 2021-08-31 \n",
|
|||
|
|
"2 2021-08-31 \n",
|
|||
|
|
"3 2021-08-31 \n",
|
|||
|
|
"4 2021-08-31 \n",
|
|||
|
|
"... ... \n",
|
|||
|
|
"79654 2025-10-31 \n",
|
|||
|
|
"79655 2025-10-31 \n",
|
|||
|
|
"79656 2025-10-31 \n",
|
|||
|
|
"79657 2025-10-31 \n",
|
|||
|
|
"79658 2025-10-31 \n",
|
|||
|
|
"\n",
|
|||
|
|
"[79659 rows x 5 columns]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 13,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"df= df_aum.copy()\n",
|
|||
|
|
"snap_count = (\n",
|
|||
|
|
" df.groupby([\"Registrar Account - ID\", \"Product - Isin\"])\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" n_snapshots=(\"Centralisation Date\", \"nunique\"),\n",
|
|||
|
|
" first_date=(\"Centralisation Date\", \"min\"),\n",
|
|||
|
|
" last_date=(\"Centralisation Date\", \"max\")\n",
|
|||
|
|
" )\n",
|
|||
|
|
" .reset_index()\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"snap_count\n",
|
|||
|
|
"#pour aum y a a peut pres 12 valeurs par an (certain en ont moins), mais bcp plus de lignes dans df aum que df flows rev c est a cause de quoi"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "514db9b1-5373-4412-8d6b-25abcbc0d741",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Pour chaque couple (Account, ISIN) :\n",
|
|||
|
|
"n_snapshots : combien de fois l’AUM est observé\n",
|
|||
|
|
"first_date, last_date : période couverte\n",
|
|||
|
|
"\n",
|
|||
|
|
"Avant clustering, je veux savoir si :\n",
|
|||
|
|
"- certains portefeuilles sont très fragmentés,\n",
|
|||
|
|
"- certains comptes n’ont que 1 ou 2 observations (peu fiables),\n",
|
|||
|
|
"- la couverture temporelle est stable"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "000095ec-b983-4169-be11-28bbb68fc295",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## debut etude clustering"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 14,
|
|||
|
|
"id": "e77598d7-ee1e-4932-91e5-71a190d04e7a",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"# Parse dates\n",
|
|||
|
|
"df_flows[\"Centralisation Date\"] = pd.to_datetime(df_flows[\"Centralisation Date\"], errors=\"coerce\")\n",
|
|||
|
|
"df_aum[\"Centralisation Date\"] = pd.to_datetime(df_aum[\"Centralisation Date\"], errors=\"coerce\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"ID_COL = \"Registrar Account - ID\"\n",
|
|||
|
|
"FLOW_COL = \"Quantity - NetFlows\"\n",
|
|||
|
|
"AUM_COL = \"Quantity - AUM\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Month key\n",
|
|||
|
|
"df_flows[\"month\"] = df_flows[\"Centralisation Date\"].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
|
|||
|
|
"df_aum[\"month\"] = df_aum[\"Centralisation Date\"].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
|
|||
|
|
"# Flows sont journaliers, AUM est mensuel → il faut une granularité commune."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 12,
|
|||
|
|
"id": "7d933eba-1a21-486f-9518-34b43a6fee51",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"(968429, 6)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>month</th>\n",
|
|||
|
|
" <th>aum_qty</th>\n",
|
|||
|
|
" <th>net_flow_qty</th>\n",
|
|||
|
|
" <th>gross_flow_qty</th>\n",
|
|||
|
|
" <th>n_tx</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>7905</td>\n",
|
|||
|
|
" <td>2015-01-31</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>7905</td>\n",
|
|||
|
|
" <td>2015-02-28</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>7905</td>\n",
|
|||
|
|
" <td>2015-03-31</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>7905</td>\n",
|
|||
|
|
" <td>2015-04-30</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>7905</td>\n",
|
|||
|
|
" <td>2015-05-31</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID month aum_qty net_flow_qty gross_flow_qty \\\n",
|
|||
|
|
"0 7905 2015-01-31 0.0 0.0 0.0 \n",
|
|||
|
|
"1 7905 2015-02-28 0.0 0.0 0.0 \n",
|
|||
|
|
"2 7905 2015-03-31 0.0 0.0 0.0 \n",
|
|||
|
|
"3 7905 2015-04-30 0.0 0.0 0.0 \n",
|
|||
|
|
"4 7905 2015-05-31 0.0 0.0 0.0 \n",
|
|||
|
|
"\n",
|
|||
|
|
" n_tx \n",
|
|||
|
|
"0 0 \n",
|
|||
|
|
"1 0 \n",
|
|||
|
|
"2 0 \n",
|
|||
|
|
"3 0 \n",
|
|||
|
|
"4 0 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 12,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# 1) Monthly aggregation for FLOWS : je fais mon etude mensuel parce que aum valeur mensuel \n",
|
|||
|
|
"\n",
|
|||
|
|
"ID_COL = \"Registrar Account - ID\"\n",
|
|||
|
|
"FLOW_COL = \"Quantity - NetFlows\"\n",
|
|||
|
|
"AUM_COL = \"Quantity - AUM\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_flows_m = (\n",
|
|||
|
|
" df_flows\n",
|
|||
|
|
" .dropna(subset=[ID_COL, \"month\", FLOW_COL])\n",
|
|||
|
|
" .assign(gross_flow_qty=lambda x: x[FLOW_COL].abs()) # absolute quantity moved\n",
|
|||
|
|
" .groupby([ID_COL, \"month\"], as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" net_flow_qty=(FLOW_COL, \"sum\"), # net quantity change over the month\n",
|
|||
|
|
" gross_flow_qty=(\"gross_flow_qty\", \"sum\"), # total traded quantity (activity intensity)\n",
|
|||
|
|
" n_tx=(FLOW_COL, \"size\"), # number of transactions\n",
|
|||
|
|
" )\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 2) Monthly aggregation for AUM (client-month holdings) ---\n",
|
|||
|
|
"df_aum_m = (\n",
|
|||
|
|
" df_aum\n",
|
|||
|
|
" .dropna(subset=[ID_COL, \"month\", AUM_COL])\n",
|
|||
|
|
" .groupby([ID_COL, \"month\"], as_index=False)\n",
|
|||
|
|
" .agg(aum_qty=(AUM_COL, \"sum\")) # total held quantity across ISINs\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_month = df_aum_m.merge(df_flows_m, on=[ID_COL, \"month\"], how=\"left\")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 4) Months without transactions => flows are 0 ---\n",
|
|||
|
|
"df_month[\"net_flow_qty\"] = df_month[\"net_flow_qty\"].fillna(0.0)\n",
|
|||
|
|
"df_month[\"gross_flow_qty\"] = df_month[\"gross_flow_qty\"].fillna(0.0)\n",
|
|||
|
|
"df_month[\"n_tx\"] = df_month[\"n_tx\"].fillna(0).astype(int)\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(df_month.shape)\n",
|
|||
|
|
"df_month.head()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 13,
|
|||
|
|
"id": "887c9159-a86f-487e-8dfe-617132a73fbe",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>Registrar Account - ID</th>\n",
|
|||
|
|
" <th>n_months</th>\n",
|
|||
|
|
" <th>n_active_months</th>\n",
|
|||
|
|
" <th>flow_freq</th>\n",
|
|||
|
|
" <th>aum_qty_mean</th>\n",
|
|||
|
|
" <th>aum_qty_median</th>\n",
|
|||
|
|
" <th>net_flow_qty_sum</th>\n",
|
|||
|
|
" <th>gross_flow_qty_sum</th>\n",
|
|||
|
|
" <th>gross_flow_qty_mean</th>\n",
|
|||
|
|
" <th>net_flow_qty_vol</th>\n",
|
|||
|
|
" <th>rel_intensity</th>\n",
|
|||
|
|
" <th>netflow_to_aum</th>\n",
|
|||
|
|
" <th>n_tx_total</th>\n",
|
|||
|
|
" <th>log_aum_qty_mean</th>\n",
|
|||
|
|
" <th>log_gross_flow_qty_mean</th>\n",
|
|||
|
|
" <th>gross_flow_to_aum</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0</th>\n",
|
|||
|
|
" <td>7905</td>\n",
|
|||
|
|
" <td>80</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.00000</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1</th>\n",
|
|||
|
|
" <td>7912</td>\n",
|
|||
|
|
" <td>80</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.00000</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2</th>\n",
|
|||
|
|
" <td>7962</td>\n",
|
|||
|
|
" <td>80</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.00000</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3</th>\n",
|
|||
|
|
" <td>8307</td>\n",
|
|||
|
|
" <td>130</td>\n",
|
|||
|
|
" <td>77</td>\n",
|
|||
|
|
" <td>0.592308</td>\n",
|
|||
|
|
" <td>21568.649523</td>\n",
|
|||
|
|
" <td>20698.97</td>\n",
|
|||
|
|
" <td>27252.124</td>\n",
|
|||
|
|
" <td>177077.31</td>\n",
|
|||
|
|
" <td>1362.133154</td>\n",
|
|||
|
|
" <td>3508.455222</td>\n",
|
|||
|
|
" <td>0.056109</td>\n",
|
|||
|
|
" <td>0.011473</td>\n",
|
|||
|
|
" <td>161</td>\n",
|
|||
|
|
" <td>9.979042</td>\n",
|
|||
|
|
" <td>7.217541</td>\n",
|
|||
|
|
" <td>8.20994</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4</th>\n",
|
|||
|
|
" <td>8354</td>\n",
|
|||
|
|
" <td>64</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>0.000</td>\n",
|
|||
|
|
" <td>0.00</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.00000</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
|
|||
|
|
"0 7905 80 0 0.000000 0.000000 \n",
|
|||
|
|
"1 7912 80 0 0.000000 0.000000 \n",
|
|||
|
|
"2 7962 80 0 0.000000 0.000000 \n",
|
|||
|
|
"3 8307 130 77 0.592308 21568.649523 \n",
|
|||
|
|
"4 8354 64 0 0.000000 0.000000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" aum_qty_median net_flow_qty_sum gross_flow_qty_sum gross_flow_qty_mean \\\n",
|
|||
|
|
"0 0.00 0.000 0.00 0.000000 \n",
|
|||
|
|
"1 0.00 0.000 0.00 0.000000 \n",
|
|||
|
|
"2 0.00 0.000 0.00 0.000000 \n",
|
|||
|
|
"3 20698.97 27252.124 177077.31 1362.133154 \n",
|
|||
|
|
"4 0.00 0.000 0.00 0.000000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" net_flow_qty_vol rel_intensity netflow_to_aum n_tx_total \\\n",
|
|||
|
|
"0 0.000000 0.000000 0.000000 0 \n",
|
|||
|
|
"1 0.000000 0.000000 0.000000 0 \n",
|
|||
|
|
"2 0.000000 0.000000 0.000000 0 \n",
|
|||
|
|
"3 3508.455222 0.056109 0.011473 161 \n",
|
|||
|
|
"4 0.000000 0.000000 0.000000 0 \n",
|
|||
|
|
"\n",
|
|||
|
|
" log_aum_qty_mean log_gross_flow_qty_mean gross_flow_to_aum \n",
|
|||
|
|
"0 0.000000 0.000000 0.00000 \n",
|
|||
|
|
"1 0.000000 0.000000 0.00000 \n",
|
|||
|
|
"2 0.000000 0.000000 0.00000 \n",
|
|||
|
|
"3 9.979042 7.217541 8.20994 \n",
|
|||
|
|
"4 0.000000 0.000000 0.00000 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 13,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"eps = 1e-9 \n",
|
|||
|
|
"\n",
|
|||
|
|
"# 1) Active month indicator: did the client trade this month?\n",
|
|||
|
|
"df_month[\"active_month\"] = (df_month[\"gross_flow_qty\"] > 0).astype(int)\n",
|
|||
|
|
"\n",
|
|||
|
|
"#client avec beaucoup de mois à 0 → “stable / dormant”\n",
|
|||
|
|
"#client actif presque tous les mois → “rebalancer / institutionnel actif”\n",
|
|||
|
|
"\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 2) Monthly relative intensity (turnover proxy in quantity terms) : Mesurer l’intensité de trading relativement à la taille et pouvoir ocmparer client petit avec client plus gros\n",
|
|||
|
|
"df_month[\"rel_intensity_m\"] = df_month[\"gross_flow_qty\"] / (df_month[\"aum_qty\"].abs() + eps)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 3) Monthly net flow ratio (directional change): sert a Capturer la direction de la dynamique\n",
|
|||
|
|
"df_month[\"netflow_to_aum_m\"] = df_month[\"net_flow_qty\"] / (df_month[\"aum_qty\"].abs() + eps)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 4) Aggregate to client-level features (1 row per client)\n",
|
|||
|
|
"df_client_feat = (\n",
|
|||
|
|
" df_month.groupby(ID_COL, as_index=False)\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" # Coverage / activity\n",
|
|||
|
|
" n_months=(\"month\", \"nunique\"),\n",
|
|||
|
|
" n_active_months=(\"active_month\", \"sum\"),\n",
|
|||
|
|
" flow_freq=(\"active_month\", \"mean\"),\n",
|
|||
|
|
"\n",
|
|||
|
|
" # Size in quantity terms\n",
|
|||
|
|
" aum_qty_mean=(\"aum_qty\", \"mean\"),\n",
|
|||
|
|
" aum_qty_median=(\"aum_qty\", \"median\"),\n",
|
|||
|
|
"\n",
|
|||
|
|
" # Flows in quantity terms\n",
|
|||
|
|
" net_flow_qty_sum=(\"net_flow_qty\", \"sum\"),\n",
|
|||
|
|
" gross_flow_qty_sum=(\"gross_flow_qty\", \"sum\"),\n",
|
|||
|
|
" gross_flow_qty_mean=(\"gross_flow_qty\", \"mean\"),\n",
|
|||
|
|
"\n",
|
|||
|
|
" # Dispersion / volatility proxy\n",
|
|||
|
|
" net_flow_qty_vol=(\"net_flow_qty\", \"std\"),\n",
|
|||
|
|
" rel_intensity=(\"rel_intensity_m\", \"mean\"),\n",
|
|||
|
|
" netflow_to_aum=(\"netflow_to_aum_m\", \"mean\"),\n",
|
|||
|
|
"\n",
|
|||
|
|
" # Trading frequency proxy\n",
|
|||
|
|
" n_tx_total=(\"n_tx\", \"sum\"),\n",
|
|||
|
|
" )\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 5) Clean NaNs due to std on constant series\n",
|
|||
|
|
"df_client_feat[\"net_flow_qty_vol\"] = df_client_feat[\"net_flow_qty_vol\"].fillna(0.0)\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 6) Log transforms (useful because distributions are heavy-tailed)\n",
|
|||
|
|
"df_client_feat[\"log_aum_qty_mean\"] = np.log1p(df_client_feat[\"aum_qty_mean\"].clip(lower=0))\n",
|
|||
|
|
"df_client_feat[\"log_gross_flow_qty_mean\"] = np.log1p(df_client_feat[\"gross_flow_qty_mean\"].clip(lower=0))\n",
|
|||
|
|
"\n",
|
|||
|
|
"# 7) Global turnover proxy\n",
|
|||
|
|
"df_client_feat[\"gross_flow_to_aum\"] = df_client_feat[\"gross_flow_qty_sum\"] / (df_client_feat[\"aum_qty_mean\"].abs() + eps)\n",
|
|||
|
|
"\n",
|
|||
|
|
"df_client_feat.head()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "7650f113-fb48-41c0-87a4-5b9509086ffd",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Pour faire du clustering, il faut un tableau où :\n",
|
|||
|
|
"- 1 ligne = 1 “objet à classer” (ici : un client / compte)\n",
|
|||
|
|
"- colonnes = variables comparables entre clients\n",
|
|||
|
|
"\n",
|
|||
|
|
"Ce code sert à :\n",
|
|||
|
|
"1- transformer une série temporelle mensuelle (client × mois)\n",
|
|||
|
|
"2- en un vecteur de caractéristiques (client → features)\n",
|
|||
|
|
"! utilisable en clustering."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 14,
|
|||
|
|
"id": "f07707c2-480c-4dd1-8767-95d3582b3149",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/plain": [
|
|||
|
|
"(15491, 16)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 14,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"df_client_feat.shape"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 15,
|
|||
|
|
"id": "5e993274-eace-4527-b824-94144f155688",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Filtered clients: (9743, 16)\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"dfc = df_client_feat.copy()\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Minimal filters (adjust if needed)\n",
|
|||
|
|
"dfc = dfc[(dfc[\"n_months\"] >= 6)] # at least 6 observed months\n",
|
|||
|
|
"dfc = dfc[(dfc[\"aum_qty_mean\"].abs() > 0)] # avoid zero holdings\n",
|
|||
|
|
"print(\"Filtered clients:\", dfc.shape)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "7ff3ffbc-58d8-482f-a027-80b032897c33",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## Baseline clustering 1"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 16,
|
|||
|
|
"id": "1a9e1bcb-8f5d-4fce-8925-ef6bfea84967",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Q33: 0.05374559928443659 Q66: 25.599990882352966\n",
|
|||
|
|
"seg_quantiles\n",
|
|||
|
|
"High-flow 3313\n",
|
|||
|
|
"Low-flow 3215\n",
|
|||
|
|
"Intermediate-flow 3215\n",
|
|||
|
|
"Name: count, dtype: int64\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"# Baseline 1 variable: average monthly gross traded quantity\n",
|
|||
|
|
"x = dfc[\"gross_flow_qty_mean\"].copy()\n",
|
|||
|
|
"\n",
|
|||
|
|
"q33, q66 = x.quantile([0.33, 0.66])\n",
|
|||
|
|
"\n",
|
|||
|
|
"dfc[\"seg_quantiles\"] = pd.cut(\n",
|
|||
|
|
" x,\n",
|
|||
|
|
" bins=[-np.inf, q33, q66, np.inf],\n",
|
|||
|
|
" labels=[\"Low-flow\", \"Intermediate-flow\", \"High-flow\"]\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"Q33:\", q33, \" Q66:\", q66)\n",
|
|||
|
|
"print(dfc[\"seg_quantiles\"].value_counts(dropna=False))"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 17,
|
|||
|
|
"id": "5236feba-7d3e-4224-9485-0449bf35c86f",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAqQAAAHHCAYAAABpzkrAAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAa19JREFUeJzt3XdYFNf7NvB7aUsH6WABREWwd1GxElHRRGPX2FtiL9FojDUa1MQeo8Yk4tfYE1s0FuwNG3YjWIItCqgoRaXuef/wZX4OCwrs6mTJ/bkuLt0zZ+Y8Z2dn9tkpZ1RCCAEiIiIiIoUYKR0AEREREf23MSElIiIiIkUxISUiIiIiRTEhJSIiIiJFMSElIiIiIkUxISUiIiIiRTEhJSIiIiJFMSElIiIiIkUxISUiIiIiRb3zhHTq1KlQqVTvuhkAQOPGjdG4cWPp9aFDh6BSqfDbb7+9l/Z79+4NLy+v99JWYaWkpKB///5wc3ODSqXCyJEjlQ6J3pOwsDCoVCqcPXv2rXVzbkv0SlxcHDp06ABHR0eoVCosWLBA2s8cOnRI6fDyJSUlBS4uLlizZo3SoehN9mf79u3belvmsmXLUKpUKaSlpeltmblRqVSYOnVqvusOHTr0ncbzb5bzvXoX6z0vXl5eaN269TtvJ7/0Hc/t27ehUqkQFhb21rrvKtcpUEKavfKz/8zNzeHh4YHg4GAsWrQIycnJegnqwYMHmDp1Ki5cuKCX5enTvzm2/Pjmm28QFhaGzz77DKtXr0aPHj2UDon07IcffsjXToUKbtSoUdizZw8mTJiA1atXo0WLFkqHVGALFy6EjY0NunTponQoBfbNN99g69at+aqr63bQu3dvpKenY/ny5YVeRmGcOHECU6dOxbNnz95ru/8Wf/75Z74TdH3766+/MHXq1PeS4JK2Qh0hnT59OlavXo2lS5di2LBhAICRI0eiUqVKuHTpkqzuV199hZcvXxZo+Q8ePMC0adMKnPTt3bsXe/fuLdA8BfWm2FasWIHo6Oh32r6uDhw4gLp162LKlCn45JNPUKNGDaVDIj1jQvruHDhwAB999BE+//xzfPLJJyhfvrzSIRVIRkYGFi5ciP79+8PY2FjpcAosr4S0R48eePnyJTw9PaUyXbcDc3Nz9OrVC/PmzYMQotDLeZuXL1/iq6++kl6fOHEC06ZN+08npNOmTct1Ws73St/++usvTJs2jQmpQgqVkLZs2RKffPIJ+vTpgwkTJmDPnj3Yt28f4uPj8eGHH8oSUBMTE5ibm+st4Ny8ePECAGBmZgYzM7N32tabmJqaQq1WK9Z+fsTHx8Pe3l7pMAosex3Tf8/z58+VDkFiqNtPth07duDRo0fo1KmT0qHolbGxMczNzfV+eVinTp1w584dHDx4UK/LfZ25uTlMTEze2fKLEkN9r/5N+7B/M71dQ9q0aVNMmjQJd+7cwa+//iqV53YNaXh4OBo0aAB7e3tYW1vD19cXX375JYBX133WqlULANCnTx/p8oDsX7qNGzdGxYoVERkZiYYNG8LS0lKaN6/r3rKysvDll1/Czc0NVlZW+PDDD3Hv3j1ZHS8vL/Tu3Vtr3teX+bbYcruu4vnz5xgzZgxKliwJtVoNX19ffPfdd1q/uLOvDdq6dSsqVqwItVqNChUqYPfu3bm/4TnEx8ejX79+cHV1hbm5OapUqYJVq1ZJ07Ovc4uJicHOnTul2N/0S3DlypVo2rQpXFxcoFar4e/vj6VLl8rqtG7dGqVLl851/oCAANSsWVNW9uuvv6JGjRqwsLCAg4MDunTporUu3rSOt23bhpCQEHh4eECtVsPHxwdff/01srKytNpfsmQJSpcuDQsLC9SuXRtHjx7N9TOSlpaGKVOmoEyZMlCr1ShZsiTGjRuXr2vHsmO9dOkSGjVqBEtLS5QpU0a6bvnw4cOoU6cOLCws4Ovri3379mkt4/z582jZsiVsbW1hbW2NZs2a4eTJk7I62ZfLHD9+HKNHj4azszOsrKzQrl07PHr0SKrn5eWFq1ev4vDhw9I6zq2/b1pGTikpKbCyssKIESO0pt2/fx/GxsYIDQ194/v05MkT9OjRA7a2trC3t0evXr1w8eJFrWuWevfuDWtra9y6dQutWrWCjY0NunfvDiD/29Kb9i/ZFi9ejAoVKsDS0hLFihVDzZo1sXbt2jzjz37/hRBYsmSJ9N6+yaZNm6TPupOTEz755BP8888/0vTt27dDpVLJzir9/vvvUKlU+Pjjj2XL8vPzQ+fOnQvUx9xs3boVXl5e8PHxyXVaxYoVYW5ujooVK2LLli1a+7S8rpfN7fqzS5cuoXfv3ihdujTMzc3h5uaGvn374smTJ7J5s78jbt68id69e8Pe3h52dnbo06eP7IeoSqXC8+fPsWrVKun9z95n57yWMK/t4O+//4ZKpcL8+fO1+n/ixAmoVCqsW7dOKqtRowYcHBywbdu2N76vixYtgrGxseyo5ty5c6FSqTB69GipLCsrCzY2Nvjiiy9k/co+RT116lSMHTsWAODt7Z3nfrqw3xP3799H27ZtYWVlBRcXF+kSlJzrND/fhwCQnp6OyZMno0aNGrCzs4OVlRUCAwO1Evjsz8d3332HH3/8ET4+PlCr1ahVqxbOnDkj1evduzeWLFkivS85t7P8Xm+7a9cuBAYGwsrKCjY2NggJCcHVq1ffOE9YWBg6duwIAGjSpInUds7P+rFjx1C7dm2Ym5ujdOnS+N///qe1HJVKhcOHD2Pw4MFwcXFBiRIlChRbbGws+vTpgxIlSkCtVsPd3R0fffRRrt/Xb4sHAP7++2907NgRDg4OsLS0RN26dbFz5843vh/Zctsv5Gb9+vWoUaMGbGxsYGtri0qVKmHhwoX5aiObXn9q9OjRA19++SX27t2LAQMG5Frn6tWraN26NSpXrozp06dDrVbj5s2bOH78OIBXO97p06dj8uTJGDhwIAIDAwEA9erVk5bx5MkTtGzZEl26dMEnn3wCV1fXN8Y1c+ZMqFQqfPHFF4iPj8eCBQsQFBSECxcuwMLCIt/9y09srxNC4MMPP8TBgwfRr18/VK1aFXv27MHYsWPxzz//aO0Ujx07hs2bN2Pw4MGwsbHBokWL0L59e9y9exeOjo55xvXy5Us0btwYN2/exNChQ+Ht7Y1Nmzahd+/eePbsGUaMGAE/Pz+sXr0ao0aNQokSJTBmzBgAgLOzc57LXbp0KSpUqIAPP/wQJiYm+OOPPzB48GBoNBoMGTIEANC5c2f07NkTZ86ckZJ1ALhz5w5OnjyJb7/9ViqbOXMmJk2ahE6dOqF///549OgRFi9ejIYNG+L8+fOyI095reOwsDBYW1tj9OjRsLa2xoEDBzB58mQkJSXJ2lq6dCmGDh2KwMBAjBo1Crdv30bbtm1RrFgx2c5Bo9Hgww8/xLFjxzBw4ED4+fnh8uXLmD9/Pq5fv56v69WePn2K1q1bo0uXLujYsSOWLl2KLl26YM2aNRg5ciQ+/fRTdOvWDd9++y06dOiAe/fuwcbGBsCr7SEwMBC2trYYN24cTE1NsXz5cjRu3FhKZl83bNgwFCtWDFOmTMHt27exYMECDB06FBs2bAAALFiwAMOGDYO1tTUmTpwIAFrbx9uWkZO1tTXatWuHDRs2YN68ebJTvevWrYMQQkoac6PRaNCmTRucPn0an332GcqXL49t27ahV69eudbPzMxEcHAwGjRogO+++w6Wlpb53pbetn8BXl1aM3z4cHTo0AEjRoxAamoqLl26hFOnTqFbt265xtSwYUPpmusPPvgAPXv2zLO/wKvPaZ8+fVCrVi2EhoYiLi4OCxcuxPHjx6XPeoMGDaBSqXDkyBFUrlwZAHD06FEYGRnh2LFj0rIePXqEqKgo6WaW/PQxLydOnED16tW1yvfu3Yv27dvD398foaGhePLkifSFWFjh4eH4+++/0adPH7i5ueHq1av48ccfcfXqVZw8eVIroe/UqRO8vb0RGhqKc+fO4aeffoKLiwtmz54NAFi9ejX69++P2rVrY+DAgQCQa2IN5L0dlC5dGvXr18eaNWswatQ
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"plt.hist(dfc[\"gross_flow_qty_mean\"], bins=100)\n",
|
|||
|
|
"plt.axvline(q33, linestyle=\"--\")\n",
|
|||
|
|
"plt.axvline(q66, linestyle=\"--\")\n",
|
|||
|
|
"plt.xlabel(\"Average monthly gross flow (quantity)\")\n",
|
|||
|
|
"plt.ylabel(\"Count\")\n",
|
|||
|
|
"plt.title(\"Distribution of average monthly gross flows (quantity) with quantile thresholds\")\n",
|
|||
|
|
"plt.show()\n",
|
|||
|
|
"\n",
|
|||
|
|
"#X= activite moyenen mensuelle , Y = combien de client ont cette valeurde X"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 18,
|
|||
|
|
"id": "4e6a7679-386a-468c-84f8-5179eab21f82",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAZeJJREFUeJzt3XdYFNf7NvB7WXpZEKQKgiVRUGzYsBeUKBo1aixE0ViiwUpi+yb2WKKJ3VhS1BiNJYnGiA0bNmJBsUvUYBcwFlYEKbvn/cOX+bkCgggMOvfnuva6ds6cPfPM7uzss2fOzKiEEAJERERECmYkdwBEREREcmNCRERERIrHhIiIiIgUjwkRERERKR4TIiIiIlI8JkRERESkeEyIiIiISPGYEBEREZHiMSEiIiIixWNCpDAqlQqTJk2SpleuXAmVSoVr164V+bL79OkDLy8vafratWtQqVT45ptvinzZADBp0iSoVKpiWVZBZWZmYvTo0fDw8ICRkRE6duwod0hUTPbv3w+VSoXffvstz7ovfpfomeTkZPTv3x8uLi5QqVQYMWKEtJ9ZuXKl3OHli16vR9WqVTFt2jS5Qyk0Wdv2/v37C63NHTt2wNraGvfu3Su0NpkQFZKsxOLEiRNyh1LkUlJSMGnSpELduAtLSY4tP3766SfMnj0bXbp0wapVqzBy5Ei5Q6JCtnbtWsybN0/uMN5K06dPx8qVKzF48GCsXr0avXr1kjukV/brr7/i5s2bGDJkiNyhvLLvvvsu34nn634P3nvvPVSsWBEzZswocBvZCCoUK1asEADE8ePH5Q7lpQCIiRMnStOZmZkiNTVV6PX6fLdx7969bO3kR3p6unj69Kk0HRcXJwCI2bNnv1I7BY0tIyNDpKamFtqyikK3bt1EmTJl5A6DilBQUJDw9PTMVr5v3z4BQGzcuDHPNkJCQnJsQ+nq1asnGjZsaFCWtZ9ZsWKFPEG9ourVq4uBAwfKHUaBVKlSRTRt2jRbuU6nE6mpqUKn00lluX0PXsV3330nLC0thVarfa12srCHSOHUajXMzc2L9FDSkydPAAAmJiYwMzMrsuXkxdjYGObm5rItPz8SExNhZ2cndxivLOszJuUpSZ/9m/r9yXLq1CmcPn0aH374odyhFCojIyOYm5vDyKhwU47OnTsjLS0NGzduLJT2mBAVs1OnTqFNmzbQaDSwtrZGy5Yt8ffff2erd+bMGTRt2hQWFhZwd3fHV199hRUrVuR7vE9aWhpGjhwJR0dH2NjY4P3338etW7ey1ctpDNGJEycQGBiI0qVLw8LCAuXKlcPHH38M4Nm4H0dHRwDA5MmToVKpDMYl9enTB9bW1rh69Sratm0LGxsbBAcHS/NyG/cwd+5ceHp6wsLCAk2bNsW5c+cM5jdr1gzNmjXL9rrn28wrtpzGEGVmZmLq1KmoUKECzMzM4OXlhf/9739IS0szqOfl5YV27drh0KFDqFu3LszNzVG+fHn8/PPPOa7Pi548eYLPPvsMHh4eMDMzQ6VKlfDNN99ACCHFrlKpsG/fPpw/f16K/WWH/v78808EBQXBzc0NZmZmqFChAqZOnQqdTifVGTJkCKytrZGSkpLt9T169ICLi4tB/e3bt6Nx48awsrKCjY0NgoKCcP78eYPXvewzPnjwILp27YqyZcvCzMwMHh4eGDlyJFJTU7Mtf+PGjfDx8YG5uTmqVq2KTZs25biN6PV6zJs3D1WqVIG5uTmcnZ3xySef4OHDh3m+71mx3rhxA+3atYO1tTXKlCmDxYsXAwDOnj2LFi1awMrKCp6enli7dm22Nv7991907doV9vb2sLS0RP369REeHm5QJ2uMxIYNGzBt2jS4u7vD3NwcLVu2xJUrV6R6zZo1Q3h4OK5fvy59xjmt78vaeJEQAl5eXujQoUO2eU+fPoWtrS0++eSTl75PqampGDZsGEqXLi3tL27fvp1tzGHWd+jChQvo2bMnSpUqhUaNGgHI/3fpZfuXLOvWrYOfnx9sbGyg0Wjg6+uL+fPn5xp/1vsfFxeH8PBw6b192b5y79690rZuZ2eHDh064OLFi9L8M2fOQKVSYcuWLVJZdHQ0VCoVatWqZdBWmzZtUK9evVdax5xs3rwZpqamaNKkSbZ5hw4dQp06dWBubo4KFSpg2bJl2fZpLxsv9eJnef36dXz66aeoVKkSLCws4ODggK5du2Z7z7J+Iw4fPoywsDA4OjrCysoKnTp1Mhi/4+XlhfPnzyMyMlJ6/7P22S+OIcrte5CcnAwrKysMHz48W/y3bt2CWq02OETm5OSEatWq4c8//8zzvc0P40JphfLl/PnzaNy4MTQaDUaPHg0TExMsW7YMzZo1Q2RkpPSFun37Npo3bw6VSoVx48bBysoKP/zwwyv1rvTv3x+//PILevbsiQYNGmDv3r0ICgrK83WJiYlo3bo1HB0dMXbsWNjZ2eHatWv4448/AACOjo5YsmQJBg8ejE6dOuGDDz4AAFSrVk1qIzMzE4GBgWjUqBG++eYbWFpavnSZP//8Mx4/fozQ0FA8ffoU8+fPR4sWLXD27Fk4Ozvne53zE9uL+vfvj1WrVqFLly747LPPcPToUcyYMQMXL17Epk2bDOpeuXIFXbp0Qb9+/RASEoKffvoJffr0gZ+fH6pUqZLrMoQQeP/997Fv3z7069cPNWrUwM6dOzFq1Cjcvn0bc+fOhaOjI1avXo1p06YhOTlZ+tJ7e3vn2u7KlSthbW2NsLAwWFtbY+/evZgwYQK0Wi1mz54NAOjWrRsWL16M8PBwdO3aVXptSkoK/vrrL/Tp0wdqtRoAsHr1aoSEhCAwMBBff/01UlJSsGTJEjRq1AinTp0y+OHO7TPeuHEjUlJSMHjwYDg4OODYsWNYuHAhbt26ZfAvLjw8HN26dYOvry9mzJiBhw8fol+/fihTpky29fzkk0+wcuVK9O3bF8OGDUNcXBwWLVqEU6dO4fDhwzAxMcn1PQIAnU6HNm3aoEmTJpg1axbWrFmDIUOGwMrKCl988QWCg4PxwQcfYOnSpejduzf8/f1Rrlw5AEBCQgIaNGiAlJQUDBs2DA4ODli1ahXef/99/Pbbb+jUqZPBsmbOnAkjIyN8/vnnSEpKwqxZsxAcHIyjR48CAL744gskJSXh1q1bmDt3LgDA2tr6ldp4kUqlwkcffYRZs2bhwYMHsLe3l+b99ddf0Gq1+Oijj176HvXp0wcbNmxAr169UL9+fURGRr50f9G1a1e88847mD59upTU5+e7lNf+BQAiIiLQo0cPtGzZEl9//TUA4OLFizh8+HCOP5TAs+/J6tWrMXLkSLi7u+Ozzz4D8GyfkNOg2927d6NNmzYoX748Jk2ahNTUVCxcuBANGzbEyZMn4eXlhapVq8LOzg4HDhzA+++/D+BZwm9kZITTp09Dq9VCo9FAr9fjyJEjGDhwYL7XMTdHjhxB1apVs23TZ8+eldqcNGkSMjMzMXHixFfaP77o+PHjOHLkCLp37w53d3dcu3YNS5YsQbNmzXDhwoVs++2hQ4eiVKlSmDhxIq5du4Z58+ZhyJAhWL9+PQBg3rx5GDp0KKytrfHFF18AQK7x5fY9sLa2RqdOnbB+/XrMmTNH2jcBz8ZWCSGkP19Z/Pz8sHnz5gK/DwYK5cAb5WsMUceOHYWpqam4evWqVHbnzh1hY2MjmjRpIpUNHTpUqFQqcerUKans/v37wt7eXgAQcXFxL40lJiZGABCffvqpQXnPnj2zja/JijurzU2bNuW5Hi8bpxMSEiIAiLFjx+Y47/ljxlnH9i0sLMStW7ek8qNHjwoAYuTIkVJZ06ZNczw2/WKbL4tt4sSJ4vlNPut96t+/v0G9zz//XAAQe/fulco8PT0FAHHgwAGpLDExUZiZmYnPPvss27Ket3nzZgFAfPXVVwblXbp0ESqVSly5csVgPatUqfLS9rKkpKRkK/vkk0+EpaWlNFZLr9eLMmXKiM6dOxvU27Bhg8H6PH78WNjZ2YkBAwYY1IuPjxe2trYG5S/7jHOKacaMGUKlUonr169LZb6+vsLd3V08fvxYKtu
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"plt.hist(np.log1p(dfc[\"gross_flow_qty_mean\"]), bins=100)\n",
|
|||
|
|
"plt.axvline(np.log1p(q33), linestyle=\"--\")\n",
|
|||
|
|
"plt.axvline(np.log1p(q66), linestyle=\"--\")\n",
|
|||
|
|
"plt.xlabel(\"log(1 + avg monthly gross flow) (quantity)\")\n",
|
|||
|
|
"plt.ylabel(\"Count\")\n",
|
|||
|
|
"plt.title(\"Log-distribution of average monthly gross flows (quantity)\")\n",
|
|||
|
|
"plt.show()\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 19,
|
|||
|
|
"id": "658f167f-1e8d-4c34-ae7e-bd1d4759d5c4",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/plain": [
|
|||
|
|
"(array([415., 187., 172., 173., 162., 136., 125., 126., 112., 136., 134.,\n",
|
|||
|
|
" 153., 142., 150., 154., 158., 163., 174., 135., 133., 153., 128.,\n",
|
|||
|
|
" 130., 139., 120., 116., 118., 101., 82., 89., 100., 90., 82.,\n",
|
|||
|
|
" 104., 86., 104., 90., 82., 87., 70., 78., 85., 66., 62.,\n",
|
|||
|
|
" 91., 74., 63., 78., 62., 70., 51., 65., 48., 44., 42.,\n",
|
|||
|
|
" 53., 52., 43., 35., 35., 22., 27., 29., 25., 21., 21.,\n",
|
|||
|
|
" 25., 15., 19., 9., 8., 7., 9., 7., 4., 7., 4.,\n",
|
|||
|
|
" 1., 4., 2., 2., 1., 0., 5., 2., 2., 0., 2.,\n",
|
|||
|
|
" 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,\n",
|
|||
|
|
" 1.]),\n",
|
|||
|
|
" array([1.24999219e-05, 1.51704407e-01, 3.03396315e-01, 4.55088222e-01,\n",
|
|||
|
|
" 6.06780130e-01, 7.58472037e-01, 9.10163945e-01, 1.06185585e+00,\n",
|
|||
|
|
" 1.21354776e+00, 1.36523967e+00, 1.51693157e+00, 1.66862348e+00,\n",
|
|||
|
|
" 1.82031539e+00, 1.97200730e+00, 2.12369920e+00, 2.27539111e+00,\n",
|
|||
|
|
" 2.42708302e+00, 2.57877493e+00, 2.73046683e+00, 2.88215874e+00,\n",
|
|||
|
|
" 3.03385065e+00, 3.18554256e+00, 3.33723446e+00, 3.48892637e+00,\n",
|
|||
|
|
" 3.64061828e+00, 3.79231019e+00, 3.94400209e+00, 4.09569400e+00,\n",
|
|||
|
|
" 4.24738591e+00, 4.39907782e+00, 4.55076972e+00, 4.70246163e+00,\n",
|
|||
|
|
" 4.85415354e+00, 5.00584545e+00, 5.15753735e+00, 5.30922926e+00,\n",
|
|||
|
|
" 5.46092117e+00, 5.61261308e+00, 5.76430498e+00, 5.91599689e+00,\n",
|
|||
|
|
" 6.06768880e+00, 6.21938071e+00, 6.37107261e+00, 6.52276452e+00,\n",
|
|||
|
|
" 6.67445643e+00, 6.82614834e+00, 6.97784024e+00, 7.12953215e+00,\n",
|
|||
|
|
" 7.28122406e+00, 7.43291597e+00, 7.58460787e+00, 7.73629978e+00,\n",
|
|||
|
|
" 7.88799169e+00, 8.03968360e+00, 8.19137550e+00, 8.34306741e+00,\n",
|
|||
|
|
" 8.49475932e+00, 8.64645123e+00, 8.79814313e+00, 8.94983504e+00,\n",
|
|||
|
|
" 9.10152695e+00, 9.25321886e+00, 9.40491076e+00, 9.55660267e+00,\n",
|
|||
|
|
" 9.70829458e+00, 9.85998649e+00, 1.00116784e+01, 1.01633703e+01,\n",
|
|||
|
|
" 1.03150622e+01, 1.04667541e+01, 1.06184460e+01, 1.07701379e+01,\n",
|
|||
|
|
" 1.09218298e+01, 1.10735217e+01, 1.12252137e+01, 1.13769056e+01,\n",
|
|||
|
|
" 1.15285975e+01, 1.16802894e+01, 1.18319813e+01, 1.19836732e+01,\n",
|
|||
|
|
" 1.21353651e+01, 1.22870570e+01, 1.24387489e+01, 1.25904408e+01,\n",
|
|||
|
|
" 1.27421327e+01, 1.28938246e+01, 1.30455165e+01, 1.31972085e+01,\n",
|
|||
|
|
" 1.33489004e+01, 1.35005923e+01, 1.36522842e+01, 1.38039761e+01,\n",
|
|||
|
|
" 1.39556680e+01, 1.41073599e+01, 1.42590518e+01, 1.44107437e+01,\n",
|
|||
|
|
" 1.45624356e+01, 1.47141275e+01, 1.48658194e+01, 1.50175113e+01,\n",
|
|||
|
|
" 1.51692032e+01]),\n",
|
|||
|
|
" <BarContainer object of 100 artists>)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 19,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAKVRJREFUeJzt3X90VPWd//FXfpCBADNpgskkSwJUrRAISEHDFNdlJRJCSuUQt8VSSJEDR85ghXQpxgUUrAZpVxBXofZ0gT1LFuse0QUXMASMdQk/DM3yS1NhqcGFSawsGYiHJCT3+8cu8+1A+DFhhvlk8nycc8/Jvfczd96fg5159fP53DtRlmVZAgAAMEh0uAsAAAC4EgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGCc2HAX0BFtbW06ffq0evfuraioqHCXAwAAboJlWTp//rzS0tIUHX39MZJOGVBOnz6t9PT0cJcBAAA64NSpU+rbt+9123TKgNK7d29J/9tBu90e5moAAMDN8Hq9Sk9P932PX0+nDCiXp3XsdjsBBQCATuZmlmewSBYAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOLHhLsBE/Z9+z2//j8vzw1QJAABdEyMoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxbimgLF++XFFRUZo3b57v2MWLF+V2u5WUlKRevXqpoKBAdXV1fq+rra1Vfn6+4uPjlZycrAULFujSpUu3UgoAAIggHQ4oBw4c0K9+9SsNHTrU7/j8+fO1ZcsWvfXWW6qoqNDp06c1efJk3/nW1lbl5+erublZe/bs0YYNG7R+/XotWbKk470AAAARpUMB5cKFC5o6dap+/etf6xvf+IbveENDg37zm9/o5Zdf1kMPPaQRI0Zo3bp12rNnj/bu3StJev/993Xs2DH98z//s+69917l5eXp+eef12uvvabm5ubg9AoAAHRqHQoobrdb+fn5ysnJ8TteVVWllpYWv+MDBw5URkaGKisrJUmVlZXKyspSSkqKr01ubq68Xq+OHj3a7vs1NTXJ6/X6bQAAIHIF/GOBmzZt0sGDB3XgwIGrznk8HsXFxSkhIcHveEpKijwej6/Nn4eTy+cvn2tPSUmJli5dGmipAACgkwpoBOXUqVN66qmntHHjRnXv3j1UNV2luLhYDQ0Nvu3UqVO37b0BAMDtF1BAqaqqUn19vb797W8rNjZWsbGxqqio0OrVqxUbG6uUlBQ1Nzfr3Llzfq+rq6uT0+mUJDmdzqvu6rm8f7nNlWw2m+x2u98GAAAiV0ABZezYsTp8+LCqq6t928iRIzV16lTf3926dVN5ebnvNTU1NaqtrZXL5ZIkuVwuHT58WPX19b42ZWVlstvtyszMDFK3AABAZxbQGpTevXtryJAhfsd69uyppKQk3/GZM2eqqKhIiYmJstvtevLJJ+VyuTRq1ChJ0rhx45SZmalp06ZpxYoV8ng8WrRokdxut2w2W5C6BQAAOrOAF8neyMqVKxUdHa2CggI1NTUpNzdXr7/+uu98TEyMtm7dqjlz5sjlcqlnz54qLCzUsmXLgl0KAADopKIsy7LCXUSgvF6vHA6HGhoaQrIepf/T7/nt/3F5ftDfAwCAriaQ729+iwcAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYJyAAsqaNWs0dOhQ2e122e12uVwubdu2zXd+zJgxioqK8tueeOIJv2vU1tYqPz9f8fHxSk5O1oIFC3Tp0qXg9AYAAESE2EAa9+3bV8uXL9fdd98ty7K0YcMGPfLII/r973+vwYMHS5JmzZqlZcuW+V4THx/v+7u1tVX5+flyOp3as2ePzpw5o+nTp6tbt2568cUXg9QlAADQ2QUUUCZOnOi3/8ILL2jNmjXau3evL6DEx8fL6XS2+/r3339fx44d086dO5WSkqJ7771Xzz//vBYuXKjnnntOcXFxHewGAACIJB1eg9La2qpNmzapsbFRLpfLd3zjxo3q06ePhgwZouLiYn399de+c5WVlcrKylJKSorvWG5urrxer44ePXrN92pqapLX6/XbAABA5ApoBEWSDh8+LJfLpYsXL6pXr17avHmzMjMzJUk//OEP1a9fP6WlpenQoUNauHChampq9Pbbb0uSPB6PXziR5Nv3eDzXfM+SkhItXbo00FIBAEAnFXBAueeee1RdXa2Ghgb967/+qwoLC1VRUaHMzEzNnj3b1y4rK0upqakaO3asTpw4oTvvvLPDRRYXF6uoqMi37/V6lZ6e3uHrAQAAswU8xRMXF6e77rpLI0aMUElJiYYNG6ZXXnml3bbZ2dmSpOPHj0uSnE6n6urq/Npc3r/WuhVJstlsvjuHLm8AACBy3fJzUNra2tTU1NTuuerqaklSamqqJMnlcunw4cOqr6/3tSkrK5PdbvdNEwEAAAQ0xVNcXKy8vDxlZGTo/PnzKi0t1QcffKAdO3boxIkTKi0t1YQJE5SUlKRDhw5p/vz5evDBBzV06FBJ0rhx45SZmalp06ZpxYoV8ng8WrRokdxut2w2W0g6CAAAOp+AAkp9fb2mT5+uM2fOyOFwaOjQodqxY4cefvhhnTp1Sjt37tSqVavU2Nio9PR0FRQUaNGiRb7Xx8TEaOvWrZozZ45cLpd69uypwsJCv+emAAAARFmWZYW7iEB5vV45HA41NDSEZD1K/6ff89v/4/L8oL8HAABdTSDf3/wWDwAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgnIACypo1azR06FDZ7XbZ7Xa5XC5t27bNd/7ixYtyu91KSkpSr169VFBQoLq6Or9r1NbWKj8/X/Hx8UpOTtaCBQt06dKl4PQGAABEhIACSt++fbV8+XJVVVXp448/1kMPPaRHHnlER48elSTNnz9fW7Zs0VtvvaWKigqdPn1akydP9r2+tbVV+fn5am5u1p49e7RhwwatX79eS5YsCW6vAABApxZlWZZ1KxdITEzUL37xCz366KO64447VFpaqkcffVSS9Omnn2rQoEGqrKzUqFGjtG3bNn33u9/V6dOnlZKSIklau3atFi5cqC+//FJxcXE39Z5er1cOh0MNDQ2y2+23Un67+j/9nt/+H5fnB/09AADoagL5/u7wGpTW1lZt2rRJjY2NcrlcqqqqUktLi3JycnxtBg4cqIyMDFVWVkqSKisrlZWV5QsnkpSbmyuv1+sbhWlPU1OTvF6v3wYAACJXwAHl8OHD6tWrl2w2m5544glt3rxZmZmZ8ng8iouLU0JCgl/7lJQUeTweSZLH4/ELJ5fPXz53LSUlJXI4HL4tPT090LIBAEAnEnBAueeee1RdXa19+/Zpzpw5Kiws1LFjx0JRm09xcbEaGhp826lTp0L6fgAAILxiA31BXFyc7rrrLknSiBEjdODAAb3yyiv6wQ9+oObmZp07d85vFKWurk5Op1OS5HQ6tX//fr/rXb7L53Kb9thsNtlstkBLBQAAndQtPwelra1NTU1NGjFihLp166by8nLfuZqaGtXW1srlckmSXC6
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"plt.hist(np.log1p(dfc.loc[dfc[\"gross_flow_qty_mean\"]>0, \"gross_flow_qty_mean\"]), bins=100)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 20,
|
|||
|
|
"id": "42eb180e-a512-4d5c-8179-91b88e3067df",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stderr",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"/tmp/ipykernel_8776/4281614443.py:3: MatplotlibDeprecationWarning: The 'labels' parameter of boxplot() has been renamed 'tick_labels' since Matplotlib 3.9; support for the old name will be dropped in 3.11.\n",
|
|||
|
|
" plt.boxplot(\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmsAAAGzCAYAAABwyVA7AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAdshJREFUeJzt3Xl8TPf+P/DXZMgispDIhiQktkgqxBpCQjS1p7HVGlGqbdBKUbRFey0ttbUGl9L0KqU0aKkWITdBehGidkEstSSxZBWJzJzfH34zXyOTZCaZycxkXs/HIw/mnM+c856ZkzPvfFaRIAgCiIiIiMggmek7ACIiIiIqG5M1IiIiIgPGZI2IiIjIgDFZIyIiIjJgTNaIiIiIDBiTNSIiIiIDxmSNiIiIyIAxWSMiIiIyYEzWiIiIiAyYXpI1T09PjBs3TvE4ISEBIpEICQkJ+gin0mJjYyESiXDz5k19h4Jx48bB09NTaZtIJML8+fOVtp08eRKBgYGwtraGSCRCamoqAOCPP/6Av78/LC0tIRKJkJ2dXS1xa9ur11ZZqvLZzZ8/HyKRSPPgtOjmzZsQiUSIjY1Vu+zXX39dYVldvLaMjAwMGTIEDg4OEIlEWLlypUH9zqv6PTFW6l7/ZPj69u2LiRMn6jsMrdHknlUTPX/+HI0bN8aaNWsq9XytJmvXr1/HpEmT0LRpU1haWsLW1hZdu3bFqlWrUFhYqM1TVcqiRYuwe/dufYehN8+fP8fQoUPx+PFjrFixAps3b4aHhwcePXqEYcOGwcrKChKJBJs3b4a1tbW+wyUN/f777waZdEybNg1//vknZs+ejc2bN+ONN97Qd0haY+j3lIsXL2L+/PkG8QdlTXTv3j3Mnz9f8Uevthw7dgwHDhzAxx9/rNXjVoetW7di5cqVapXV9T1LKpXCzc0NIpEI+/fvV1lm3LhxqFu3bpnHqFu3rsrKJZFIhB9//FHlc7p27QqRSARfX1/Fttq1ayMmJgYLFy7Es2fPNH8xgpbs3btXsLKyEuzt7YWpU6cK69evF1avXi289dZbQu3atYWJEycqynp4eAiRkZGKx1KpVCgsLBSkUqm2wlHJ2tpa6bxVVVJSIhQWFgoymUxrx6ysyMhIwcPDQ2lbYWGh8Pz5c8XjS5cuCQCEDRs2KJXbv3+/AEA4ePBgdYSqU8+ePROKi4srLPf9998LAIT09HSNzzFv3jxBi786lSKTyYTCwkKhpKREsS06OlplXOnp6QIAYenSpRUe9/nz50JhYaFWY3V2dhZGjRqltO3IkSMCAOHIkSNaPVdlvPp7oglt31Oq6tXrf8eOHQbzPtdEJ0+eFAAI33//vVaPO2jQIOH111/X6jGrS79+/Up9FwmCZvcsbTlw4IAAQPD09Cx1D5KLjIwUrK2tyzzGq7/j8nuXpaWl0KdPn1Ll5fdbS0tLoXXr1kr7njx5IpibmwsbN27U+LXU0jy9Ky09PR1vvfUWPDw8cPjwYbi6uir2RUdH49q1a9i3b1+ZzzczM4OlpaU2QqlWYrEYYrFY32GU6dX3NDMzEwBgb2+v1vaqKCgo0EvtnIWFRbWfUx9EIpFOfmdq1aqFWrW0cltQyMzM1Oq1pW3GeO8pi6lc/zVZZmYm9u3bh3Xr1uk7FK3S1T2rPD/++CPatWuHyMhIzJkzR6vfS3379sWvv/6Khw8fwtHRUbF969atcHZ2RrNmzfDkyROl59jb2+P1119HbGwsxo8fr9kJNU7vVHj33XcFAMKxY8fUKv9qzVpZf2X/9ddfQlhYmGBraytYWVkJ3bt3F44ePapURl7LkZaWJkRGRgp2dnaCra2tMG7cOKGgoEBRDkCpn4r+Iv7mm28EHx8fRY1hQECAsGXLFsX+V2tn5LFUdC6pVCqsWLFC8PHxESwsLAQnJyfhnXfeER4/fqzW+7dr1y6hdevWgoWFhdC6dWshLi5OZc0aAGHevHmCILz46+HVmHr06CH06NGj3Fg1+QwuXLggjBgxQrC3txf8/f0FQRCE+/fvC+PGjRMaNmwomJubCy4uLsLAgQPLrdHas2ePAEA4e/asYtvOnTsFAMKbb76pVLZly5bCsGHDFI9fvbYEQRDOnz8vhISECJaWlkLDhg2Ff/3rX8LGjRtV1qz9/vvvQrdu3YQ6deoIdevWFfr27SucP39e5euV6969u/Daa6+pfC3Nmzcv9y/kadOmCfXr11eqnZ08ebIAQFi1apVi24MHDwQAwpo1awRB+L+/3uR/0av6fOUxvlyz9u9//1to2rSpYG5uLrRv3144ceJEua9NEF5cR9HR0YrrztzcXPDx8RH2799f5usShP/7/VAVU1m/8z///LPQrl07wdLSUnBwcBBGjRol/PPPP4r9Vbk2yvLy78nL70FV7yn//POPEBUVJTg5OSnes1f/opa/D9u3bxcWLFggNGzYULCwsBB69uwppKWlKZW9evWqEBERITg7OwsWFhZCw4YNheHDhwvZ2dmKMi9f/2W9/0eOHBHGjh0rODg4qKyF7t27t9C8efNy3zN1YhEEQdi8ebPi86xXr54wfPhw4fbt26WOt3r1aqFJkyaCpaWl0KFDByExMVFxf1L1Xs2fP19wc3MT6tatKwwePFjIzs4Wnj17JnzwwQdCgwYNBGtra2HcuHHCs2fPSp1LnZh69OghtG7dWrhw4YIQHBwsWFlZCW5ubsJXX31VKp5Xf+S/k+q+R6/atGmTAEC4efNmqX3q3stevablXr0/Pnr0SPjoo48EX19fwdraWrCxsRHeeOMNITU1Vel56l6nqr5P5N9L6t6zZDKZ4OHhIQwcOLBU/IWFhYKtra3wzjvvlPseCoIgPH36VLCxsRGWLFki3L9/XzAzM1P6/parbM3aDz/8IFhbWyvuyXKtW7cWpkyZoriGXrVq1SpBJBIJjx49qvA1vEwrf0L/9ttvaNq0KQIDA7VxOADA4cOH0adPHwQEBGDevHkwMzPD999/j549eyIpKQkdO3ZUKj9s2DA0adIEixcvxunTp/Hdd9/ByckJX331FQBg8+bNmDBhAjp27Ih33nkHAODl5VXm+Tds2ICpU6diyJAh+OCDD/Ds2TP8/fff+N///oeRI0eqfE5ERAS8vb2VtqWkpGDlypVwcnJSbJs0aRJiY2MRFRWFqVOnIj09HatXr8aZM2dw7Ngx1K5du8y4Dhw4gMGDB8PHxweLFy/Go0ePEBUVhUaNGpX7fk6aNAkNGzbEokWLMHXqVHTo0AHOzs4AgBYtWmD9+vX44osv0KRJE8X7oulnMHToUDRr1gyLFi2CIAgAgMGDB+PChQuYMmUKPD09kZmZiYMHD+L27dulBkTIdevWDSKRCImJiXjttdcAAElJSTAzM8PRo0cV5bKysnD58mVMnjy5zNf94MEDhISEoKSkBLNmzYK1tTXWr18PKyurUmU3b96MyMhIhIWF4auvvsLTp0+xdu1adOvWDWfOnCkz3jFjxmDixIk4f/68Uh+FkydP4urVq/j000/LjC8oKAgrVqzAhQsXFM+Vv9akpCRMnTpVsQ0AunfvrvI4kyZNwr1793Dw4EFs3rxZZZmtW7ciLy8PkyZNgkgkwpIlSxAREYEbN26Ue80BwNGjRxEXF4f3338fNjY2+OabbzB48GDcvn0bDg4OKp/TvXt3bN68GWPGjEHv3r0xduzYcs8h/53o0KEDFi9ejIyMDKxatQrHjh3DmTNnYG9vr9VroyJVuadkZGSgc+fOEIlEmDx5Mho0aID9+/fj7bffRm5uLj788EOlc3355ZcwMzPD9OnTkZOTgyVLlmDUqFH43//+BwAoLi5GWFgYioqKMGXKFLi4uODu3bvYu3cvsrOzYWdnVyr+7t27Y+rUqfjmm28wZ84ctGrVCgDQqlUrjBkzBv/5z3/w559/on///ornPHjwAIcPH8a8efPKfF/UjWXhwoX47LPPMGzYMEyYMAFZWVn49ttv0b17d8XnCQBr167F5MmTERQUhGnTpuHmzZsIDw9HvXr1VN7XFi9eDCsrK8yaNQvXrl3Dt99+i9q1a8PMzAxPnjzB/Pnz8dd
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/plain": [
|
|||
|
|
"' Ce que tu vois sur ton graphe\\n\\nLes trois segments contiennent des outliers énormes.\\nIl y a une forte dispersion dans chaque segment.\\nMême dans Low-flow et Mid-flow, tu as des comptes avec AUM très grandes (points très haut).\\nEt inversement, dans High-flow, tu as aussi des comptes relativement petits.\\n\\nC’est ça qui justifie la phrase :\\n“Flow intensity alone does not uniquely define client type because accounts of very different sizes may fall in the same flow segment.”\\n'"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 20,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"#limite client size \n",
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"plt.boxplot(\n",
|
|||
|
|
" [dfc.loc[dfc[\"seg_quantiles\"]==s, \"aum_qty_mean\"].dropna()\n",
|
|||
|
|
" for s in [\"Low-flow\",\"Intermediate-flow\",\"High-flow\"]],\n",
|
|||
|
|
" labels=[\"Low\",\"Mid\",\"High\"]\n",
|
|||
|
|
")\n",
|
|||
|
|
"plt.yscale(\"log\")\n",
|
|||
|
|
"plt.ylabel(\"Mean AUM (quantity) [log scale]\")\n",
|
|||
|
|
"plt.title(\"Client size differs widely within flow-intensity segments (quantity AUM)\")\n",
|
|||
|
|
"plt.show()\n",
|
|||
|
|
"\n",
|
|||
|
|
"\n",
|
|||
|
|
"''' Ce que tu vois sur ton graphe\n",
|
|||
|
|
"\n",
|
|||
|
|
"Les trois segments contiennent des outliers énormes.\n",
|
|||
|
|
"Il y a une forte dispersion dans chaque segment.\n",
|
|||
|
|
"Même dans Low-flow et Mid-flow, tu as des comptes avec AUM très grandes (points très haut).\n",
|
|||
|
|
"Et inversement, dans High-flow, tu as aussi des comptes relativement petits.\n",
|
|||
|
|
"\n",
|
|||
|
|
"C’est ça qui justifie la phrase :\n",
|
|||
|
|
"“Flow intensity alone does not uniquely define client type because accounts of very different sizes may fall in the same flow segment.”\n",
|
|||
|
|
"'''"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "ae004a9f-3a5b-4570-a07c-2f4882fff3c0",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## Baseline 2"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 21,
|
|||
|
|
"id": "0e10f5a2-d678-484e-8789-a907635a5a20",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/plain": [
|
|||
|
|
"count 9743.000000\n",
|
|||
|
|
"mean 59.264703\n",
|
|||
|
|
"std 38.537320\n",
|
|||
|
|
"min 6.000000\n",
|
|||
|
|
"25% 26.000000\n",
|
|||
|
|
"50% 50.000000\n",
|
|||
|
|
"75% 80.000000\n",
|
|||
|
|
"max 130.000000\n",
|
|||
|
|
"Name: n_months, dtype: float64"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 21,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"dfc[\"n_months\"].describe()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 22,
|
|||
|
|
"id": "0eb64794-a3b2-4aa3-a43c-62a39eab35e2",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"seg_2D\n",
|
|||
|
|
"Highly active (high int, high freq) 3624\n",
|
|||
|
|
"Dormant (low int, low freq) 3588\n",
|
|||
|
|
"Small rebalancers (low int, high freq) 1283\n",
|
|||
|
|
"Occasional large movers (high int, low freq) 1248\n",
|
|||
|
|
"Name: count, dtype: int64\n",
|
|||
|
|
"thr_int: 1.7333333333210905 thr_freq: 0.041666666666666664\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"dfc[\"rel_intensity_total\"] = dfc[\"gross_flow_to_aum\"] # turnover proxy\n",
|
|||
|
|
"dfc[\"frequency\"] = dfc[\"flow_freq\"] # share of active months\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Thresholds: medians (simple + explainable)\n",
|
|||
|
|
"thr_int = dfc[\"rel_intensity_total\"].median()\n",
|
|||
|
|
"thr_freq = dfc[\"frequency\"].median()\n",
|
|||
|
|
"\n",
|
|||
|
|
"def quadrant(row):\n",
|
|||
|
|
" low_int = row[\"rel_intensity_total\"] < thr_int\n",
|
|||
|
|
" low_frq = row[\"frequency\"] < thr_freq\n",
|
|||
|
|
"\n",
|
|||
|
|
" if low_int and low_frq:\n",
|
|||
|
|
" return \"Dormant (low int, low freq)\"\n",
|
|||
|
|
" if low_int and (not low_frq):\n",
|
|||
|
|
" return \"Small rebalancers (low int, high freq)\"\n",
|
|||
|
|
" if (not low_int) and low_frq:\n",
|
|||
|
|
" return \"Occasional large movers (high int, low freq)\"\n",
|
|||
|
|
" return \"Highly active (high int, high freq)\"\n",
|
|||
|
|
"\n",
|
|||
|
|
"dfc[\"seg_2D\"] = dfc.apply(quadrant, axis=1)\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(dfc[\"seg_2D\"].value_counts())\n",
|
|||
|
|
"print(\"thr_int:\", thr_int, \"thr_freq:\", thr_freq)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 26,
|
|||
|
|
"id": "2850c428-c9bf-42fa-87d1-fd8d5d10b016",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA/4pJREFUeJzsnXd8FNUWx3+bsum9kpBC70noCkKoIkgRFIKFKogo0lWwUEQFQRQfRUAEfFgIIE2fSok0EQstAQWkhdASCCEJ6cnufX+ss9kyszuzO5vdJOf7+fAhO3tn5szszL3nnnuKgjHGQBAEQRAEUYtxsrcABEEQBEEQ9oYUIoIgCIIgaj2kEBEEQRAEUeshhYggCIIgiFoPKUQEQRAEQdR6SCEiCIIgCKLWQwoRQRAEQRC1HlKICIIgCIKo9ZBCRBAEQRBErYcUIgekW7duaNmypazHVCgUmDRpkqzHFMPGjRuhUCiQnp5e5ecGgNGjRyM2NtYu5yb0SU9Ph0KhwMaNG+0timzY6ppiY2MxevRoWY/pyOe1Bd26dUO3bt3sLYYsbNq0CU2bNoWrqyv8/f3tLU6NhRQiM/z555+YNGkSWrRoAS8vL0RHR2PYsGH4559/jNp269YNCoUCCoUCTk5O8PX1RZMmTTBixAjs27fPDtIThDT+/vtvzJs3zyoF9uuvv8ayZctkk6mm8uuvv2LevHnIzc21tyiysGrVKodWdm/duoV58+bh9OnT9hZFEufPn8fo0aPRoEEDfPbZZ1i7dq29RaqxuNhbAEfngw8+wNGjRzF06FDExcUhMzMTK1asQJs2bfDbb78ZWXLq1q2LhQsXAgAKCwtx6dIlbN++HV9++SWGDRuGL7/8Eq6urva4FLswYsQIDB8+HG5ubvYWhRDB33//jfnz56Nbt24WW9a+/vprnD17FlOnTtXbHhMTg+Li4lr1/Jvi119/xfz58zF69GijWf+FCxfg5FT181Vrzrtq1SoEBwc7jIVp7969ep9v3bqF+fPnIzY2FgkJCfYRygIOHjwItVqNTz75BA0bNrS3ODUaUojMMH36dHz99ddQKpXabUlJSWjVqhUWLVqEL7/8Uq+9n58fnnvuOb1tixYtwuTJk7Fq1SrExsbigw8+qBLZHQFnZ2c4OzvLdryioiJ4enrKdjyi6lAoFHB3d7e3GCZRq9UoKyuzu5z2mkDUpImLbp9dnblz5w4AmF0qY4yhpKQEHh4eVSBVDYURFtGmTRvWpk0bvW2JiYmsRYsWvO0rKipY8+bNmaenJ8vNzTV5bO44x48fZw8//DBzd3dnsbGx7NNPPzVqW1JSwubMmcMaNGjAlEolq1u3Lnv11VdZSUmJXjsA7OWXX2Y7duxgLVq0YEqlkjVv3pz9+OOPeu3S09PZxIkTWePGjZm7uzsLDAxkTz31FLt69aq2zZ9//skAsI0bNxrJ89NPPzEA7LvvvmOMMbZhwwYGQG9/xhhbuXIla968OVMqlaxOnTrspZdeYvfv3xe8D126dGEeHh5sypQpjDHGdu7cyfr168fq1KnDlEolq1+/PnvnnXdYRUWF3jFGjRrFYmJiTNztymt69NFHWVBQkPZ+jxkzRq+NSqViH3/8MWvevDlzc3NjoaGh7IUXXmA5OTlG7ebOncvq1KnDPDw8WLdu3dhff/3FYmJi2KhRo7TtuHtz5MgR9sorr7Dg4GDm5+fHXnjhBVZaWsru37/PRowYwfz9/Zm/vz979dVXmVqttkimmJgY9vjjj7MjR46w9u3bMzc3N1avXj32xRdfGMlj+O/AgQOi73liYqLR/tz9v3r1KgPANmzYoCdbSkoKe+SRR5inpyfz8/NjAwcOZH///bdem7lz5zIA7OLFi2zUqFHMz8+P+fr6stGjR7PCwkK9tnfv3mXnzp0z2s4H9158+eWXrHnz5szFxYXt2LGDMcbYjRs32JgxY1hoaKj2ffn888/19ue7ptTUVDZq1ChWr1495ubmxsLCwtiYMWNYdna20fUY/uPeE91nRcr7JlZuIYSe0V9++YVNmzaNBQcHM09PT/bEE0+wO3fu6O1neC2JiYna7+/fv8+mTJnC6taty5RKJWvQoAFbtGgRU6lURvdyyZIlbM2aNax+/fpMqVSydu3asT/++ENPztu3b7PRo0ezyMhIplQqWXh4OBs4cKBeP5OYmKiV4cCBA7z3e8OGDWzOnDnMxcVF73o4xo8fz/z8/FhxcTHv/VqyZAkDwNLT042+mzVrFnN1ddW+i//88w8bMmQICwsLY25ubiwyMpIlJSWZHA/47uvcuXO13z3++OPsp59+Ym3btmVubm7s448/Fn2/uXajRo1ivr6+zM/Pj40cOZKdOnXK6JnWvZe68PWvcvZJunJOnTqVxcTEMKVSySIjI9mIESPY3bt32YMHD5inpyebPHmy0X7Xr19nTk5O7P333xe8x7qQQmQBarWaRUZGskcffVRvuymFiDHGFixYwACw77//3uTxExMTWUREBAsNDWWTJk1i//nPf9gjjzzCAOh1bCqVij366KPM09OTTZ06la1Zs4ZNmjSJubi4sEGDBukdEwCLj49nderUYQsWLGDLli1j9evXZ56ennod9datW1l8fDybM2cOW7t2LXvjjTdYQEAAi4mJ0Rtg6tevz/r162ck+5gxY1hAQAArKytjjPErRNxg0KtXL7Z8+XI2adIk5uzszNq3b6/dj7sP4eHhLCQkhL3yyitszZo1bOfOnYwxxp544gk2bNgwtmTJEvbpp5+yoUOHMgBs5syZevKIUYiysrJYQEAAa9y4MVuyZAn77LPP2JtvvsmaNWum127cuHHMxcWFjR8/nq1evZq9/vrrzMvLy0ju1157jQFgAwYMYCtWrGDjx49ndevWZcHBwbyDTUJCAnvsscfYypUr2YgRIxgA9tprr7FHHnmEPfPMM2zVqlWsf//+DIBRZyFWppiYGNakSRMWFhbG3njjDbZixQrWpk0bplAo2NmzZxljjF2+fJlNnjyZAWBvvPEG27RpE9u0aRPLzMwUfc/37t3LEhISWHBwsHZ/TsHgUx727dvHXFxcWOPGjdnixYvZ/PnzWXBwMAsICOB9Zlq3bs2GDBnCVq1axcaNG6e9V7pwbTlFzhQAWLNmzVhISAibP38+W7lyJTt16hTLzMxkdevWZVFRUeydd95hn376KRs4cCADoB10hK7pww8/ZF26dGHvvPMOW7t2LZsyZQrz8PBgHTp00Cq0qamp7Omnn9Yej7tXBQUF2t9L91kR+76JlVsIIYWodevWrEePHmz58uVsxowZzNnZmQ0bNkzbbseOHaxu3bqsadOm2mvZu3cvY4yxwsJCFhcXx4KCgtgbb7zBVq9ezUaOHMkUCoV2gqN7L1u3bs0aNmzIPvjgA7Z48WIWHBzM6tatq/c8d+rUifn5+bG33nqLrVu3jr3//vuse/fu7NChQ9o2uoN4ZmYme+eddxgA9sILL2hlvHz5Mrt48SIDwJYvX653L0pLS1lAQAAbO3as4P26du0aUygUbPHixUbf1a9fnz3++OPaY9WrV49FRESwd999l61bt47Nnz+ftW/fnleZ0r2vgwcPZgDYp59+yjZt2sRSU1O1v1XDhg1ZQEAAmzVrFlu9ejU7cOCA6PutVqtZ165dmZOTE3vppZfY8uXLWY8ePVhcXJxVCpGcfRJjjD148IC1bNmSOTs7s/Hjx7NPP/2ULViwgLVv356dOnWKMcbYs88+y8LCwowmxIsXL2YKhYJdu3ZN8B7rQgqRBWzatMlIOWHMvEK0Y8cOBoB98sknJo/PzbKXLl2q3VZaWsoSEhJYaGio9qHatGkTc3JyYkeOHNHbf/Xq1QwAO3r0qHYbAKZUKtmlS5e021JTU406gqKiIiN5jh07xgCw//73v9pts2fP1pv9cDL6+/vrdSCGCtGdO3eYUqlkjz76qN5sZcWKFQwAW79+vdF9WL16tZFMfHJOmDCBeXp66lnHxChE3O/y559/CrY5cuQIA8C++uorve3cDJ3bnpmZyVxcXNgTTzyh127evHkMAO9g06dPHz3Lz8M
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"for name, g in dfc.groupby(\"seg_2D\"):\n",
|
|||
|
|
" plt.scatter(g[\"frequency\"], g[\"rel_intensity_total\"], s=10, label=name)\n",
|
|||
|
|
"\n",
|
|||
|
|
"plt.yscale(\"log\")\n",
|
|||
|
|
"plt.axvline(thr_freq, linestyle=\"--\")\n",
|
|||
|
|
"plt.axhline(thr_int, linestyle=\"--\")\n",
|
|||
|
|
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
|
|||
|
|
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
|
|||
|
|
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
|
|||
|
|
"plt.legend(markerscale=2)\n",
|
|||
|
|
"plt.show()\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 27,
|
|||
|
|
"id": "d05ac724-6aab-4ea9-bce5-d1b0252148a5",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>n_clients</th>\n",
|
|||
|
|
" <th>aum_qty_med</th>\n",
|
|||
|
|
" <th>gross_flow_qty_med</th>\n",
|
|||
|
|
" <th>freq_med</th>\n",
|
|||
|
|
" <th>rel_int_med</th>\n",
|
|||
|
|
" <th>n_tx_med</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>seg_2D</th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>Highly active (high int, high freq)</th>\n",
|
|||
|
|
" <td>3624</td>\n",
|
|||
|
|
" <td>823.825696</td>\n",
|
|||
|
|
" <td>105.945544</td>\n",
|
|||
|
|
" <td>0.157379</td>\n",
|
|||
|
|
" <td>5.991354</td>\n",
|
|||
|
|
" <td>17.0</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>Dormant (low int, low freq)</th>\n",
|
|||
|
|
" <td>3588</td>\n",
|
|||
|
|
" <td>102.161465</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>Small rebalancers (low int, high freq)</th>\n",
|
|||
|
|
" <td>1283</td>\n",
|
|||
|
|
" <td>543.424242</td>\n",
|
|||
|
|
" <td>12.149239</td>\n",
|
|||
|
|
" <td>0.100000</td>\n",
|
|||
|
|
" <td>0.911442</td>\n",
|
|||
|
|
" <td>15.0</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>Occasional large movers (high int, low freq)</th>\n",
|
|||
|
|
" <td>1248</td>\n",
|
|||
|
|
" <td>39.278192</td>\n",
|
|||
|
|
" <td>4.349875</td>\n",
|
|||
|
|
" <td>0.025000</td>\n",
|
|||
|
|
" <td>4.497671</td>\n",
|
|||
|
|
" <td>1.0</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" n_clients aum_qty_med \\\n",
|
|||
|
|
"seg_2D \n",
|
|||
|
|
"Highly active (high int, high freq) 3624 823.825696 \n",
|
|||
|
|
"Dormant (low int, low freq) 3588 102.161465 \n",
|
|||
|
|
"Small rebalancers (low int, high freq) 1283 543.424242 \n",
|
|||
|
|
"Occasional large movers (high int, low freq) 1248 39.278192 \n",
|
|||
|
|
"\n",
|
|||
|
|
" gross_flow_qty_med freq_med \\\n",
|
|||
|
|
"seg_2D \n",
|
|||
|
|
"Highly active (high int, high freq) 105.945544 0.157379 \n",
|
|||
|
|
"Dormant (low int, low freq) 0.000000 0.000000 \n",
|
|||
|
|
"Small rebalancers (low int, high freq) 12.149239 0.100000 \n",
|
|||
|
|
"Occasional large movers (high int, low freq) 4.349875 0.025000 \n",
|
|||
|
|
"\n",
|
|||
|
|
" rel_int_med n_tx_med \n",
|
|||
|
|
"seg_2D \n",
|
|||
|
|
"Highly active (high int, high freq) 5.991354 17.0 \n",
|
|||
|
|
"Dormant (low int, low freq) 0.000000 0.0 \n",
|
|||
|
|
"Small rebalancers (low int, high freq) 0.911442 15.0 \n",
|
|||
|
|
"Occasional large movers (high int, low freq) 4.497671 1.0 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 27,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"profile_2d = (\n",
|
|||
|
|
" dfc.groupby(\"seg_2D\")\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" n_clients=(ID_COL, \"count\"),\n",
|
|||
|
|
" aum_qty_med=(\"aum_qty_mean\",\"median\"),\n",
|
|||
|
|
" gross_flow_qty_med=(\"gross_flow_qty_mean\",\"median\"),\n",
|
|||
|
|
" freq_med=(\"frequency\",\"median\"),\n",
|
|||
|
|
" rel_int_med=(\"rel_intensity_total\",\"median\"),\n",
|
|||
|
|
" n_tx_med=(\"n_tx_total\",\"median\"),\n",
|
|||
|
|
" )\n",
|
|||
|
|
" .sort_values(\"n_clients\", ascending=False)\n",
|
|||
|
|
")\n",
|
|||
|
|
"profile_2d\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "4ceeeec6-4f52-4b8b-9c14-2419181d0a5f",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## Clustering 2 :KMEANS"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 23,
|
|||
|
|
"id": "6afd24fe-0886-445c-a330-6a537d6aaa1a",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Clustering matrix shape: (9743, 6)\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"import numpy as np\n",
|
|||
|
|
"import pandas as pd\n",
|
|||
|
|
"import matplotlib.pyplot as plt\n",
|
|||
|
|
"\n",
|
|||
|
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
|
"from sklearn.cluster import KMeans\n",
|
|||
|
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
|
"from sklearn.mixture import GaussianMixture\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Safety: ensure baseline-2 columns exist\n",
|
|||
|
|
"dfc = dfc.copy()\n",
|
|||
|
|
"dfc[\"frequency\"] = dfc[\"flow_freq\"]\n",
|
|||
|
|
"dfc[\"rel_intensity_total\"] = dfc[\"gross_flow_to_aum\"]\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Choose a compact, interpretable feature set (quantity-based)\n",
|
|||
|
|
"features = [\n",
|
|||
|
|
" \"log_aum_qty_mean\", # size (log)\n",
|
|||
|
|
" \"log_gross_flow_qty_mean\", # activity intensity (log)\n",
|
|||
|
|
" \"frequency\", # activity frequency\n",
|
|||
|
|
" \"rel_intensity_total\", # turnover proxy\n",
|
|||
|
|
" \"net_flow_qty_vol\", # volatility of net flows\n",
|
|||
|
|
" \"n_tx_total\", # total number of transactions\n",
|
|||
|
|
"]\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Build X (drop NaNs/Infs)\n",
|
|||
|
|
"X = (dfc[features]\n",
|
|||
|
|
" .replace([np.inf, -np.inf], np.nan)\n",
|
|||
|
|
" .dropna()\n",
|
|||
|
|
" .copy())\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Keep IDs aligned\n",
|
|||
|
|
"ids = dfc.loc[X.index, ID_COL].copy()\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Standardize (critical for distance-based clustering)\n",
|
|||
|
|
"scaler = StandardScaler()\n",
|
|||
|
|
"X_scaled = scaler.fit_transform(X)\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"Clustering matrix shape:\", X_scaled.shape)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 24,
|
|||
|
|
"id": "d2e58d79-0638-4cfa-9149-f3bfe5f89399",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHHCAYAAACiOWx7AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAeQJJREFUeJzt3Xd8U1X/B/BPkjbpTPektBTKKi1lSSlbqBQsKOJARGQIihRliVAVEHy0gAsHgujzAwSR9QDKtpYNZRUKlD3K7gBK00Fncn5/1EZCCySQcjs+79crL8m9JzefmyL59txzz5EJIQSIiIiI6IHkUgcgIiIiqgpYNBEREREZgUUTERERkRFYNBEREREZgUUTERERkRFYNBEREREZgUUTERERkRFYNBEREREZgUUTERERkRFYNBFVYTKZDJ988on++SeffAKZTIabN29KF6oGycnJwdChQ+Hp6QmZTIbRo0dLHYmIKhCLJqJKZsGCBZDJZPd97N27V+qI9I/PP/8cCxYswDvvvINFixZhwIABFfp+derUQc+ePctsX7RoERQKBbp37478/PwKzUBUk1lIHYCIyjdt2jT4+/uX2R4QECBBGirPli1b0KZNG0yZMkWyDL/99hsGDRqE8PBwrFmzBlZWVpJlIaruWDQRVVI9evRAq1atpI5RpeTm5sLW1vaJvV96ejoCAwPNdrzi4mLodDoolUqj2i9duhQDBw5Ely5d8Mcff7BgIqpgvDxHVA3dvHkTr7zyCtRqNVxcXDBq1Kgyl22Ki4vx6aefol69elCpVKhTpw4+/PBDFBQU6NuMHTsWLi4uEELot7377ruQyWT47rvv9NvS0tIgk8kwZ86ch2ZbvHgxWrduDRsbGzg5OaFjx47466+/9PvvHadVqk6dOhg0aJD+eellzO3bt2PEiBFwd3eHj48PVq5cqd9+r59++gkymQxJSUn6badOncJLL70EZ2dnWFlZoVWrVvjzzz8feA7btm2DTCZDcnIy1q9fr790evHiRQAlxdSbb74JDw8PWFlZISQkBAsXLjQ4xsWLFyGTyfDll19i1qxZ+p/DiRMnHvoZAsDy5cvx+uuvo3Pnzvjzzz8fWjDJZDKMHDkSK1asQGBgIKytrREWFoZjx47pP5uAgABYWVmhc+fO+nO52759+9C9e3c4ODjAxsYGnTp1wu7duw3aXLp0CSNGjEDDhg1hbW0NFxcXvPzyy2WOV/rz2717N8aOHQs3NzfY2trihRdewI0bNwzaHjx4EBEREXB1dYW1tTX8/f0xZMgQoz4nInNiTxNRJaXRaMoM6JbJZHBxcXnoa1955RXUqVMHMTEx2Lt3L7777jvcvn0bv/76q77N0KFDsXDhQrz00ksYN24c9u3bh5iYGJw8eRKrV68GAHTo0AHffPMNjh8/jqCgIADAzp07IZfLsXPnTrz33nv6bQDQsWPHB+aaOnUqPvnkE7Rt2xbTpk2DUqnEvn37sGXLFnTr1s34D+cuI0aMgJubGyZPnozc3FxERkbCzs4Oy5cvR6dOnQzaLlu2DE2aNNGfy/Hjx9GuXTvUqlULEydOhK2tLZYvX47evXvjf//7H1544YVy37Nx48ZYtGgRxowZAx8fH4wbNw4A4Obmhry8PHTu3Bnnzp3DyJEj4e/vjxUrVmDQoEHIzMzEqFGjDI41f/585Ofn46233oJKpYKzs/NDz/l///sf+vfvj44dO2Lt2rWwtrY26rPauXMn/vzzT0RFRQEAYmJi0LNnT3zwwQf48ccfMWLECNy+fRszZ87EkCFDsGXLFv1rt2zZgh49eqBly5aYMmUK5HI55s+fjy5dumDnzp1o3bo1AODAgQPYs2cPXn31Vfj4+ODixYuYM2cOOnfujBMnTsDGxsYg07vvvgsnJydMmTIFFy9exKxZszBy5EgsW7YMQEkB2q1bN7i5uWHixIlwdHTExYsXsWrVKqPOmcisBBFVKvPnzxcAyn2oVCqDtgDElClT9M+nTJkiAIjnnnvOoN2IESMEAHHkyBEhhBCJiYkCgBg6dKhBu/fff18AEFu2bBFCCJGeni4AiB9//FEIIURmZqaQy+Xi5ZdfFh4eHvrXvffee8LZ2VnodLr7ntfZs2eFXC4XL7zwgtBqtQb77n7dvedUys/PTwwcOLDM59S+fXtRXFxs0LZfv37C3d3dYHtKSoqQy+Vi2rRp+m1du3YVwcHBIj8/3yBL27ZtRf369e97LndnioyMNNg2a9YsAUAsXrxYv62wsFCEhYUJOzs7kZWVJYQQIjk5WQAQarVapKenP/S9St/P29tbWFhYiM6dO4vc3FyjXieE0P/9SU5O1m/76aefBADh6empzyWEENHR0QKAvq1OpxP169cXERERBj+rO3fuCH9/f/HMM88YbLtXfHy8ACB+/fVX/bbSn194eLjBMceMGSMUCoXIzMwUQgixevVqAUAcOHDA6HMlqii8PEdUSc2ePRuxsbEGj40bNxr12tKehFLvvvsuAGDDhg0G/x07dqxBu9Iek/Xr1wMo6Tlp1KgRduzYAQDYvXs3FAoFxo8fj7S0NJw9exZASQ9G+/btIZPJ7ptpzZo10Ol0mDx5MuRyw396HvS6hxk2bBgUCoXBtr59+yI9PR3btm3Tb1u5ciV0Oh369u0LAMjIyMCWLVvwyiuvIDs7Gzdv3sTNmzdx69YtRERE4OzZs7h27ZrJeTZs2ABPT0/069dPv83S0hLvvfcecnJyylw2fPHFF+Hm5mb08TMyMlBcXAwfHx+je5hKde3aFXXq1NE/Dw0N1Wewt7cvs/3ChQsAgMTERJw9exavvfYabt26pf+scnNz0bVrV+zYsQM6nQ4ADDIVFRXh1q1bCAgIgKOjIw4dOlQm01tvvWXw8+/QoQO0Wi0uXboEAHB0dAQArFu3DkVFRSadL5G58fIcUSXVunXrRx4IXr9+fYPn9erVg1wu148ruXTpEuRyeZk78Tw9PeHo6Kj/wgJKvsRKi6ydO3eiVatWaNWqFZydnbFz5054eHjgyJEjeO211x6Y6fz585DL5WYdOA2g3DsMS8fdLFu2DF27dgVQcmmuWbNmaNCgAQDg3LlzEEJg0qRJmDRpUrnHTk9PR61atUzKc+nSJdSvX79MYdi4cWP9/oflf5CuXbvC19cXc+bMgbOzM7799lv9Po1Gg7y8PP1zpVJpcLnP19fX4FgODg4AgNq1a5e7/fbt2wCgL44HDhx431wajQZOTk7Iy8tDTEwM5s+fj2vXrhmMh9NoNGVed28mJycng/fu1KkTXnzxRUydOhXffPMNOnfujN69e+O1116DSqW6bx6iisCiiagGuF9PjjE9PO3bt8fPP/+MCxcuYOfOnejQoQNkMhnat2+PnTt3wtvbGzqdDh06dDB3bANarbbc7eX1tqhUKvTu3RurV6/Gjz/+iLS0NOzevRuff/65vk1pz8j777+PiIiIco/9JKZ3MLW3CAB++OEH3L59G9999x2cnJz0A+dHjRplMOC8U6dOBr1t9/bIPWx7acFT+ll98cUXaNasWblt7ezsAJT0as6fPx+jR49GWFgYHBwcIJPJ8Oqrr+qPY8p7y2QyrFy5Env37sXatWuxefNmDBkyBF999RX27t2rf1+iJ4FFE1E1dPbsWYMejHPnzkGn0+kvzfj5+UGn0+Hs2bP6HhCg5C64zMxM+Pn56beVFkOxsbE4cOAAJk6cCKBk0PecOXPg7e0NW1tbtGzZ8oGZ6tWrB51OhxMnTtz3ixco6WnIzMw02FZYWIiUlBRjTl2vb9++WLhwIeLi4nDy5EkIIfSX5gCgbt26AEounYWHh5t07Afx8/PD0aNHodPpDHqbTp06pd//uORyOX799VdoNBpMnToVzs7OeO+99/DBBx/g9ddf17cr7bV5XPXq1QMAqNXqh35WK1euxMCBA/HVV1/pt+Xn55f5mZqqTZs2aNOmDT777DMsWbIE/fv3x9KlSzF06NDHOi6RKTimiagamj17tsHz77//HkDJ3E8A8OyzzwIAZs2aZdDu66+/BgBERkbqt/n7+6NWrVr45ptvUFRUhHb
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkAAAAHHCAYAAABXx+fLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAZodJREFUeJzt3XlcVOX+B/DPzMDMsA4iMCwiq4ooriiiuYIiLmlZmtdyK+ualYZmWlfNrFC79dO6lmm5lNdyy9KuS4ZLabihKO6KqKjs+yLbzPn9gUxOgDI4cBj4vF+vedk8Z5nvQYKP53me80gEQRBARERE1IRIxS6AiIiIqL4xABEREVGTwwBERERETQ4DEBERETU5DEBERETU5DAAERERUZPDAERERERNDgMQERERNTkMQERERNTkMAARGYmnpycmTpyoe3/w4EFIJBIcPHhQ19avXz+0b9++/oujevPxxx/D29sbMpkMnTp1ErscIqoGAxDRI8TFxeGZZ56Bh4cHlEol3NzcMHDgQHz++edil1YnLly4gPfeew83btyotO2LL77AunXr6r0mU/Hrr79i9uzZ6NWrF9auXYuPPvqoTj9v4sSJsLa2rtR+9uxZODg4wNPTs8q/RyICzMQugKgh+/PPP9G/f3+0bNkSU6ZMgbOzMxITE3H06FEsX74cr7/+um7fy5cvQyo1/X9TXLhwAQsXLkS/fv3g6empt+2LL76Ag4OD3p0u+sv+/fshlUrxzTffQC6Xi1LDuXPnEBISAisrKxw4cKDS3yERlWMAInqIDz/8ECqVCidOnICdnZ3ettTUVL33CoWiHisjYxIEAUVFRbCwsHis86SmpsLCwsJo4cfQus6fP48BAwbAwsICBw4cgJeXl1HqIGqMTP+fq0R1KD4+Hu3atasUfgDAyclJ7/3fxwA9zIULF9C/f39YWlrCzc0NS5curbRPamoqXnzxRajVaiiVSnTs2BHr16/X26eqcUYAcOPGDUgkkkrdVZcuXcIzzzwDe3t7KJVKBAYGYseOHbrt69atw7PPPgsA6N+/PyQSie78np6eOH/+PA4dOqRr79evn+7Y7OxszJgxA+7u7lAoFPD19cWSJUug1Wof+fU4efIkwsLC4ODgAAsLC3h5eWHy5Ml6+2i1WixfvhwBAQFQKpVwdHTE4MGDcfLkSd0+ZWVlWLRoEXx8fKBQKODp6Yl33nkHxcXFeufy9PTEsGHDsHfvXgQGBsLCwgJfffXVY12HRCLB2rVrUVBQoPv6VHz9jVHXo1y8eBEhISFQKBQ4cOAAvL29H7p/xXi0s2fPom/fvrC0tISvry+2bt0KADh06BCCgoJgYWGBNm3a4Lfffqt0jjt37mDy5MlQq9VQKBRo164d1qxZo7dPSUkJ5s+fj65du0KlUsHKygq9e/fGgQMH9Par+J7997//jVWrVum+Vt26dcOJEyf09k1OTsakSZPQokULKBQKuLi4YMSIEezuI4PwDhDRQ3h4eCA6Ohrnzp0z2uDlrKwsDB48GE8//TRGjx6NrVu34u2330ZAQADCw8MBAPfu3UO/fv1w7do1vPbaa/Dy8sKWLVswceJEZGdnY/r06QZ/7vnz59GrVy+4ublhzpw5sLKywubNmzFy5Ehs27YNTz31FPr06YM33ngDn332Gd555x20bdsWANC2bVssW7YMr7/+OqytrfHuu+8CANRqNQCgsLAQffv2xZ07d/DKK6+gZcuW+PPPPzF37lwkJSVh2bJl1daVmpqKQYMGwdHREXPmzIGdnR1u3LiBH3/8UW+/F198EevWrUN4eDheeukllJWV4Y8//sDRo0cRGBgIAHjppZewfv16PPPMM5g5cyaOHTuGyMhIXLx4Edu3b9c73+XLlzF27Fi88sormDJlCtq0afNY1/Hdd99h1apVOH78OL7++msAQM+ePY1S16NcvnwZAwYMgJmZGQ4cOAAfH59HHgOUfy8OGzYMzz33HJ599ll8+eWXeO655/Df//4XM2bMwD//+U/84x//wMcff4xnnnkGiYmJsLGxAQCkpKSgR48ekEgkeO211+Do6Ijdu3fjxRdfRG5uLmbMmAEAyM3Nxddff42xY8diypQpyMvLwzfffIOwsDAcP3680kDxjRs3Ii8vD6+88gokEgmWLl2Kp59+GtevX4e5uTkAYNSoUTh//jxef/11eHp6IjU1Ffv27cOtW7fY5Uc1JxBRtX799VdBJpMJMplMCA4OFmbPni3s3btXKCkpqbSvh4eHMGHCBN37AwcOCACEAwcO6Nr69u0rABC+/fZbXVtxcbHg7OwsjBo1Ste2bNkyAYCwYcMGXVtJSYkQHBwsWFtbC7m5udV+hiAIQkJCggBAWLt2ra4tJCRECAgIEIqKinRtWq1W6Nmzp9CqVStd25YtW6o8pyAIQrt27YS+fftWal+0aJFgZWUlXLlyRa99zpw5gkwmE27dulXpmArbt28XAAgnTpyodp/9+/cLAIQ33nij0jatVisIgiDExsYKAISXXnpJb/usWbMEAML+/ft1bR4eHgIAYc+ePUa7DkEQhAkTJghWVlZ6bcao62GfZ25uLri4uAiurq6V6n6Yiu/FjRs36touXbokABCkUqlw9OhRXfvevXsrfT+9+OKLgouLi5Cenq533ueee05QqVRCYWGhIAiCUFZWJhQXF+vtk5WVJajVamHy5Mm6torv2ebNmwuZmZm69p9//lkAIOzcuVN3LADh448/rvG1ElWFXWBEDzFw4EBER0fjySefxJkzZ7B06VKEhYXBzc1Nr+vIENbW1nj++ed17+VyObp3747r16/r2nbt2gVnZ2eMHTtW12Zubo433ngD+fn5OHTokEGfmZmZif3792P06NHIy8tDeno60tPTkZGRgbCwMFy9ehV37typ1fUAwJYtW9C7d280a9ZMd+709HSEhoZCo9Hg999/r/bYiu7FX375BaWlpVXus23bNkgkEixYsKDSNolEAqD8awYAERERettnzpwJAPjf//6n1+7l5YWwsDCjXUd1jFHXw2g0GqSnp8Pe3h4ODg4G1WZtbY3nnntO975Nmzaws7ND27ZtERQUpGuv+O+K71FBELBt2zYMHz4cgiDofa3CwsKQk5ODU6dOAQBkMpluTJRWq0VmZibKysoQGBio2+dBY8aMQbNmzXTve/furffZFWOsDh48iKysLIOul+hB7AIjeoRu3brhxx9/RElJCc6cOYPt27fj//7v//DMM88gNjYW/v7+Bp2vRYsWul/aFZo1a4azZ8/q3t+8eROtWrWqNKusokvq5s2bBn3mtWvXIAgC5s2bh3nz5lW5T2pqKtzc3Aw6b4WrV6/i7NmzcHR0rPbc1enbty9GjRqFhQsX4v/+7//Qr18/jBw5Ev/4xz90A8vj4+Ph6uoKe3v7as9z8+ZNSKVS+Pr66rU7OzvDzs6u0tesqgHCj3MddVnXw1hYWODrr7/GuHHjMHToUOzbtw9WVlYAyrtSc3JyKn1uhaq+F1UqFdzd3Su1AdAFjrS0NGRnZ2PVqlVYtWpVlXU9+LVav349PvnkE1y6dEkv5FZ1rS1bttR7XxGGKj5boVBgyZIlmDlzJtRqNXr06IFhw4Zh/PjxetdG9CgMQEQ1JJfL0a1bN3Tr1g2tW7fGpEmTsGXLlirvSjyMTCarsl0QBINr+vsvrwoajUbvfcUA3lmzZlV7d+Hvv6ANodVqMXDgQMyePbvK7a1bt672WIlEgq1bt+Lo0aPYuXMn9u7di8mTJ+OTTz7B0aNHq3zOzcNU9zX5u6pmVj3OddRlXY/y3HPPISsrC6+++iqefvpp7Ny5E3K5HJs2bcKkSZP09n3w+6y678VHfY9WfD89//zzmDBhQpX7dujQAQCwYcMGTJw4ESNHjsRbb70FJycnyGQyREZGIj4+3uDPBoAZM2Zg+PDh+Omnn7B3717MmzcPkZGR2L9/Pzp37lzl8UR/xwBEVAsVg26TkpLq5PweHh44e/YstFqt3l2gS5cu6bYDf/3rODs7W+/4v99VqJgRZG5ujtDQ0Id+9sN+UVe3zcfHB/n5+Y8898P
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Best K by silhouette: 3\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"k_range = range(2, 11)\n",
|
|||
|
|
"inertias = []\n",
|
|||
|
|
"silhouettes = []\n",
|
|||
|
|
"\n",
|
|||
|
|
"for k in k_range:\n",
|
|||
|
|
" km = KMeans(n_clusters=k, n_init=30, random_state=42)\n",
|
|||
|
|
" labels = km.fit_predict(X_scaled)\n",
|
|||
|
|
" inertias.append(km.inertia_)\n",
|
|||
|
|
" silhouettes.append(silhouette_score(X_scaled, labels))\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Elbow plot\n",
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"plt.plot(list(k_range), inertias, marker=\"o\")\n",
|
|||
|
|
"plt.xlabel(\"Number of clusters K\")\n",
|
|||
|
|
"plt.ylabel(\"Inertia (within-cluster SSE)\")\n",
|
|||
|
|
"plt.title(\"Elbow curve for K-means\")\n",
|
|||
|
|
"plt.show()\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Silhouette plot\n",
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"plt.plot(list(k_range), silhouettes, marker=\"o\")\n",
|
|||
|
|
"plt.xlabel(\"Number of clusters K\")\n",
|
|||
|
|
"plt.ylabel(\"Silhouette score\")\n",
|
|||
|
|
"plt.title(\"Silhouette score for K-means\")\n",
|
|||
|
|
"plt.show()\n",
|
|||
|
|
"\n",
|
|||
|
|
"best_k = list(k_range)[int(np.argmax(silhouettes))]\n",
|
|||
|
|
"print(\"Best K by silhouette:\", best_k)\n",
|
|||
|
|
"\n",
|
|||
|
|
"\n",
|
|||
|
|
"''' Ce que c’est :\n",
|
|||
|
|
"Inertia = somme des distances intra-cluster (SSE).\n",
|
|||
|
|
"Plus elle baisse, plus les clusters sont “serrés”.\n",
|
|||
|
|
"\n",
|
|||
|
|
"Comment lire :\n",
|
|||
|
|
"Quand K augmente, inertia baisse toujours (normal).\n",
|
|||
|
|
"On cherche un “coude” : à partir d’un certain K, ajouter des clusters apporte peu\n",
|
|||
|
|
"'''"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "ddfb03e9-a9ff-463d-b653-f752773aacd9",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Valeurs Silhouette:\n",
|
|||
|
|
"proche de 1 : très bon\n",
|
|||
|
|
"0 : clusters se chevauchent\n",
|
|||
|
|
"négatif : mauvais clustering"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 25,
|
|||
|
|
"id": "ea340dca-a76a-4d50-8973-af1dca34c376",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>n_clients</th>\n",
|
|||
|
|
" <th>aum_qty_med</th>\n",
|
|||
|
|
" <th>freq_med</th>\n",
|
|||
|
|
" <th>rel_int_med</th>\n",
|
|||
|
|
" <th>gross_flow_med</th>\n",
|
|||
|
|
" <th>n_tx_med</th>\n",
|
|||
|
|
" <th>vol_med</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>cluster_kmeans</th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0.0</th>\n",
|
|||
|
|
" <td>7718</td>\n",
|
|||
|
|
" <td>104.983133</td>\n",
|
|||
|
|
" <td>0.024390</td>\n",
|
|||
|
|
" <td>1.039412e+00</td>\n",
|
|||
|
|
" <td>0.774805</td>\n",
|
|||
|
|
" <td>1.0</td>\n",
|
|||
|
|
" <td>5.080794</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1.0</th>\n",
|
|||
|
|
" <td>2024</td>\n",
|
|||
|
|
" <td>14064.797371</td>\n",
|
|||
|
|
" <td>0.575379</td>\n",
|
|||
|
|
" <td>4.591024e+00</td>\n",
|
|||
|
|
" <td>1100.212569</td>\n",
|
|||
|
|
" <td>198.5</td>\n",
|
|||
|
|
" <td>2291.806790</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2.0</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>0.000079</td>\n",
|
|||
|
|
" <td>0.014706</td>\n",
|
|||
|
|
" <td>1.257232e+06</td>\n",
|
|||
|
|
" <td>1.468240</td>\n",
|
|||
|
|
" <td>1.0</td>\n",
|
|||
|
|
" <td>12.107415</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" n_clients aum_qty_med freq_med rel_int_med \\\n",
|
|||
|
|
"cluster_kmeans \n",
|
|||
|
|
"0.0 7718 104.983133 0.024390 1.039412e+00 \n",
|
|||
|
|
"1.0 2024 14064.797371 0.575379 4.591024e+00 \n",
|
|||
|
|
"2.0 1 0.000079 0.014706 1.257232e+06 \n",
|
|||
|
|
"\n",
|
|||
|
|
" gross_flow_med n_tx_med vol_med \n",
|
|||
|
|
"cluster_kmeans \n",
|
|||
|
|
"0.0 0.774805 1.0 5.080794 \n",
|
|||
|
|
"1.0 1100.212569 198.5 2291.806790 \n",
|
|||
|
|
"2.0 1.468240 1.0 12.107415 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 25,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"km = KMeans(n_clusters=best_k, n_init=50, random_state=42)\n",
|
|||
|
|
"labels_km = km.fit_predict(X_scaled)\n",
|
|||
|
|
"\n",
|
|||
|
|
"dfc.loc[X.index, \"cluster_kmeans\"] = labels_km\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Profiling table (medians = robust to outliers)\n",
|
|||
|
|
"k_profile = (\n",
|
|||
|
|
" dfc.loc[X.index]\n",
|
|||
|
|
" .groupby(\"cluster_kmeans\")\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" n_clients=(ID_COL, \"count\"),\n",
|
|||
|
|
" aum_qty_med=(\"aum_qty_mean\", \"median\"),\n",
|
|||
|
|
" freq_med=(\"frequency\", \"median\"),\n",
|
|||
|
|
" rel_int_med=(\"rel_intensity_total\", \"median\"),\n",
|
|||
|
|
" gross_flow_med=(\"gross_flow_qty_mean\", \"median\"),\n",
|
|||
|
|
" n_tx_med=(\"n_tx_total\", \"median\"),\n",
|
|||
|
|
" vol_med=(\"net_flow_qty_vol\", \"median\"),\n",
|
|||
|
|
" )\n",
|
|||
|
|
" .sort_values(\"n_clients\", ascending=False)\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"k_profile\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "7702b855-8c02-486e-9c28-0e8c1b872a73",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## PARTIE 3 — GMM (Gaussian Mixture Model)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "810e73ed-f479-4f76-806c-3c18d281a931",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"BIC/AIC = critères pour choisir le nombre de composantes (clusters) dans un modèle probabiliste.\n",
|
|||
|
|
"Plus bas = meilleur compromis “fit” vs “complexité”."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 26,
|
|||
|
|
"id": "ba9df953-cc88-4907-9fe7-b88978f318b3",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmEAAAHHCAYAAAD3WI8lAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAjfBJREFUeJzs3Xd4FFXbx/Hv7G56D6RSQw1VmkDoSgldRAUBBRHLg1gQHhFsqK9Ks4AF7IAIIqgUQUCkCoRO6N3QSSghnbTd8/4Rsw9LEkhgk0m5P9c1F9mZMzO/2SzszcyZM5pSSiGEEEIIIYqUQe8AQgghhBBlkRRhQgghhBA6kCJMCCGEEEIHUoQJIYQQQuhAijAhhBBCCB1IESaEEEIIoQMpwoQQQgghdCBFmBBCCCGEDqQIE0IIIYTQgRRhQoi7sn79ejRNY/369QVed9asWWiaxqlTp+yeKy9vv/02mqYV2f5upGkab7/9ti77Bnjuuefo3Lmz9fWpU6fQNI1Zs2bplsle7PlZqlq1Kj179rz7UDrLyMigUqVKTJ8+Xe8oIg9ShAlxg6ioKJ5//nlq1aqFq6srrq6u1K1blxEjRrBv3z6bttlf5gaDgbNnz+bYVkJCAi4uLmiaxvPPP2+dn/3Fp2ka7733Xq45Bg0ahKZpuLu72/cARaH7448/dC208hIVFcW3337La6+9pneUMiclJYW333471/+oFObnxcHBgVGjRvH++++TmppaKPsQd0eKMCH+tWzZMurXr8+cOXPo1KkTn3zyCdOmTaNbt2788ccfNGrUiNOnT+dYz8nJiZ9++inH/N9+++2W+3N2ds51veTkZJYsWYKzs/OdH4zQzR9//ME777yT67Lr16/zxhtvFHGiLNOmTSMkJIT77rtPl/0Xtscff5zr169TpUoVvaPkkJKSwjvvvJNnEZbX58Uehg4dypUrV5g3b16h7UPcOSnChABOnjzJo48+SpUqVThy5AjTp0/n2Wef5emnn+ajjz7i+PHjTJ06FYMh51+Z7t2751pMzZs3jx49euS5z+7du3Po0CH27t1rM3/JkiWkp6fbXDYSpYOzszMmk6nI95uRkcHcuXPp169fke/bnpKTk/NcZjQacXZ21u1Sc3GilOL69esAeHt706VLl1Jxybk0kiJMCGDy5MkkJyczc+ZMgoKCciw3mUy8+OKLVKpUKceygQMHEhkZyZEjR6zzoqOjWbt2LQMHDsxzn2FhYYSEhOT4H+rcuXPp2rUrvr6++cr+xBNP4O7uzpkzZ+jZsyfu7u5UqFCBL774AoD9+/dz//334+bmRpUqVXL9H/E///zDI488gq+vL66urrRs2ZLly5fnaHfu3Dn69OmDm5sb/v7+vPzyy6SlpeWaa9u2bXTt2hUvLy9cXV1p3749mzdvztcx3Sw6OpqhQ4dSsWJFnJycCAoK4oEHHsjR/2fFihW0bdsWNzc3PDw86NGjBwcPHszXPn788UeaNm2Ki4sLvr6+PProo7leZt62bRvdu3fHx8cHNzc3GjZsyLRp04Cs30X2+559yfnGoiC3PmF79uyhW7dueHp64u7uTseOHdm6datNm+z+Tps3b2bUqFH4+fnh5ubGgw8+yOXLl297bJs2beLKlSt06tQpX+/F2rVrre+jt7c3DzzwAIcPH7Yu37dvH5qmsXTpUuu8Xbt2oWkaTZo0sdlWt27daNGihc28/Pyesj/XJ0+epHv37nh4eDBo0KA8M+fWJ2znzp2Eh4dTvnx5XFxcCAkJ4cknn8zXewDw559/0qhRI5ydnalbt26uZ7fj4uIYOXIklSpVwsnJiRo1ajBp0iQsFguQ1f3Az88PgHfeecf6mXj77bdv+3mxWCxMnTqVevXq4ezsTEBAAM8++yzXrl2zyZDdh23VqlU0a9YMFxcXvvrqK+vyzp07s2nTJmJjY/N97KJoSBEmBFmXImvUqJHjyyI/2rVrR8WKFW2Km59//hl3d/dbngkDGDBgAPPnz0cpBcCVK1f4888/b1m85cZsNtOtWzcqVarE5MmTqVq1Ks8//zyzZs2ia9euNGvWjEmTJuHh4cHgwYOJioqyrhsTE0OrVq1YtWoVzz33nLX/SO/evVm0aJG13fXr1+nYsSOrVq3i+eef5/XXX+fvv/9mzJgxOfKsXbuWdu3akZCQwPjx4/nggw+Ii4vj/vvvZ/v27QU6NoCHHnqIRYsWMXToUKZPn86LL75IYmIiZ86csbaZM2cOPXr0wN3dnUmTJvHmm29y6NAh2rRpc9vO2u+//z6DBw+mZs2afPzxx4wcOZI1a9bQrl074uLirO1Wr15Nu3btOHToEC+99BIfffQR9913H8uWLQPg2WeftZ7BnDNnjnXKy8GDB2nbti179+5lzJgxvPnmm0RFRdGhQwe2bduWo/0LL7zA3r17GT9+PMOHD+f333+36W+Yly1btqBpGo0bN75t27/++ovw8HAuXbrE22+/zahRo9iyZQutW7e2vo/169fH29ubjRs3Wtf7+++/MRgM7N27l4SEBCCriNiyZQvt2rWztivI7ykzM5Pw8HD8/f358MMPeeihh26bP9ulS5fo0qULp06dYuzYsXz22WcMGjQoR4Gbl+PHj9O/f3+6devGhAkTMJlMPPLII6xevdraJiUlhfbt2/Pjjz8yePBgPv30U1q3bs24ceMYNWoUAH5+fsyYMQOABx980PqZ6Nu3720/L88++yyvvPIKrVu3Ztq0aQwdOpS5c+cSHh5ORkaGTd6jR48yYMAAOnfuzLRp02jUqJF1WdOmTVFKsWXLlny/f6KIKCHKuPj4eAWoPn365Fh27do1dfnyZeuUkpJiXTZ+/HgFqMuXL6v//ve/qkaNGtZl9957rxo6dKhSSilAjRgxwrosKipKAWrKlCnqwIEDClB///23UkqpL774Qrm7u6vk5GQ1ZMgQ5ebmdtv8Q4YMUYD64IMPbHK7uLgoTdPU/PnzrfOPHDmiADV+/HjrvJEjR9pkUEqpxMREFRISoqpWrarMZrNSSqmpU6cqQC1YsMDaLjk5WdWoUUMBat26dUoppSwWi6pZs6YKDw9XFovF2jYlJUWFhISozp07W+fNnDlTASoqKirP47t27Zr1/cpLYmKi8vb2Vk8//bTN/OjoaOXl5WUzP/v3lu3UqVPKaDSq999/32bd/fv3K5PJZJ2fmZmpQkJCVJUqVdS1a9ds2t54nCNGjFB5/dN683vfp08f5ejoqE6ePGmdd+HCBeXh4aHatWtnnZf9PnXq1MlmXy+//LIyGo0qLi4ur7dGKaXUY489psqVK5djfvZncebMmdZ5jRo1Uv7+/urq1avWeXv37lUGg0ENHjzYOq9Hjx6qefPm1td9+/ZVffv2VUajUa1YsUIppdTu3bsVoJYsWaKUKtjvKftzPXbs2FseW7abP0uLFi1SgNqxY0e+1r9RlSpVFKB+/fVX67z4+HgVFBSkGjdubJ33f//3f8rNzU0dO3bMZv2xY8cqo9Gozpw5o5RS6vLlyzl+99ny+rz8/fffClBz5861mb9y5coc87Pzrly5MtfjuXDhggLUpEmTbn/wokjJmTBR5mX/rz23OxE7dOiAn5+fdcq+dHCzgQMHcuLECXbs2GH9Mz9ns+rVq0fDhg2tfcrmzZvHAw88gKura4GP46mnnrL+7O3tTe3atXFzc7PpB1S7dm28vb35559/rPP++OMPmjdvTps2bazz3N3deeaZZzh16hSHDh2ytgsKCuLhhx+2tnN1deWZZ56xyREZGcnx48cZOHAgV69e5cqVK1y5coXk5GQ6duzIxo0brZdq8sPFxQVHR0fWr1+f4zJMttWrVxMXF8eAAQOs+7ty5QpGo5EWLVqwbt26PLf/22+/YbFY6Nevn826gYGB1KxZ07runj17iIqKYuTIkXh7e9ts4076IZnNZv7880/69OlDtWrVrPODgoIYOHAgmzZtsn42sz3zzDM2+2rbti1msznXG0ZudPXqVXx8fG6b6eLFi0RGRvLEE0/YXA5v2LAhnTt35o8//rDZ9+7du639tDZt2kT37t1p1KgRf//
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Best K by BIC: 10\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"bics, aics = [], []\n",
|
|||
|
|
"models = {}\n",
|
|||
|
|
"\n",
|
|||
|
|
"for k in k_range:\n",
|
|||
|
|
" gmm = GaussianMixture(n_components=k, covariance_type=\"full\", random_state=42)\n",
|
|||
|
|
" gmm.fit(X_scaled)\n",
|
|||
|
|
" bics.append(gmm.bic(X_scaled))\n",
|
|||
|
|
" aics.append(gmm.aic(X_scaled))\n",
|
|||
|
|
" models[k] = gmm\n",
|
|||
|
|
"\n",
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"plt.plot(list(k_range), bics, marker=\"o\", label=\"BIC\")\n",
|
|||
|
|
"plt.plot(list(k_range), aics, marker=\"o\", label=\"AIC\")\n",
|
|||
|
|
"plt.xlabel(\"Number of components K\")\n",
|
|||
|
|
"plt.ylabel(\"Criterion value\")\n",
|
|||
|
|
"plt.title(\"GMM model selection (lower is better)\")\n",
|
|||
|
|
"plt.legend()\n",
|
|||
|
|
"plt.show()\n",
|
|||
|
|
"\n",
|
|||
|
|
"best_k_gmm = list(k_range)[int(np.argmin(bics))]\n",
|
|||
|
|
"print(\"Best K by BIC:\", best_k_gmm)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "ce9b76de-2191-4528-8913-c2bb1911e97c",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Graphe :\n",
|
|||
|
|
"- BIC/AIC continuent de baisser jusqu’à K=10. ==> Donc “Best K by BIC = 10”.\n",
|
|||
|
|
"\n",
|
|||
|
|
"Attention importante :\n",
|
|||
|
|
"Quand BIC baisse toujours avec K, ça peut signifier :\n",
|
|||
|
|
"- les données ont beaucoup de structure fine / multi-groupes\n",
|
|||
|
|
"- OU le modèle “découpe” surtout des outliers / sous-groupes très rares\n",
|
|||
|
|
"- OU le modèle est en train de sur-segmenter des extrêmes (typique finance)\n",
|
|||
|
|
"Donc K=10 n’est pas automatiquement “mieux” au sens business, c’est “mieux” pour le fit probabiliste.\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 27,
|
|||
|
|
"id": "47e0a0bd-0a26-4e56-91cb-5780a3c1fb86",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAUNNJREFUeJzt3XlYVGX/P/D3sA0IzuACDCgCaiqYS6LiuC8kKlY+akpuuG+giSVJuZKKaa65ZRr4qGRZaiWJIm6puJGkovCgolgKmAqjpqz374++nJ8jqEDAoOf9uq65nubcn3PO59zQw7tzzpxRCCEEiIiIiGTMyNANEBERERkaAxERERHJHgMRERERyR4DEREREckeAxERERHJHgMRERERyR4DEREREckeAxERERHJHgMRERERyR4DERGVC2dnZwwfPtzQbdBzLF68GHXr1oWxsTGaN28OoPg/t7CwMCgUCly7dq1ceySqKAxERKWQnJwMf39/NGjQAFWqVEGVKlXg5uYGPz8/nDt3Tq92zpw5UCgUMDIywo0bNwptS6fTwcLCAgqFAv7+/tLya9euQaFQQKFQYN68eUX2MXjwYCgUClhZWZXtAVKpXbx4EXPmzKn0QWHfvn0IDAxEu3btEBoaigULFhi6JSKDMjF0A0Qvm927d2PgwIEwMTHB4MGD0axZMxgZGSEhIQE7duzA2rVrkZycDCcnJ731lEolvvnmGwQGBuot37Fjx3P3Z25ujm+++QYzZszQW/7w4UP8+OOPMDc3L5sDK2OJiYkwMpLff3NdvHgRc+fORefOneHs7Gzodp7pwIEDMDIywsaNG2FmZiYtl+vPjYi/9UQlcOXKFfj4+MDJyQkJCQlYs2YNxo0bhzFjxmDJkiVISkrC8uXLi/yD0qtXL3zzzTeFloeHh8Pb2/uZ++zVqxcuXryI33//XW/5jz/+iOzsbLz55pv//sDKgVKphKmpqaHboGdIT0+HhYWFXhgC+HMj+WIgIiqBRYsW4eHDhwgNDYW9vX2hcRMTE0yePBmOjo6FxgYNGoS4uDgkJCRIy1JTU3HgwAEMGjTomfvUarVwcXFBeHi43vKtW7eiR48eqF69erF6P3fuHIYPH466devC3NwcGo0GI0eOxJ07d/Tq7t+/jylTpsDZ2RlKpRK2trZ488038dtvv0k1SUlJ6NevHzQaDczNzVG7dm34+PggMzNTqinqXpRz586hU6dOsLCwQO3atTFv3jyEhoYWuhfF2dkZvXv3xtGjR9G6dWuYm5ujbt26+O9//6u3vYL7WI4ePYrJkyfDxsYG1tbWGDduHLKzs5GRkYFhw4ahWrVqqFatGgIDAyGE0NtGfn4+li9fjsaNG8Pc3Bx2dnYYN24c7t27p1dXnJ7CwsLw7rvvAgC6dOkiXfI8dOjQc382CQkJGDBgAGxsbGBhYYGGDRvik08+0as5e/YsevbsCZVKBSsrK3Tr1g0nTpwocj6OHTuGqVOnwsbGBpaWlvjPf/6D27dvS3UKhQKhoaF4+PCh1GNYWNgzf27x8fHo2rWr3s8tPz+/yGPZs2cPOnToAEtLS1StWhXe3t6Ij4/Xqxk+fDisrKzw559/ok+fPrCysoKNjQ0+/PBD5OXl6dXm5+djxYoVaNKkCczNzWFjY4MePXrgzJkzenVbtmyBu7s7LCwsUL16dfj4+BR5iZroWXjJjKgEdu/ejfr168PDw6PE63bs2BG1a9dGeHg4goODAQDffvstrKysnnuGCADee+89bNmyBQsXLoRCocBff/2Fffv2YfPmzYiMjCzW/qOionD16lWMGDECGo0G8fHxWL9+PeLj43HixAkoFAoAwPjx4/H999/D398fbm5uuHPnDo4ePYpLly6hRYsWyM7OhpeXF7KysjBp0iRoNBr8+eef2L17NzIyMqBWq4vc/59//imFhKCgIFhaWmLDhg1QKpVF1l++fBn9+/fHqFGj4Ovri6+//hrDhw+Hu7s7GjdurFdb0MfcuXNx4sQJrF+/HtbW1jh+/Djq1KmDBQsW4JdffsHixYvx+uuvY9iwYdK648aNQ1hYGEaMGIHJkycjOTkZq1atwtmzZ3Hs2DG9syUv6qljx46YPHkyVq5ciY8//hiurq4AIP1vUc6dO4cOHTrA1NQUY8eOhbOzM65cuYKff/4Z8+fPB/BPIOnQoQNUKhUCAwNhamqKL7/8Ep07d8bhw4cL/T5OmjQJ1apVw+zZs3Ht2jUsX74c/v7++PbbbwEAmzdvxvr163Hq1Cls2LABANC2bdsi+0tNTUWXLl2Qm5uL6dOnw9LSEuvXr4eFhUWh2s2bN8PX1xdeXl747LPP8Pfff2Pt2rVo3749zp49q3cJMS8vD15eXvDw8MDnn3+O/fv3Y8mSJahXrx4mTJgg1Y0aNQphYWHo2bMnRo8ejdzcXPz66684ceIEWrZsCQCYP38+Zs6ciQEDBmD06NG4ffs2vvjiC3Ts2BFnz56FtbX1M+efSCKIqFgyMzMFANGnT59CY/fu3RO3b9+WXn///bc0Nnv2bAFA3L59W3z44Yeifv360lirVq3EiBEjhBBCABB+fn7SWHJysgAgFi9eLC5cuCAAiF9//VUIIcTq1auFlZWVePjwofD19RWWlpYv7P/Jngp88803AoA4cuSItEytVuv18bSzZ88KAGL79u3P3Z+Tk5Pw9fWV3k+aNEkoFApx9uxZadmdO3dE9erVBQCRnJyst+7TfaWnpwulUik++OADaVloaKgAILy8vER+fr60XKvVCoVCIcaPHy8ty83NFbVr1xadOnWSlv36668CgNi6date75GRkYWWF7en7du3CwDi4MGDz52fAh07dhRVq1YV169f11v+5PH06dNHmJmZiStXrkjLbt68KapWrSo6duxYaD48PT311g8ICBDGxsYiIyNDWvas35unf25TpkwRAMTJkyf1jlutVuv93O7fvy+sra3FmDFj9LaXmpoq1Gq13nJfX18BQAQHB+vVvvHGG8Ld3V16f+DAAQFATJ48uVCfBcd37do1YWxsLObPn683fv78eWFiYlJoOdGz8JIZUTHpdDoAKPITXZ07d4aNjY30Wr16dZHbGDRoEC5fvozTp09L//u8y2UFGjdujKZNm0r3IIWHh+Odd95BlSpVit3/k/9F//jxY/z1119o06YNAOhdDrO2tsbJkydx8+bNIrdTcAZo7969+Pvvv4u9/8jISGi1Wunj3QBQvXp1DB48uMh6Nzc3dOjQQXpvY2ODhg0b4urVq4VqR40aJZ3hAgAPDw8IITBq1ChpmbGxMVq2bKm3/vbt26FWq/Hmm2/ir7/+kl7u7u6wsrLCwYMHS91Tcdy+fRtHjhzByJEjUadOHb2xguPJy8vDvn370KdPH9StW1cat7e3x6BBg3D06FHpd7PA2LFj9eajQ4cOyMvLw/Xr10vc4y+//II2bdqgdevW0jIbG5tCP7eoqChkZGTgvffe05tLY2NjeHh4FJpL4J+zkU/q0KGD3lz+8MMPUCgUmD17dqF1C45vx44dyM/Px4ABA/T2q9Fo8NprrxW5X6Ki8JIZUTFVrVoVAPDgwYNCY19++SXu37+PtLQ0DBky5JnbeOONN9CoUSOEh4fD2toaGo0GXbt2Ldb+Bw0ahCVLliAgIADHjx/Hxx9/XKL+7969i7lz52Lbtm1IT0/XG3vy3p9FixbB19cXjo6OcHd3R69evTBs2DDpj7GLiwumTp2KpUuXYuvWrejQoQPefvttDBky5JmXywDg+vXr0Gq1hZbXr1+/yPqnAwIAVKtWrdC9PUXVFvTx9L1carVab/2kpCRkZmbC1ta2yB6enqeS9FQcBX/8X3/99WfW3L59G3///TcaNmxYaMzV1RX5+fm4ceOG3mXEp/usVq0aAJSqz+vXrxd5ifjpfpKSkgDgmb/PKpVK733B/UBP9/lkj1euXIGDg8Nz75NLSkqCEAKvvfZakeO8QZyKi4GIqJjUajXs7e1x4cKFQmMFfzCK8+yZQYMGYe3atahatSoGDhxY7I84v/feewgKCsKYMWNQo0YNdO/evUT9DxgwAMe
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Share of ambiguous assignments (max prob < 0.6): 1.10%\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"gmm = models[best_k_gmm]\n",
|
|||
|
|
"labels_gmm = gmm.predict(X_scaled)\n",
|
|||
|
|
"proba = gmm.predict_proba(X_scaled)\n",
|
|||
|
|
"max_proba = proba.max(axis=1)\n",
|
|||
|
|
"\n",
|
|||
|
|
"dfc.loc[X.index, \"cluster_gmm\"] = labels_gmm\n",
|
|||
|
|
"dfc.loc[X.index, \"cluster_confidence\"] = max_proba\n",
|
|||
|
|
"\n",
|
|||
|
|
"# Confidence histogram\n",
|
|||
|
|
"plt.figure()\n",
|
|||
|
|
"plt.hist(max_proba, bins=50)\n",
|
|||
|
|
"plt.xlabel(\"Max posterior probability\")\n",
|
|||
|
|
"plt.ylabel(\"Count\")\n",
|
|||
|
|
"plt.title(\"GMM assignment confidence\")\n",
|
|||
|
|
"plt.show()\n",
|
|||
|
|
"\n",
|
|||
|
|
"ambiguous_share = (max_proba < 0.6).mean()\n",
|
|||
|
|
"print(f\"Share of ambiguous assignments (max prob < 0.6): {ambiguous_share:.2%}\")\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 28,
|
|||
|
|
"id": "d4254585-959f-4f79-9c08-73e6d7be25c5",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/html": [
|
|||
|
|
"<div>\n",
|
|||
|
|
"<style scoped>\n",
|
|||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
|
" vertical-align: middle;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe tbody tr th {\n",
|
|||
|
|
" vertical-align: top;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"\n",
|
|||
|
|
" .dataframe thead th {\n",
|
|||
|
|
" text-align: right;\n",
|
|||
|
|
" }\n",
|
|||
|
|
"</style>\n",
|
|||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
|
" <thead>\n",
|
|||
|
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th>n_clients</th>\n",
|
|||
|
|
" <th>aum_qty_med</th>\n",
|
|||
|
|
" <th>freq_med</th>\n",
|
|||
|
|
" <th>rel_int_med</th>\n",
|
|||
|
|
" <th>gross_flow_med</th>\n",
|
|||
|
|
" <th>n_tx_med</th>\n",
|
|||
|
|
" <th>vol_med</th>\n",
|
|||
|
|
" <th>conf_med</th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>cluster_gmm</th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" <th></th>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </thead>\n",
|
|||
|
|
" <tbody>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>9.0</th>\n",
|
|||
|
|
" <td>3509</td>\n",
|
|||
|
|
" <td>7.949166e+01</td>\n",
|
|||
|
|
" <td>0.041667</td>\n",
|
|||
|
|
" <td>3.071429e+00</td>\n",
|
|||
|
|
" <td>6.687053e+00</td>\n",
|
|||
|
|
" <td>2.0</td>\n",
|
|||
|
|
" <td>3.277044e+01</td>\n",
|
|||
|
|
" <td>0.998364</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>1.0</th>\n",
|
|||
|
|
" <td>2953</td>\n",
|
|||
|
|
" <td>9.121500e+01</td>\n",
|
|||
|
|
" <td>0.000000</td>\n",
|
|||
|
|
" <td>0.000000e+00</td>\n",
|
|||
|
|
" <td>0.000000e+00</td>\n",
|
|||
|
|
" <td>0.0</td>\n",
|
|||
|
|
" <td>0.000000e+00</td>\n",
|
|||
|
|
" <td>0.999999</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>6.0</th>\n",
|
|||
|
|
" <td>1426</td>\n",
|
|||
|
|
" <td>5.339202e+02</td>\n",
|
|||
|
|
" <td>0.224745</td>\n",
|
|||
|
|
" <td>2.483775e+00</td>\n",
|
|||
|
|
" <td>2.762873e+01</td>\n",
|
|||
|
|
" <td>38.0</td>\n",
|
|||
|
|
" <td>7.746854e+01</td>\n",
|
|||
|
|
" <td>0.999916</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>5.0</th>\n",
|
|||
|
|
" <td>787</td>\n",
|
|||
|
|
" <td>1.454968e+04</td>\n",
|
|||
|
|
" <td>0.673267</td>\n",
|
|||
|
|
" <td>5.310471e+00</td>\n",
|
|||
|
|
" <td>1.064410e+03</td>\n",
|
|||
|
|
" <td>485.0</td>\n",
|
|||
|
|
" <td>1.867233e+03</td>\n",
|
|||
|
|
" <td>0.999937</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>0.0</th>\n",
|
|||
|
|
" <td>568</td>\n",
|
|||
|
|
" <td>5.309168e+03</td>\n",
|
|||
|
|
" <td>0.087500</td>\n",
|
|||
|
|
" <td>6.195235e+00</td>\n",
|
|||
|
|
" <td>6.734335e+02</td>\n",
|
|||
|
|
" <td>5.0</td>\n",
|
|||
|
|
" <td>2.910116e+03</td>\n",
|
|||
|
|
" <td>0.998759</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>8.0</th>\n",
|
|||
|
|
" <td>223</td>\n",
|
|||
|
|
" <td>1.840341e+05</td>\n",
|
|||
|
|
" <td>0.680851</td>\n",
|
|||
|
|
" <td>6.853229e+00</td>\n",
|
|||
|
|
" <td>1.521262e+04</td>\n",
|
|||
|
|
" <td>418.0</td>\n",
|
|||
|
|
" <td>3.652430e+04</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>4.0</th>\n",
|
|||
|
|
" <td>198</td>\n",
|
|||
|
|
" <td>1.656004e+05</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>4.315459e+00</td>\n",
|
|||
|
|
" <td>8.092373e+03</td>\n",
|
|||
|
|
" <td>3570.0</td>\n",
|
|||
|
|
" <td>9.626152e+03</td>\n",
|
|||
|
|
" <td>0.999323</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>7.0</th>\n",
|
|||
|
|
" <td>77</td>\n",
|
|||
|
|
" <td>1.109350e+00</td>\n",
|
|||
|
|
" <td>0.130435</td>\n",
|
|||
|
|
" <td>1.462797e+02</td>\n",
|
|||
|
|
" <td>9.281811e+01</td>\n",
|
|||
|
|
" <td>15.0</td>\n",
|
|||
|
|
" <td>2.553362e+02</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>2.0</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>7.941176e-05</td>\n",
|
|||
|
|
" <td>0.014706</td>\n",
|
|||
|
|
" <td>1.257232e+06</td>\n",
|
|||
|
|
" <td>1.468240e+00</td>\n",
|
|||
|
|
" <td>1.0</td>\n",
|
|||
|
|
" <td>1.210741e+01</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" <tr>\n",
|
|||
|
|
" <th>3.0</th>\n",
|
|||
|
|
" <td>1</td>\n",
|
|||
|
|
" <td>1.319632e+08</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" <td>3.814096e+00</td>\n",
|
|||
|
|
" <td>3.871695e+06</td>\n",
|
|||
|
|
" <td>27668.0</td>\n",
|
|||
|
|
" <td>1.194728e+07</td>\n",
|
|||
|
|
" <td>1.000000</td>\n",
|
|||
|
|
" </tr>\n",
|
|||
|
|
" </tbody>\n",
|
|||
|
|
"</table>\n",
|
|||
|
|
"</div>"
|
|||
|
|
],
|
|||
|
|
"text/plain": [
|
|||
|
|
" n_clients aum_qty_med freq_med rel_int_med gross_flow_med \\\n",
|
|||
|
|
"cluster_gmm \n",
|
|||
|
|
"9.0 3509 7.949166e+01 0.041667 3.071429e+00 6.687053e+00 \n",
|
|||
|
|
"1.0 2953 9.121500e+01 0.000000 0.000000e+00 0.000000e+00 \n",
|
|||
|
|
"6.0 1426 5.339202e+02 0.224745 2.483775e+00 2.762873e+01 \n",
|
|||
|
|
"5.0 787 1.454968e+04 0.673267 5.310471e+00 1.064410e+03 \n",
|
|||
|
|
"0.0 568 5.309168e+03 0.087500 6.195235e+00 6.734335e+02 \n",
|
|||
|
|
"8.0 223 1.840341e+05 0.680851 6.853229e+00 1.521262e+04 \n",
|
|||
|
|
"4.0 198 1.656004e+05 1.000000 4.315459e+00 8.092373e+03 \n",
|
|||
|
|
"7.0 77 1.109350e+00 0.130435 1.462797e+02 9.281811e+01 \n",
|
|||
|
|
"2.0 1 7.941176e-05 0.014706 1.257232e+06 1.468240e+00 \n",
|
|||
|
|
"3.0 1 1.319632e+08 1.000000 3.814096e+00 3.871695e+06 \n",
|
|||
|
|
"\n",
|
|||
|
|
" n_tx_med vol_med conf_med \n",
|
|||
|
|
"cluster_gmm \n",
|
|||
|
|
"9.0 2.0 3.277044e+01 0.998364 \n",
|
|||
|
|
"1.0 0.0 0.000000e+00 0.999999 \n",
|
|||
|
|
"6.0 38.0 7.746854e+01 0.999916 \n",
|
|||
|
|
"5.0 485.0 1.867233e+03 0.999937 \n",
|
|||
|
|
"0.0 5.0 2.910116e+03 0.998759 \n",
|
|||
|
|
"8.0 418.0 3.652430e+04 1.000000 \n",
|
|||
|
|
"4.0 3570.0 9.626152e+03 0.999323 \n",
|
|||
|
|
"7.0 15.0 2.553362e+02 1.000000 \n",
|
|||
|
|
"2.0 1.0 1.210741e+01 1.000000 \n",
|
|||
|
|
"3.0 27668.0 1.194728e+07 1.000000 "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 28,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"g_profile = (\n",
|
|||
|
|
" dfc.loc[X.index]\n",
|
|||
|
|
" .groupby(\"cluster_gmm\")\n",
|
|||
|
|
" .agg(\n",
|
|||
|
|
" n_clients=(ID_COL, \"count\"),\n",
|
|||
|
|
" aum_qty_med=(\"aum_qty_mean\", \"median\"),\n",
|
|||
|
|
" freq_med=(\"frequency\", \"median\"),\n",
|
|||
|
|
" rel_int_med=(\"rel_intensity_total\", \"median\"),\n",
|
|||
|
|
" gross_flow_med=(\"gross_flow_qty_mean\", \"median\"),\n",
|
|||
|
|
" n_tx_med=(\"n_tx_total\", \"median\"),\n",
|
|||
|
|
" vol_med=(\"net_flow_qty_vol\", \"median\"),\n",
|
|||
|
|
" conf_med=(\"cluster_confidence\", \"median\"),\n",
|
|||
|
|
" )\n",
|
|||
|
|
" .sort_values(\"n_clients\", ascending=False)\n",
|
|||
|
|
")\n",
|
|||
|
|
"\n",
|
|||
|
|
"g_profile\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 29,
|
|||
|
|
"id": "84d1790a-7901-473e-b367-b8464886ada2",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Silhouette K-means: 0.5887519415047723\n",
|
|||
|
|
"Silhouette GMM: 0.138325887586186\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"#comapraison kmeans et gmm\n",
|
|||
|
|
"\n",
|
|||
|
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
|
"\n",
|
|||
|
|
"sil_km = silhouette_score(X_scaled, dfc.loc[X.index, \"cluster_kmeans\"])\n",
|
|||
|
|
"sil_gmm = silhouette_score(X_scaled, dfc.loc[X.index, \"cluster_gmm\"])\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"Silhouette K-means:\", sil_km)\n",
|
|||
|
|
"print(\"Silhouette GMM:\", sil_gmm)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 30,
|
|||
|
|
"id": "c4576042-09e9-4f03-b99b-883d8fa3426f",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"DB index K-means (lower better): 0.5656170003935967\n",
|
|||
|
|
"DB index GMM (lower better): 1.4187135041840173\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"''' \n",
|
|||
|
|
"Davies–Bouldin index (plus bas = mieux)\n",
|
|||
|
|
"Mesure similarité entre clusters.\n",
|
|||
|
|
"Plus bas = clusters plus distincts.'''\n",
|
|||
|
|
"\n",
|
|||
|
|
"from sklearn.metrics import davies_bouldin_score\n",
|
|||
|
|
"\n",
|
|||
|
|
"db_km = davies_bouldin_score(X_scaled, dfc.loc[X.index, \"cluster_kmeans\"])\n",
|
|||
|
|
"db_gmm = davies_bouldin_score(X_scaled, dfc.loc[X.index, \"cluster_gmm\"])\n",
|
|||
|
|
"\n",
|
|||
|
|
"print(\"DB index K-means (lower better):\", db_km)\n",
|
|||
|
|
"print(\"DB index GMM (lower better):\", db_gmm)\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": null,
|
|||
|
|
"id": "7d4d8264-359f-40c6-bb4d-4e26d1d74621",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": []
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"metadata": {
|
|||
|
|
"kernelspec": {
|
|||
|
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
|
"language": "python",
|
|||
|
|
"name": "python3"
|
|||
|
|
},
|
|||
|
|
"language_info": {
|
|||
|
|
"codemirror_mode": {
|
|||
|
|
"name": "ipython",
|
|||
|
|
"version": 3
|
|||
|
|
},
|
|||
|
|
"file_extension": ".py",
|
|||
|
|
"mimetype": "text/x-python",
|
|||
|
|
"name": "python",
|
|||
|
|
"nbconvert_exporter": "python",
|
|||
|
|
"pygments_lexer": "ipython3",
|
|||
|
|
"version": "3.13.11"
|
|||
|
|
}
|
|||
|
|
},
|
|||
|
|
"nbformat": 4,
|
|||
|
|
"nbformat_minor": 5
|
|||
|
|
}
|