Project_Carmignac/.ipynb_checkpoints/dataloader-checkpoint.ipynb

4440 lines
396 KiB
Plaintext
Raw Permalink Normal View History

2025-11-14 16:27:55 +01:00
{
2025-11-28 17:29:51 +01:00
"cells": [
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 1,
2025-11-28 17:29:51 +01:00
"id": "2e8cf88b-cecf-409f-9c2d-c3762b233f05",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2026-02-01 22:06:59 +01:00
"Requirement already satisfied: openpyxl in /opt/python/lib/python3.13/site-packages (3.1.5)\n",
"Requirement already satisfied: pycountry in /opt/python/lib/python3.13/site-packages (24.6.1)\n",
"Requirement already satisfied: et-xmlfile in /opt/python/lib/python3.13/site-packages (from openpyxl) (2.0.0)\n"
]
}
],
"source": [
"!pip install openpyxl pycountry"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "0c550103-c268-4c43-8a76-4b7633418091",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: kaleido in /opt/python/lib/python3.13/site-packages (1.2.0)\n",
"Requirement already satisfied: choreographer>=1.1.1 in /opt/python/lib/python3.13/site-packages (from kaleido) (1.2.1)\n",
"Requirement already satisfied: logistro>=1.0.8 in /opt/python/lib/python3.13/site-packages (from kaleido) (2.0.1)\n",
"Requirement already satisfied: orjson>=3.10.15 in /opt/python/lib/python3.13/site-packages (from kaleido) (3.11.5)\n",
"Requirement already satisfied: packaging in /opt/python/lib/python3.13/site-packages (from kaleido) (25.0)\n",
"Requirement already satisfied: pytest-timeout>=2.4.0 in /opt/python/lib/python3.13/site-packages (from kaleido) (2.4.0)\n",
"Requirement already satisfied: simplejson>=3.19.3 in /opt/python/lib/python3.13/site-packages (from choreographer>=1.1.1->kaleido) (3.20.2)\n",
"Requirement already satisfied: pytest>=7.0.0 in /opt/python/lib/python3.13/site-packages (from pytest-timeout>=2.4.0->kaleido) (9.0.2)\n",
"Requirement already satisfied: iniconfig>=1.0.1 in /opt/python/lib/python3.13/site-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido) (2.3.0)\n",
"Requirement already satisfied: pluggy<2,>=1.5 in /opt/python/lib/python3.13/site-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido) (1.6.0)\n",
"Requirement already satisfied: pygments>=2.7.2 in /opt/python/lib/python3.13/site-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido) (2.19.2)\n"
2025-11-28 17:29:51 +01:00
]
}
],
"source": [
2026-02-01 22:06:59 +01:00
"!pip install --upgrade kaleido"
2025-11-28 17:29:51 +01:00
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 3,
2025-11-28 17:29:51 +01:00
"id": "126c8a80-d9ad-4816-84f0-0c3d580f62c8",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 4,
2025-11-28 17:29:51 +01:00
"id": "ff2261fb-9516-4410-b42d-3acc8dc1a460",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import s3fs\n",
2026-02-01 22:06:59 +01:00
"os.environ[\"AWS_ACCESS_KEY_ID\"] = 'NJ0M3U6LSD2MKTFHA9E9'\n",
"os.environ[\"AWS_SECRET_ACCESS_KEY\"] = 'qS4fOELSpa4DhuhpbByIqF2A6WPX7bEXGoNro8qA'\n",
"os.environ[\"AWS_SESSION_TOKEN\"] = 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJOSjBNM1U2TFNEMk1LVEZIQTlFOSIsImFjciI6IjAiLCJhbGxvd2VkLW9yaWdpbnMiOlsiKiJdLCJhdWQiOlsibWluaW8iLCJhY2NvdW50Il0sImF1dGhfdGltZSI6MTc2OTc5NDYzOCwiYXpwIjoib255eGlhLW1pbmlvIiwiZW1haWwiOiJzYXJhaC50aG91bXlyZUBlbnNhZS5mciIsImVtYWlsX3ZlcmlmaWVkIjp0cnVlLCJleHAiOjE3NzEwMDQzNDQsImZhbWlseV9uYW1lIjoiVEhPVU1ZUkUiLCJnaXZlbl9uYW1lIjoiU2FyYWgiLCJncm91cHMiOlsiYmRjLWRhdGEiLCJiZGMtY2FybWlnbmFjLWczIl0sImlhdCI6MTc2OTc5NDc0NCwiaXNzIjoiaHR0cHM6Ly9hdXRoLmdyb3VwZS1nZW5lcy5mci9yZWFsbXMvZ2VuZXMiLCJqdGkiOiIwODI5ODRiOC0zZjg0LTQ4MTYtYjVlOC0zNWE5NWE1ODM4ZTciLCJuYW1lIjoiU2FyYWggVEhPVU1ZUkUiLCJwb2xpY3kiOiJzdHNvbmx5IiwicHJlZmVycmVkX3VzZXJuYW1lIjoic3Rob3VteXJlLWVuc2FlIiwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbIm9mZmxpbmVfYWNjZXNzIiwiZGVmYXVsdC1yb2xlcy1nZW5lcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJvcGVuaWQgcHJvZmlsZSBlbWFpbCIsInNpZCI6IjUwYWJiODMyLTBkNDAtNDVjZS04OWM4LWJmNzVlMDUzNjUyNyIsInN1YiI6ImVhYWVkN2QyLWM4MjYtNGIxNC05MzczLTYwYjNhODhlMWFiNiIsInR5cCI6IkJlYXJlciJ9.B6ak9wmZSk4l0_9O1CV03rdBgkz9y-tV6OrKHDCLbNGBJuZXDPX4jhvLUCwxsq99cd5bKwioh3Lzq_FWQvp_tA'\n",
2025-11-28 17:29:51 +01:00
"os.environ[\"AWS_DEFAULT_REGION\"] = 'us-east-1'\n",
"fs = s3fs.S3FileSystem(\n",
" client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'},\n",
" key = os.environ[\"AWS_ACCESS_KEY_ID\"], \n",
" secret = os.environ[\"AWS_SECRET_ACCESS_KEY\"], \n",
" token = os.environ[\"AWS_SESSION_TOKEN\"])"
]
},
{
"cell_type": "markdown",
"id": "3d36f3f0-bd40-4a83-96d1-b46d75f5a4c5",
"metadata": {},
"source": [
"# data exploration"
]
},
{
"cell_type": "markdown",
"id": "eaf5c5a0-eb1c-4242-b893-7600e6def109",
"metadata": {},
"source": [
"Fonctions utiles"
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 5,
2025-11-28 17:29:51 +01:00
"id": "60e2035c-c2f0-4c51-97df-102e67ba96ee",
"metadata": {},
"outputs": [],
"source": [
"def plot_account(account_id, isin=None):\n",
" \"\"\"\n",
" Plots the stock (Quantity - AUM) evolution for a given Registrar Account.\n",
" Optionally, only for one ISIN.\n",
" \"\"\"\n",
"\n",
" df = merged.copy()\n",
"\n",
" # Filter by account\n",
" df = df[df[\"Registrar Account - ID\"] == account_id]\n",
"\n",
" if isin is not None:\n",
" df = df[df[\"Product - Isin\"] == isin]\n",
"\n",
" if df.empty:\n",
" print(f\"No data found for account {account_id}\")\n",
" return\n",
"\n",
" df_plot = df.groupby(\"Centralisation Date\")[\"Quantity - AUM\"].sum().reset_index()\n",
"\n",
" df_plot = df_plot.sort_values(\"Centralisation Date\")\n",
"\n",
" # Plot\n",
" plt.figure(figsize=(12, 4))\n",
" plt.plot(df_plot[\"Centralisation Date\"], df_plot[\"Quantity - AUM\"], marker='o')\n",
" plt.title(f\"Stock Evolution for Account {account_id}\", fontsize=14)\n",
" plt.xlabel(\"Date\")\n",
" plt.ylabel(\"Total AUM\")\n",
" plt.grid(True)\n",
" plt.show()\n"
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 6,
2025-11-28 17:29:51 +01:00
"id": "37e008b1-32d4-44be-9d23-1b90a5a26f89",
"metadata": {},
2026-02-01 22:06:59 +01:00
"outputs": [],
"source": [
"# 2. BASIC INSPECTION\n",
"\n",
"def quick_info(df, name):\n",
" print(\"\\n\" + \"=\"*80)\n",
" print(f\"DATASET : {name}\")\n",
" print(\"=\"*80)\n",
" print(\"\\nShape :\", df.shape)\n",
" print(\"\\nColumns :\", df.columns.tolist())\n",
" print(\"\\nDtypes :\\n\", df.dtypes)\n",
" print(\"\\nMissing values (%) :\\n\", df.isna().mean().sort_values(ascending=False)*100)\n",
" print(\"\\nSample rows:\\n\", df.head(5))\n",
" print(\"\\nUnique values per column:\\n\", df.nunique().sort_values(ascending=False))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e67a99ea-ddf4-4627-8f48-ec183c671acb",
"metadata": {},
2025-11-28 17:29:51 +01:00
"outputs": [
2026-02-01 22:06:59 +01:00
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_3144/2877144729.py:5: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" flows = pd.read_csv(f, sep=\";\")\n",
"/tmp/ipykernel_3144/2877144729.py:12: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" stocks = pd.read_csv(f, sep=\";\")\n"
]
},
2025-11-28 17:29:51 +01:00
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"================================================================================\n",
"DATASET : STOCKS\n",
"================================================================================\n",
"\n",
"Shape : (4880297, 18)\n",
"\n",
"Columns : ['Agreement - Code', 'Company - Id', 'Company - Ultimate Parent Id', 'Registrar Account - ID', 'Registrar Account - Region', 'RegistrarAccount - Country', 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status', 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type', 'Product - Shareclass Currency', 'Product - Isin', 'Centralisation Date', 'Quantity - AUM', 'Value - AUM CCY', 'Value - AUM €']\n",
"\n",
"Dtypes :\n",
" Agreement - Code object\n",
"Company - Id object\n",
"Company - Ultimate Parent Id object\n",
"Registrar Account - ID object\n",
"Registrar Account - Region object\n",
"RegistrarAccount - Country object\n",
"Product - Asset Type object\n",
"Product - Strategy object\n",
"Product - Legal Status object\n",
"Product - Is Dedie ? object\n",
"Product - Fund object\n",
"Product - Shareclass Type object\n",
"Product - Shareclass Currency object\n",
"Product - Isin object\n",
"Centralisation Date object\n",
"Quantity - AUM float64\n",
"Value - AUM CCY float64\n",
"Value - AUM € float64\n",
"dtype: object\n",
"\n",
"Missing values (%) :\n",
" Product - Asset Type 6.471553\n",
"Company - Id 2.330801\n",
"Company - Ultimate Parent Id 2.330801\n",
"Product - Strategy 0.001537\n",
"Product - Shareclass Type 0.000717\n",
"Agreement - Code 0.000000\n",
"RegistrarAccount - Country 0.000000\n",
"Registrar Account - Region 0.000000\n",
"Product - Legal Status 0.000000\n",
"Registrar Account - ID 0.000000\n",
"Product - Is Dedie ? 0.000000\n",
"Product - Fund 0.000000\n",
"Product - Shareclass Currency 0.000000\n",
"Product - Isin 0.000000\n",
"Centralisation Date 0.000000\n",
"Quantity - AUM 0.000000\n",
"Value - AUM CCY 0.000000\n",
"Value - AUM € 0.000000\n",
"dtype: float64\n",
"\n",
"Sample rows:\n",
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 3 166.0 166.0 \n",
"1 3 166.0 166.0 \n",
"2 3 166.0 166.0 \n",
"3 3 166.0 166.0 \n",
"4 3 166.0 166.0 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200000647 France \n",
"1 200000647 France \n",
"2 200000647 France \n",
"3 200000647 France \n",
"4 200000647 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Diversified Patrimoine \n",
"1 France Diversified Patrimoine \n",
"2 France Diversified Patrimoine \n",
"3 France Diversified Patrimoine \n",
"4 France Diversified Patrimoine \n",
"\n",
" Product - Legal Status Product - Is Dedie ? Product - Fund \\\n",
"0 FCP NO Carmignac Patrimoine \n",
"1 FCP NO Carmignac Patrimoine \n",
"2 FCP NO Carmignac Patrimoine \n",
"3 FCP NO Carmignac Patrimoine \n",
"4 FCP NO Carmignac Patrimoine \n",
"\n",
" Product - Shareclass Type Product - Shareclass Currency Product - Isin \\\n",
"0 A EUR FR0010135103 \n",
"1 A EUR FR0010135103 \n",
"2 A EUR FR0010135103 \n",
"3 A EUR FR0010135103 \n",
"4 A EUR FR0010135103 \n",
"\n",
" Centralisation Date Quantity - AUM Value - AUM CCY Value - AUM € \n",
"0 2015-03-31 35.368 24648.6666 24648.6666 \n",
"1 2015-11-30 35.368 22413.0553 22413.0553 \n",
"2 2015-12-31 35.368 22051.2406 22051.2406 \n",
"3 2016-03-31 35.368 21626.1173 21626.1173 \n",
"4 2016-11-30 35.368 22489.4502 22489.4502 \n",
"\n",
"Unique values per column:\n",
" Value - AUM € 1697923\n",
"Value - AUM CCY 1689620\n",
"Quantity - AUM 554404\n",
"Registrar Account - ID 15532\n",
"Agreement - Code 2521\n",
"Company - Id 1970\n",
"Company - Ultimate Parent Id 1392\n",
"Product - Isin 491\n",
"Centralisation Date 130\n",
"Product - Fund 74\n",
"Product - Strategy 52\n",
"RegistrarAccount - Country 39\n",
"Registrar Account - Region 15\n",
"Product - Shareclass Type 11\n",
"Product - Legal Status 6\n",
"Product - Shareclass Currency 6\n",
"Product - Asset Type 5\n",
"Product - Is Dedie ? 2\n",
"dtype: int64\n",
"\n",
"================================================================================\n",
"DATASET : FLOWS\n",
"================================================================================\n",
"\n",
"Shape : (2574461, 24)\n",
"\n",
"Columns : ['Agreement - Code', 'Company - Id', 'Company - Ultimate Parent Id', 'Registrar Account - ID', 'Registrar Account - Region', 'RegistrarAccount - Country', 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status', 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type', 'Product - Shareclass Currency', 'Product - Isin', 'Centralisation Date', 'Quantity - Subscription', 'Quantity - Redemption', 'Quantity - NetFlows', 'Value Ccy - Subscription', 'Value Ccy - Redemption', 'Value Ccy - NetFlows', 'Value € - Subscription', 'Value € - Redemption', 'Value € - NetFlows']\n",
"\n",
"Dtypes :\n",
" Agreement - Code object\n",
"Company - Id object\n",
"Company - Ultimate Parent Id object\n",
"Registrar Account - ID object\n",
"Registrar Account - Region object\n",
"RegistrarAccount - Country object\n",
"Product - Asset Type object\n",
"Product - Strategy object\n",
"Product - Legal Status object\n",
"Product - Is Dedie ? object\n",
"Product - Fund object\n",
"Product - Shareclass Type object\n",
"Product - Shareclass Currency object\n",
"Product - Isin object\n",
"Centralisation Date object\n",
"Quantity - Subscription float64\n",
"Quantity - Redemption float64\n",
"Quantity - NetFlows float64\n",
"Value Ccy - Subscription float64\n",
"Value Ccy - Redemption float64\n",
"Value Ccy - NetFlows float64\n",
"Value € - Subscription float64\n",
"Value € - Redemption float64\n",
"Value € - NetFlows float64\n",
"dtype: object\n",
"\n",
"Missing values (%) :\n",
" Product - Asset Type 0.079589\n",
"Company - Id 0.059818\n",
"Company - Ultimate Parent Id 0.059818\n",
"Product - Strategy 0.000233\n",
"Product - Shareclass Type 0.000078\n",
"Registrar Account - ID 0.000000\n",
"RegistrarAccount - Country 0.000000\n",
"Agreement - Code 0.000000\n",
"Registrar Account - Region 0.000000\n",
"Product - Legal Status 0.000000\n",
"Product - Is Dedie ? 0.000000\n",
"Product - Fund 0.000000\n",
"Product - Shareclass Currency 0.000000\n",
"Product - Isin 0.000000\n",
"Centralisation Date 0.000000\n",
"Quantity - Subscription 0.000000\n",
"Quantity - Redemption 0.000000\n",
"Quantity - NetFlows 0.000000\n",
"Value Ccy - Subscription 0.000000\n",
"Value Ccy - Redemption 0.000000\n",
"Value Ccy - NetFlows 0.000000\n",
"Value € - Subscription 0.000000\n",
"Value € - Redemption 0.000000\n",
"Value € - NetFlows 0.000000\n",
"dtype: float64\n",
"\n",
"Sample rows:\n",
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200127202 France \n",
"1 406533 France \n",
"2 406533 France \n",
"3 406533 France \n",
"4 406533 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Equity Investissement \n",
"1 France Diversified Patrimoine \n",
"2 France Equity Investissement \n",
"3 France Equity Investissement \n",
"4 France Equity Investissement \n",
"\n",
" Product - Legal Status Product - Is Dedie ? ... Centralisation Date \\\n",
"0 SICAV NO ... 2020-11-05 \n",
"1 FCP NO ... 2015-03-09 \n",
"2 FCP NO ... 2016-10-26 \n",
"3 FCP NO ... 2018-10-18 \n",
"4 FCP NO ... 2019-04-08 \n",
"\n",
" Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n",
"0 1636.00 0.000 1636.000 \n",
"1 144.69 0.000 144.690 \n",
"2 0.00 -8.321 -8.321 \n",
"3 0.00 -22.083 -22.083 \n",
"4 0.00 -465.992 -465.992 \n",
"\n",
" Value Ccy - Subscription Value Ccy - Redemption Value Ccy - NetFlows \\\n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 \n",
"\n",
" Value € - Subscription Value € - Redemption Value € - NetFlows \n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 \n",
"\n",
"[5 rows x 24 columns]\n",
"\n",
"Unique values per column:\n",
" Value € - NetFlows 2018916\n",
"Value Ccy - NetFlows 1972319\n",
"Value € - Redemption 1323531\n",
"Value Ccy - Redemption 1296468\n",
"Value € - Subscription 955890\n",
"Value Ccy - Subscription 926633\n",
"Quantity - NetFlows 667586\n",
"Quantity - Redemption 374378\n",
"Quantity - Subscription 359661\n",
"Registrar Account - ID 9805\n",
"Centralisation Date 2780\n",
"Company - Id 1929\n",
"Agreement - Code 1626\n",
"Company - Ultimate Parent Id 1283\n",
"Product - Isin 474\n",
"Product - Fund 70\n",
"Product - Strategy 49\n",
"RegistrarAccount - Country 34\n",
"Registrar Account - Region 15\n",
"Product - Shareclass Type 10\n",
"Product - Shareclass Currency 6\n",
"Product - Legal Status 6\n",
"Product - Asset Type 5\n",
"Product - Is Dedie ? 2\n",
"dtype: int64\n",
"\n",
"================================================================================\n",
"DATASET : NAV/PRICES\n",
"================================================================================\n",
"\n",
"Shape : (30333, 13)\n",
"\n",
"Columns : ['NavDate', 'LegalForm', 'Cod', 'PortfolioName', 'PTFCurrency', 'PortfolioAum_Eur', 'ShareClassIsin', 'ShareClassName', 'ShareClassCurrency', 'ShareClassPrice', 'NumberOfShares', 'ShareClassAumLocalCur', 'ShareClassAum_EUR']\n",
"\n",
"Dtypes :\n",
" 0\n",
"NavDate object\n",
"LegalForm object\n",
"Cod object\n",
"PortfolioName object\n",
"PTFCurrency object\n",
"PortfolioAum_Eur object\n",
"ShareClassIsin object\n",
"ShareClassName object\n",
"ShareClassCurrency object\n",
"ShareClassPrice object\n",
"NumberOfShares object\n",
"ShareClassAumLocalCur object\n",
"ShareClassAum_EUR object\n",
"dtype: object\n",
"\n",
"Missing values (%) :\n",
" 0\n",
"NavDate 0.0\n",
"LegalForm 0.0\n",
"Cod 0.0\n",
"PortfolioName 0.0\n",
"PTFCurrency 0.0\n",
"PortfolioAum_Eur 0.0\n",
"ShareClassIsin 0.0\n",
"ShareClassName 0.0\n",
"ShareClassCurrency 0.0\n",
"ShareClassPrice 0.0\n",
"NumberOfShares 0.0\n",
"ShareClassAumLocalCur 0.0\n",
"ShareClassAum_EUR 0.0\n",
"dtype: float64\n",
"\n",
"Sample rows:\n",
" 0 NavDate LegalForm Cod PortfolioName \\\n",
"0 31/12/2009 SICAV CC Carmignac Portfolio Climate Transition \n",
"1 31/12/2009 SICAV CFB Carmignac Portfolio Flexible Bond \n",
"2 31/12/2009 FCP CCT Carmignac Court Terme \n",
"3 31/12/2009 FCP CE Carmignac Emergents \n",
"4 31/12/2009 SICAV CAD Carmignac Portfolio Asia Discovery \n",
"\n",
"0 PTFCurrency PortfolioAum_Eur ShareClassIsin ShareClassName \\\n",
"0 EUR 941059600 LU0164455502 A EUR ACC \n",
"1 EUR 57063272.31 LU0336084032 A EUR ACC \n",
"2 EUR 788828666.5 FR0010149161 A EUR ACC \n",
"3 EUR 1508087050 FR0010149302 A EUR ACC \n",
"4 EUR 149490224.2 LU0336083810 A EUR ACC \n",
"\n",
"0 ShareClassCurrency ShareClassPrice NumberOfShares ShareClassAumLocalCur \\\n",
"0 EUR 287.21 3276555.83 941059600 \n",
"1 EUR 1016.833 56118.62745 57063272.31 \n",
"2 EUR 3687.84 213899.9161 788828666.5 \n",
"3 EUR 559.82 2693878.478 1508087050 \n",
"4 EUR 884.9 168934.5962 149490224.2 \n",
"\n",
"0 ShareClassAum_EUR \n",
"0 941059600 \n",
"1 57063272.31 \n",
"2 788828666.5 \n",
"3 1508087050 \n",
"4 149490224.2 \n",
"\n",
"Unique values per column:\n",
" 0\n",
"ShareClassAum_EUR 30211\n",
"ShareClassAumLocalCur 30032\n",
"NumberOfShares 28910\n",
"ShareClassPrice 14747\n",
"PortfolioAum_Eur 5505\n",
"ShareClassIsin 416\n",
"NavDate 210\n",
"ShareClassName 90\n",
"Cod 55\n",
"PortfolioName 55\n",
"LegalForm 6\n",
"ShareClassCurrency 6\n",
"PTFCurrency 2\n",
"dtype: int64\n"
]
}
],
"source": [
2026-02-01 22:06:59 +01:00
"with fs.open(\n",
" \"projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv\",\n",
" \"rb\"\n",
") as f:\n",
2025-11-28 17:29:51 +01:00
" flows = pd.read_csv(f, sep=\";\")\n",
"\n",
"with fs.open('projet-bdc-data/carmignac/Monthly AUM and NAV since 2010.xlsx', 'rb') as f:\n",
" nav_raw = pd.read_excel(f, header=None, engine=\"openpyxl\")\n",
"nav = nav_raw[0].str.split(\",\", expand=True)\n",
"nav.columns = nav.iloc[0]\n",
2026-02-01 22:06:59 +01:00
"with fs.open('projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv', 'rb') as f:\n",
" stocks = pd.read_csv(f, sep=\";\")\n",
"\n",
2025-11-28 17:29:51 +01:00
"nav = nav[1:].reset_index(drop=True)\n",
"\n",
2026-02-01 22:06:59 +01:00
"quick_info(stocks, \"STOCKS\")\n",
"quick_info(flows, \"FLOWS\")\n",
"quick_info(nav, \"NAV/PRICES\")"
2025-11-28 17:29:51 +01:00
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 8,
2025-11-28 17:29:51 +01:00
"id": "9bc92c9f-216c-475e-bfb8-edc1a4e839f6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Date conversion done.\n",
"NAV numeric conversion done.\n",
"String normalization done.\n",
"\n",
"ISIN missing in FLOWS but present in STOCKS : 17\n",
"\n",
"ISIN missing in STOCKS but present in FLOWS : 0\n",
"\n",
"ISIN missing in NAV but present in FLOWS : 67\n",
"\n",
"ISIN missing in NAV but present in STOCKS : 76\n",
"\n",
"Accounts in STOCKS but NEVER in FLOWS : 5777\n",
"\n",
"Accounts in FLOWS but NEVER in STOCKS : 118\n",
"\n",
"CLIENT BEHAVIOR (first 5 rows):\n",
" Registrar Account - ID n_days n_transactions total_netflows mean_flow \\\n",
"0 100000028 3 3 -109.238 -36.412667 \n",
"1 100000042 1 1 -660.115 -660.115000 \n",
"2 100000065 1 1 -174.646 -174.646000 \n",
"3 100000069 65 73 -7479.755 -102.462397 \n",
"4 100000073 1 1 -133.402 -133.402000 \n",
"\n",
" std_flow total_subscription total_redemption churn_ratio \n",
"0 49.280511 0.000 -109.238 -1.092380e+11 \n",
"1 NaN 0.000 -660.115 -6.601150e+11 \n",
"2 NaN 0.000 -174.646 -1.746460e+11 \n",
"3 2168.971331 33320.402 -40800.157 -1.224480e+00 \n",
"4 NaN 0.000 -133.402 -1.334020e+11 \n",
"\n",
"FUND BEHAVIOR (first 5 rows):\n",
" Product - Isin n_accounts n_days total_netflows vol_flows\n",
"0 FR0010135103 2690 2723 -2.571327e+07 2622.609244\n",
"1 FR0010147603 733 2719 -2.562187e+06 1206.248205\n",
"2 FR0010148981 1841 2722 -3.609440e+06 1051.069183\n",
"3 FR0010148999 454 2306 -7.130297e+05 1265.364138\n",
2026-02-01 22:06:59 +01:00
"4 FR0010149112 934 2000 -9.438901e+05 1834.961721\n"
2025-11-28 17:29:51 +01:00
]
}
],
"source": [
"# 1. CLEAN DATES (formats différents)\n",
"\n",
"stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"], errors=\"coerce\")\n",
"flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"], errors=\"coerce\")\n",
"nav[\"NavDate\"] = pd.to_datetime(nav[\"NavDate\"], format=\"%d/%m/%Y\", errors=\"coerce\")\n",
"\n",
"print(\"Date conversion done.\")\n",
"\n",
"# 2. CLEAN NUMERIC COLUMNS FOR NAV FILE\n",
"\n",
"num_cols = [\"PortfolioAum_Eur\",\"ShareClassPrice\",\"NumberOfShares\",\n",
" \"ShareClassAumLocalCur\",\"ShareClassAum_EUR\"]\n",
"\n",
"for col in num_cols:\n",
" nav[col] = (\n",
" nav[col]\n",
" .astype(str)\n",
" .str.replace(\",\", \".\", regex=False)\n",
" .str.replace(\" \", \"\")\n",
" .astype(float)\n",
" )\n",
"\n",
"print(\"NAV numeric conversion done.\")\n",
"\n",
"# 3. STANDARDIZE STRINGS FOR JOIN KEYS\n",
"\n",
"def norm(df):\n",
" for col in df.columns:\n",
" if df[col].dtype == \"object\":\n",
" df[col] = df[col].astype(str).str.strip().str.upper()\n",
" return df\n",
"\n",
"stocks = norm(stocks)\n",
"flows = norm(flows)\n",
"nav = norm(nav)\n",
"\n",
"print(\"String normalization done.\")\n",
"\n",
"\n",
"# 4. ANALYSE RELATIONS ACROSS FILES\n",
"\n",
"# Unique sets\n",
"isin_stocks = set(stocks[\"Product - Isin\"].unique())\n",
"isin_flows = set(flows[\"Product - Isin\"].unique())\n",
"isin_nav = set(nav[\"ShareClassIsin\"].unique())\n",
"\n",
"print(\"\\nISIN missing in FLOWS but present in STOCKS :\", len(isin_stocks - isin_flows))\n",
"print(\"\\nISIN missing in STOCKS but present in FLOWS :\", len(isin_flows - isin_stocks))\n",
"print(\"\\nISIN missing in NAV but present in FLOWS :\", len(isin_flows - isin_nav))\n",
"print(\"\\nISIN missing in NAV but present in STOCKS :\", len(isin_stocks - isin_nav))\n",
"\n",
"\n",
"# 5. CLIENTS: STOCKS VS FLOWS\n",
"\n",
"acc_stocks = set(stocks[\"Registrar Account - ID\"].unique())\n",
"acc_flows = set(flows[\"Registrar Account - ID\"].unique())\n",
"\n",
"print(\"\\nAccounts in STOCKS but NEVER in FLOWS :\", len(acc_stocks - acc_flows))\n",
"print(\"\\nAccounts in FLOWS but NEVER in STOCKS :\", len(acc_flows - acc_stocks))\n",
"\n",
"\n",
"# 6. CLIENT ACTIVITY METRICS (DETAILED)\n",
"\n",
"client_behavior = flows.groupby(\"Registrar Account - ID\").agg(\n",
" n_days=(\"Centralisation Date\", lambda x: x.nunique()),\n",
" n_transactions=(\"Quantity - NetFlows\", \"count\"),\n",
" total_netflows=(\"Quantity - NetFlows\", \"sum\"),\n",
" mean_flow=(\"Quantity - NetFlows\", \"mean\"),\n",
" std_flow=(\"Quantity - NetFlows\", \"std\"),\n",
" total_subscription=(\"Quantity - Subscription\", \"sum\"),\n",
" total_redemption=(\"Quantity - Redemption\", \"sum\")\n",
").reset_index()\n",
"\n",
"# Add churn metric\n",
"client_behavior[\"churn_ratio\"] = (\n",
" client_behavior[\"total_redemption\"] /\n",
" (client_behavior[\"total_subscription\"] + 1e-9)\n",
")\n",
"\n",
"print(\"\\nCLIENT BEHAVIOR (first 5 rows):\\n\", client_behavior.head())\n",
"\n",
"\n",
"# 7. FUNDS ACTIVITY METRICS\n",
"\n",
"fund_behavior = flows.groupby(\"Product - Isin\").agg(\n",
" n_accounts=(\"Registrar Account - ID\", \"nunique\"),\n",
" n_days=(\"Centralisation Date\", lambda x: x.nunique()),\n",
" total_netflows=(\"Quantity - NetFlows\", \"sum\"),\n",
" vol_flows=(\"Quantity - NetFlows\", \"std\")\n",
").reset_index()\n",
"\n",
"print(\"\\nFUND BEHAVIOR (first 5 rows):\\n\", fund_behavior.head())\n",
"\n",
"\n",
"# 8. SAVE INTERMEDIATE\n",
"\n",
"client_behavior.to_csv(\"client_behavior.csv\", index=False)\n",
"fund_behavior.to_csv(\"fund_behavior.csv\", index=False)"
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 9,
2025-11-28 17:29:51 +01:00
"id": "afb51598-3a7b-41f2-8d25-5b4b8bfb1c8a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FULL usable ISIN : 407\n",
"Stocks only ISIN : 17\n",
"Flows only ISIN : 0\n",
"Missing NAV : 76\n",
"All ISIN groups saved into 4 separate files.\n"
]
}
],
"source": [
"valid_full = isin_stocks & isin_flows & isin_nav\n",
"stocks_only = isin_stocks - isin_flows\n",
"flows_only = isin_flows - isin_stocks\n",
"missing_nav = (isin_stocks | isin_flows) - isin_nav\n",
"\n",
"print(\"FULL usable ISIN :\", len(valid_full))\n",
"print(\"Stocks only ISIN :\", len(stocks_only))\n",
"print(\"Flows only ISIN :\", len(flows_only))\n",
"print(\"Missing NAV :\", len(missing_nav))\n",
"\n",
"pd.DataFrame({\"isin\": list(valid_full)}).to_csv(\"isin_full.csv\", index=False)\n",
"pd.DataFrame({\"isin\": list(stocks_only)}).to_csv(\"isin_stocks_only.csv\", index=False)\n",
"pd.DataFrame({\"isin\": list(flows_only)}).to_csv(\"isin_flows_only.csv\", index=False)\n",
"pd.DataFrame({\"isin\": list(missing_nav)}).to_csv(\"isin_missing_nav.csv\", index=False)\n",
"\n",
2026-02-01 22:06:59 +01:00
"print(\"All ISIN groups saved into 4 separate files.\")"
2025-11-28 17:29:51 +01:00
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 10,
2025-11-28 17:29:51 +01:00
"id": "61e0c71a-a1c6-4ed8-ba15-b7a9badc4d4a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Registrar Account - ID n_days n_transactions total_netflows mean_flow \\\n",
"0 100000028 3 3 -109.238 -36.412667 \n",
"1 100000042 1 1 -660.115 -660.115000 \n",
"2 100000065 1 1 -174.646 -174.646000 \n",
"3 100000069 65 73 -7479.755 -102.462397 \n",
"4 100000073 1 1 -133.402 -133.402000 \n",
"\n",
" std_flow total_subscription total_redemption churn_ratio \\\n",
"0 49.280511 0.000 -109.238 -1.092380e+08 \n",
"1 NaN 0.000 -660.115 -6.601150e+08 \n",
"2 NaN 0.000 -174.646 -1.746460e+08 \n",
"3 2168.971331 33320.402 -40800.157 -1.224480e+00 \n",
"4 NaN 0.000 -133.402 -1.334020e+08 \n",
"\n",
" churn_flag activity_score flow_volatility inertia_ratio \n",
"0 0 1.386294 49.280511 0.998921 \n",
"1 0 0.693147 0.000000 0.999640 \n",
"2 0 0.693147 0.000000 0.999640 \n",
"3 0 4.304065 2168.971331 0.976619 \n",
"4 0 0.693147 0.000000 0.999640 \n"
]
}
],
"source": [
"eps = 1e-6\n",
"\n",
"client_behavior[\"churn_ratio\"] = (\n",
" client_behavior[\"total_redemption\"] /\n",
" (client_behavior[\"total_subscription\"] + eps)\n",
")\n",
"\n",
"client_behavior[\"churn_flag\"] = (\n",
" client_behavior[\"total_redemption\"] > client_behavior[\"total_subscription\"]\n",
").astype(int)\n",
"\n",
"client_behavior[\"activity_score\"] = np.log1p(client_behavior[\"n_transactions\"])\n",
"\n",
"client_behavior[\"flow_volatility\"] = client_behavior[\"std_flow\"].fillna(0)\n",
"\n",
"client_behavior[\"inertia_ratio\"] = (\n",
" 1 - client_behavior[\"n_days\"] / flows[\"Centralisation Date\"].nunique()\n",
")\n",
"\n",
"print(client_behavior.head())\n",
"\n",
"client_behavior.to_csv(\"client_behavior_clean.csv\", index=False)\n"
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 11,
2025-11-28 17:29:51 +01:00
"id": "8ee7e911-eb73-4846-b545-661140411c1b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2026-02-01 22:06:59 +01:00
"/tmp/ipykernel_3144/1645623303.py:17: RuntimeWarning: invalid value encountered in scalar divide\n",
2025-11-28 17:29:51 +01:00
" .apply(lambda x: x[\"Value - AUM €\"].max() / x[\"Value - AUM €\"].sum()) \\\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Registrar Account - ID n_isin_held n_funds_held n_asset_types \\\n",
"0 100000014 1 1 1 \n",
"1 100000016 2 2 2 \n",
"2 100000028 1 1 1 \n",
"3 100000038 3 3 2 \n",
"4 100000042 1 1 1 \n",
"\n",
" n_strategies total_aum median_aum concentration_ratio \n",
"0 1 0.0000 0.0 NaN \n",
"1 2 0.0000 0.0 NaN \n",
"2 1 126236.2184 0.0 1.0 \n",
"3 3 0.0000 0.0 NaN \n",
"4 1 446362.9015 0.0 1.0 \n",
" n_isin_held n_funds_held n_asset_types n_strategies total_aum \\\n",
"count 12501.000000 12501.000000 12501.000000 12501.000000 1.250100e+04 \n",
"mean 5.514759 4.408367 2.082473 4.109271 4.218474e+08 \n",
"std 10.434698 5.472756 1.254048 4.714800 5.618341e+09 \n",
"min 1.000000 1.000000 1.000000 1.000000 -2.586805e+08 \n",
"25% 1.000000 1.000000 1.000000 1.000000 0.000000e+00 \n",
"50% 2.000000 2.000000 2.000000 2.000000 2.587605e+05 \n",
"75% 6.000000 5.000000 3.000000 5.000000 8.817014e+06 \n",
"max 469.000000 67.000000 6.000000 48.000000 4.780234e+11 \n",
"\n",
" median_aum concentration_ratio \n",
"count 1.250100e+04 7708.000000 \n",
"mean 2.573991e+05 0.790503 \n",
"std 3.487976e+06 0.261535 \n",
"min -2.317333e+06 -2.591840 \n",
"25% 0.000000e+00 0.576503 \n",
"50% 0.000000e+00 0.972159 \n",
"75% 1.474502e+02 1.000000 \n",
"max 2.215373e+08 2.983529 \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2026-02-01 22:06:59 +01:00
"/tmp/ipykernel_3144/1645623303.py:17: FutureWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
2025-11-28 17:29:51 +01:00
" .apply(lambda x: x[\"Value - AUM €\"].max() / x[\"Value - AUM €\"].sum()) \\\n"
]
}
],
"source": [
"# Diversification per account\n",
"account_div = stocks.groupby(\"Registrar Account - ID\").agg(\n",
" n_isin_held=(\"Product - Isin\", \"nunique\"),\n",
" n_funds_held=(\"Product - Fund\", \"nunique\"),\n",
" n_asset_types=(\"Product - Asset Type\", \"nunique\"),\n",
" n_strategies=(\"Product - Strategy\", \"nunique\"),\n",
" total_aum=(\"Value - AUM €\", \"sum\"),\n",
" median_aum=(\"Value - AUM €\", \"median\")\n",
").reset_index()\n",
"\n",
"# Concentration ratio per account\n",
"aum_by_account_fund = stocks.groupby(\n",
" [\"Registrar Account - ID\", \"Product - Fund\"]\n",
")[\"Value - AUM €\"].sum().reset_index()\n",
"\n",
"concentration = aum_by_account_fund.groupby(\"Registrar Account - ID\") \\\n",
" .apply(lambda x: x[\"Value - AUM €\"].max() / x[\"Value - AUM €\"].sum()) \\\n",
" .reset_index(name=\"concentration_ratio\")\n",
"\n",
"# Merge diversification + concentration\n",
"account_static = account_div.merge(concentration, on=\"Registrar Account - ID\", how=\"left\")\n",
"\n",
"print(account_static.head())\n",
"print(account_static.describe())\n"
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 12,
2025-11-28 17:29:51 +01:00
"id": "76f6fa0d-9d7a-4145-af1c-986d83947f91",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Registrar Account - ID country region\n",
"0 100000014 SWITZERLAND SWITZERLAND\n",
"1 100000016 UNITED KINGDOM UNITED KINGDOM\n",
"2 100000028 UNITED KINGDOM UNITED KINGDOM\n",
"3 100000038 SWITZERLAND SWITZERLAND\n",
"4 100000042 UNITED KINGDOM UNITED KINGDOM\n"
]
}
],
"source": [
"# Geographic info per account\n",
"geo = stocks.groupby(\"Registrar Account - ID\").agg(\n",
" country=(\"RegistrarAccount - Country\", lambda x: x.mode()[0]),\n",
" region=(\"Registrar Account - Region\", lambda x: x.mode()[0])\n",
").reset_index()\n",
"\n",
"print(geo.head())\n"
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 21,
"id": "b31584e4-b3c4-4fdd-9fd9-87372a7440ec",
2025-11-28 17:29:51 +01:00
"metadata": {},
"outputs": [
{
"data": {
2026-02-01 22:06:59 +01:00
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"colorscale": [
[
0,
"rgb(255,247,236)"
],
[
1,
"rgb(255,247,236)"
]
],
"customdata": {
"bdata": "AAAAAAAAJEAAAAAAAAAIQAAAAAAAABBAAAAAAAAAGEAAAAAAAAAYQAAAAAAAACJAAAAAAAAAFEAAAAAAAAAkQAAAAAAAAABAAAAAAAAAFEAAAAAAAAAAQAAAAAAAABRAAAAAAAAACEAAAAAAAADwPwAAAAAAAPA/AAAAAAAAAEAAAAAAAAAUQAAAAAAAABhA",
"dtype": "f8",
"shape": "18, 1"
},
"geo": "geo",
"hovertemplate": "<b>%{hovertext}</b><br><br>account_bin=Very low (19)<br>iso3=%{location}<br>n_accounts=%{customdata[0]}<extra></extra>",
"hovertext": [
"CANADA",
"CZECH REPUBLIC",
"DENMARK",
"FINLAND",
"GREECE",
"HONG KONG",
"IRELAND",
"ISRAEL",
"JAPAN",
"LIECHTENSTEIN",
"LITHUANIA",
"MALTA",
"MAURITIUS",
"NORWAY",
"POLAND",
"SOUTH AFRICA",
"TAIWAN",
"UNITED ARAB EMIRATES"
],
"locations": [
"CAN",
"CZE",
"DNK",
"FIN",
"GRC",
"HKG",
"IRL",
"ISR",
"JPN",
"LIE",
"LTU",
"MLT",
"MUS",
"NOR",
"POL",
"ZAF",
"TWN",
"ARE"
],
"name": "Very low (19)",
"showlegend": true,
"showscale": false,
"type": "choropleth",
"z": [
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1
]
},
{
"colorscale": [
[
0,
"rgb(254,232,200)"
],
[
1,
"rgb(254,232,200)"
]
],
"customdata": {
"bdata": "AAAAAAAALEAAAAAAAAA0QAAAAAAAAEJAAAAAAAAALkAAAAAAAAA1QAAAAAAAgENAF1100UUXSEAXXXTRRRdIQBdddNFFF0hAF1100UUXSEAXXXTRRRdIQBdddNFFF0hAF1100UUXSEAXXXTRRRdIQBdddNFFF0hAF1100UUXSEAXXXTRRRdIQA==",
"dtype": "f8",
"shape": "17, 1"
},
"geo": "geo",
"hovertemplate": "<b>%{hovertext}</b><br><br>account_bin=Low (1049)<br>iso3=%{location}<br>n_accounts=%{customdata[0]}<extra></extra>",
"hovertext": [
"AUSTRIA",
"MONACO",
"NETHERLANDS",
"PORTUGAL",
"SINGAPORE",
"SWEDEN",
"ARGENTINA",
"BRAZIL",
"CHILE",
"COLOMBIA",
"MEXICO",
"PERU",
"URUGUAY",
"PARAGUAY",
"BOLIVIA",
"ECUADOR",
"VENEZUELA"
],
"locations": [
"AUT",
"MCO",
"NLD",
"PRT",
"SGP",
"SWE",
"ARG",
"BRA",
"CHL",
"COL",
"MEX",
"PER",
"URY",
"PRY",
"BOL",
"ECU",
"VEN"
],
"name": "Low (1049)",
"showlegend": true,
"showscale": false,
"type": "choropleth",
"z": [
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1
]
},
{
"colorscale": [
[
0,
"rgb(253,212,158)"
],
[
1,
"rgb(253,212,158)"
]
],
"customdata": {
"bdata": "AAAAAABAZEAAAAAAAIBWQA==",
"dtype": "f8",
"shape": "2, 1"
},
"geo": "geo",
"hovertemplate": "<b>%{hovertext}</b><br><br>account_bin=Medium (50199)<br>iso3=%{location}<br>n_accounts=%{customdata[0]}<extra></extra>",
"hovertext": [
"BELGIUM",
"GERMANY"
],
"locations": [
"BEL",
"DEU"
],
"name": "Medium (50199)",
"showlegend": true,
"showscale": false,
"type": "choropleth",
"z": [
1,
1
]
},
{
"colorscale": [
[
0,
"rgb(253,187,132)"
],
[
1,
"rgb(253,187,132)"
]
],
"customdata": {
"bdata": "AAAAAABwfkAAAAAAALB0QAAAAAAAQHVAAAAAAABwdEAAAAAAAHCDQA==",
"dtype": "f8",
"shape": "5, 1"
},
"geo": "geo",
"hovertemplate": "<b>%{hovertext}</b><br><br>account_bin=High (200999)<br>iso3=%{location}<br>n_accounts=%{customdata[0]}<extra></extra>",
"hovertext": [
"ITALY",
"LUXEMBOURG",
"SPAIN",
"SWITZERLAND",
"UNITED KINGDOM"
],
"locations": [
"ITA",
"LUX",
"ESP",
"CHE",
"GBR"
],
"name": "High (200999)",
"showlegend": true,
"showscale": false,
"type": "choropleth",
"z": [
1,
1,
1,
1,
1
]
},
{
"colorscale": [
[
0,
"rgb(252,141,89)"
],
[
1,
"rgb(252,141,89)"
]
],
"customdata": {
"bdata": "AAAAAADgpEAAAAAAADiSQA==",
"dtype": "f8",
"shape": "2, 1"
},
"geo": "geo",
"hovertemplate": "<b>%{hovertext}</b><br><br>account_bin=Very high (≥ 1,000)<br>iso3=%{location}<br>n_accounts=%{customdata[0]}<extra></extra>",
"hovertext": [
"FRANCE",
"UNITED STATES"
],
"locations": [
"FRA",
"USA"
],
"name": "Very high (≥ 1,000)",
"showlegend": true,
"showscale": false,
"type": "choropleth",
"z": [
1,
1
]
}
],
"layout": {
"geo": {
"center": {},
"coastlinecolor": "rgba(0,0,0,0.3)",
"domain": {
"x": [
0,
1
],
"y": [
0,
1
]
},
"projection": {
"type": "natural earth"
},
"showcoastlines": true,
"showframe": false
},
"legend": {
"title": {
"text": "Number of registrar accounts<br>with flow activity"
},
"tracegroupgap": 0
},
"margin": {
"b": 0,
"l": 0,
"r": 0,
"t": 40
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermap": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermap"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
}
}
},
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAzkAAAFoCAYAAAB0XzViAAAQAElEQVR4AexdBUBVSRf+Ht0NIqg0SCqIiYHdXWutHb+uXWussbp269q19trd3YGYCIISIkpId/9zrr63DwQFBQUddO6dOHPmzHcfvPnmzMyVyeY/HAGOAEeAI8AR4AhwBDgCHAGOAEfgB0JABvyHI8ARyAMBnsUR4AhwBDgCHAGOAEeAI1BaEeAkp7Q+OW43R4AjwBH4HgjwNjkCHAGOAEeAI1AKEOAkpxQ8JG4iR4AjwBHgCHAEOAIlGwFuHUeAI1CyEOAkp2Q9D24NR4AjwBHgCHAEOAIcAY4AR+BHQeC79YOTnO8GPW+YI8AR4AhwBDgCHAGOAEeAI8ARKA4EOMkpDlS5zqJDgGviCHAEOAIcAY4AR4AjwBHgCBQSAU5yCgkYF+cIcAQ4AiUBAW4DR4AjwBHgCHAEOAL5I8BJTv7Y8BKOAEeAI8AR4AhwBEoXAtxajgBHgCMgIMBJjgADv3AEOAIcAY4AR4AjwBHgCHAEflQEfr5+cZLz8z1z3mOOAEeAI8AR4AhwBDgCHAGOwA+NACc5P/TjLbrOcU0cAY4AR4AjwBHgCHAEOAIcgdKCACc5peVJcTs5AhyBkogAt4kjwBHgCHAEOAIcgRKIACc5JfChcJM4AhwBjgBHgCNQuhHg1nMEOAIcge+LACc53xd/3jpHgCPAEeAIcAQ4AhwBjsDPggDv5zdDgJOcbwY1b4gjwBHgCHAEOAIcAY4AR4AjwBH4FghwkvMtUC66NrgmjgBHgCPAEeAIcAQ4AhwBjgBH4DMIcJLzGYB4MUeAI1AaEOA2cgQ4AhwBjgBHgCPAEfgPAU5y/sOCxzgCHAGOAEeAI/BjIcB7wxHgCHAEflIEOMn5SR887zZHgCPAEeAIcAQ4AhyBnxUB3u8fHwFOcn78Z8x7yBHgCHAEOAIcAY4AR4AjwBH4qRDgJOeLHjevxBHgCHAEOAIcAY4AR4AjwBHgCJRUBDjJKalPhtvFESiNCHCbOQIcAY4AR4AjwBHgCJQABDjJKQEPgZvAEeAIcAQ4Aj82Arx3HAGOAEeAI/BtEeAk59vizVvjCHAEOAIcAY4AR4AjwBF4jwC/cgSKDQFOcooNWq6YI8AR4AhwBDgCHAGOAEeAI8AR+B4IlG6S8z0Q421yBDgCHAGOAEeAI8AR4AhwBDgCJRoBTnJK9OPhxnEEvgwBXosjwBHgCHAEOAIcAY7Az4wAJzk/89PnfecIcAQ4Aj8XAry3HAGOAEeAI/CTIMBJzk/yoHk3OQIcAY4AR4AjwBHgCOSNAM/lCPx4CHCS8+M9U94jjgBHgCPAEeAIcAQ4AhwBjsBPjUCRkJyfGkHeeY4AR4AjwBHgCHAEOAIcAY4AR6BEIcBJTol6HNyYHwwB3h2OAEeAI8AR4AhwBDgCHIHvgAAnOd8BdN4kR4AjwBH4uRHgvecIcAQ4AhwBjkDxIsBJTvHiy7VzBDgCHAGOAEeAI8ARKBgCXIojwBEoMgQ4ySkyKLkijgBHgCPAEeAIcAQ4AhwBjgBHoKgR+BJ9nOR8CWq8DkeAI8AR4AhwBDgCHAGOAEeAI1BiEeAkp8Q+Gm5Y0SHANXEEOAIcAY4AR4AjwBHgCPxMCHCS8zM9bd5XjgBHgCMgjQCPcwQ4AhwBjgBH4AdFgJOcH/TB8m5xBDgCHAGOAEeAI/BlCPBaHAGOQOlHgJOc0v8MeQ84AhwBjgBHgCPAEeAIcAQ4AsWNQKnSz0lOqXpc3FiOAEeAI8AR4AhwBDgCHAGOAEfgcwhwkvM5hHh50SHANXEEOAIcAY4AR4AjwBHgCHAEvgECnOR8A5B5ExwBjgBH4FMI8DKOAEeAI8AR4AhwBIoWAU5yihZPro0jwBHgCHAEOAIcgaJBgGvhCHAEOAJfjAAnOV8MHa/IEeAIcAQ4AhwBjgBHgCPAEfjWCPD2CoIAJzkFQYnLcAQ4AhwBjgBHgCPAEeAIcAQ4AqUGAU5ySs2jKjpDuSaOQH4IZGRkICUlBUlJSUhMTERCQgLi4+MRFxcn3CmdnJyMtLQ0ZGdn56eG53MEOAIcAY7AD4BAUnIK+o2ej027T5ao3ly6+QD27n2EQPaRnSXKQG5MiUCAk5wS8Ri4ET8CArGxsbh16xZOnDiBXbt2YceOHdi/fz82b96M2bNnY/78+Vi7di3mzp2LxYsXY/r06UKYPHkyNm7cKJE/cOAAKISEhBQJLJmZmaDg7e2NCxcu4tbtO7hy7Tqz9Q58nj+Hn58ffHx8MGrMOGzdvgvnL17F+UvXcOHKDVy6eguXrt3GtVseuHj1Ni5cvonT567g2MmzOHL8FI6dOI0bN2/jzZs3CA8PR3R0NLKysorE7u+ghDfJEeAIcARKFAKT524QBvJ0lzbM63kgmnYbD7pL5/8M8YjIGMxftRur5oyE1+Wt2Lx0IlSUlb5p14n0Nes+AWTL5xomQlZQ2c/p+hnLCesvJbKc5PyMnxje5wIjQAP24OBgEIE5ffq0QETWrVuHGTNmYNCgQRjMwqjRo7FgwQJMnToZFy+cxa0bVxHyyh/vwkLg7fUYcqIM2FpWgImxPgy0VWFhUhYONqao4lQRrpVsYV7BCMkJMXj7Ogi+Pl548sgTb1h82LBh6NmzJzq0by+QkMjISLx+/Rr+/v54zsjJkydPcc/DUwheXl44fPgIPD098c+2bTh95hyuXL2Gh48eMbumYdHSFfh90mT8vXY9VLUMYWBsAVVtQ0QlZOJtZCLCopPRrFUH1GvYAi7V68C1Rl24snuV6rXZvTYcKrmiXAVTaOroQVlVA/JKapCVV0GWSAFvI2Jw7uI17Nl/GBs2bcGyZcsLjC8X5AhwBEoDAtzG74lAubL6uHTjAWiw/D3tKClth7+LgbqaCvseNftuJvXv1gKndy2Avq7Wd7OBN/x5BDjJ+TxGXOInQYAITUxMjOB9IdJw5swZNG3aFP369cPECeNx/x7zWDDyoa2uCHsbM/Tr2QlDB/ZCp9ZN4Opkg5UL/8SUcb9h9rQJGD/qfxg1bAD+mDgSv3briI7tWuKXTm3RoW0LdOnQGk0buaNNyyZo3aIxBvTphuFD+rLyNswbEoG7Hg/gcf8hlJWVoaqqCl09ffj5v4LHQy+8fPUWr8NiEBGTiqQMOcgoaUJeRQd3H3jhwRMvHD91DvEJabCyc4FBOUukZiuiRt1GMNAvCztHZwz833Do6RtAU1NLuJuamsHSygZW1hXhVMlZaC+vx338yEGcPnEMDz3vIeDlc7wLD8WTx544cmAPDu3fjeSkeGioq0NLRw9Kqmp4/OQJ7tzzwKPHTxAYGIir12/i0JET2HfgMA4dPcnCCRw8fBi+vr55NcfzciFAywhzZZXoJNkbGhrKvXol+ilx40oLAlWcrFHfzRnb951FfsuyiADl9haQl0fa2yOeEV++8YDgHbJ37wNxHfIUUZpCXrPmCYnJwrI1KqdA8rnxozwqo1Cz1VCJl4lsJp3UvlhG3C7y+RHL5dZFfRowdgGe+QbCveMooR/U97zUUHvU7vHztwQ50iWWJT1kI+VRoPakdZCHphnz1FCZOFCa8klOrDspOYWSwnOhtsSydCcZamfynA0IfhMusZfa+hQmVE71xUG63U/VEwzJdSEbxHroTn0mm6TFqE/UBpVTyC2T2x7SKa4vXZa7HmFNekm/WJ7azuszKf2MpPWQjiXr9uLOA29UbT5EeI7i9qmM7BWH3G1RmzJ04YEj8LMiQB6a7du3YzTzxjRr1pR5Tnrg0sXz+HvVCjxiXhBjY2Po6WqjZlVn2NlYCnc9XR3o6mghPj4Rj58+w8PHT3H/4RP0HTIa127cKRCUcfEJzPtxFQuWrcHgEb+jWbseGDt5FkLehqFOXXd07t4bPXoPQJt2nTFx6l+owrwqjpWrwsrGAZbWtjC3tEL5CiYoW9YIBmXKoGnz1hg8dDQGDBmBDl17QFlFBZqMyJQvb4Kq1Wqgaau2+G3kOFSq5FIg+3ILte/UFX0H/g9duv2Kth1/QbOWbdG2Q1f07DsI436fgYZN2+LFC+a
2025-11-28 17:29:51 +01:00
},
"metadata": {},
2026-02-01 22:06:59 +01:00
"output_type": "display_data"
2025-11-28 17:29:51 +01:00
}
],
"source": [
2026-02-01 22:06:59 +01:00
"# ============================================================\n",
"# GEOGRAPHIC COVERAGE — AUM MAP (ACCOUNTS WITH ≥1 FLOW ONLY)\n",
"# ============================================================\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.express as px\n",
"import pycountry\n",
"\n",
"# ------------------------------------------------------------\n",
"# Helper: country name → ISO-3 code\n",
"# ------------------------------------------------------------\n",
"\n",
"def country_to_iso3(name):\n",
" try:\n",
" return pycountry.countries.lookup(name).alpha_3\n",
" except:\n",
" return None\n",
"\n",
"# Harmonize country labels\n",
"stocks[\"RegistrarAccount - Country\"] = stocks[\"RegistrarAccount - Country\"].replace({\n",
" \"US OFFSHORE\": \"UNITED STATES\"\n",
"})\n",
"\n",
"flows[\"RegistrarAccount - Country\"] = flows[\"RegistrarAccount - Country\"].replace({\n",
" \"US OFFSHORE\": \"UNITED STATES\"\n",
"})\n",
"\n",
"# ------------------------------------------------------------\n",
"# 0. Identify registrar accounts with at least one non-zero flow\n",
"# ------------------------------------------------------------\n",
"\n",
"active_accounts = (\n",
" flows.loc[flows[\"Value € - NetFlows\"] != 0, \"Registrar Account - ID\"]\n",
" .unique()\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# Restrict STOCKS to accounts with flows\n",
"stocks_active = stocks[\n",
" stocks[\"Registrar Account - ID\"].isin(active_accounts)\n",
"].copy()\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# ------------------------------------------------------------\n",
"# 1. Aggregate number of ACTIVE accounts per country\n",
"# ------------------------------------------------------------\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"country_accounts = (\n",
" stocks_active\n",
" .groupby(\"RegistrarAccount - Country\")[\"Registrar Account - ID\"]\n",
" .nunique()\n",
" .reset_index(name=\"n_accounts\")\n",
2025-11-28 17:29:51 +01:00
")\n",
"\n",
2026-02-01 22:06:59 +01:00
"# ------------------------------------------------------------\n",
"# 2. Handle LATAM explicitly (visual redistribution)\n",
"# ------------------------------------------------------------\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"latam_countries = [\n",
" \"ARGENTINA\", \"BRAZIL\", \"CHILE\", \"COLOMBIA\", \"MEXICO\",\n",
" \"PERU\", \"URUGUAY\", \"PARAGUAY\", \"BOLIVIA\",\n",
" \"ECUADOR\", \"VENEZUELA\"\n",
"]\n",
"\n",
"latam_value = country_accounts.loc[\n",
" country_accounts[\"RegistrarAccount - Country\"] == \"LATAM\",\n",
" \"n_accounts\"\n",
"]\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"if not latam_value.empty:\n",
" latam_share = latam_value.iloc[0] / len(latam_countries)\n",
"\n",
" latam_df = pd.DataFrame({\n",
" \"RegistrarAccount - Country\": latam_countries,\n",
" \"n_accounts\": latam_share\n",
" })\n",
"\n",
" country_accounts = pd.concat(\n",
" [\n",
" country_accounts[\n",
" ~country_accounts[\"RegistrarAccount - Country\"].isin(\n",
" [\"LATAM\", \"INTERNATIONAL\", \"UNKNOWN\", \"US OFFSHORE\"]\n",
" )\n",
" ],\n",
" latam_df\n",
" ],\n",
" ignore_index=True\n",
" )\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"else:\n",
" country_accounts = country_accounts[\n",
" ~country_accounts[\"RegistrarAccount - Country\"].isin(\n",
" [\"INTERNATIONAL\", \"UNKNOWN\", \"US OFFSHORE\"]\n",
" )\n",
" ]\n",
"\n",
"# ------------------------------------------------------------\n",
"# 3. Convert country names to ISO-3\n",
"# ------------------------------------------------------------\n",
"\n",
"country_accounts[\"iso3\"] = country_accounts[\"RegistrarAccount - Country\"].apply(country_to_iso3)\n",
"\n",
"# Diagnostic: unmapped values\n",
"unmapped = country_accounts[country_accounts[\"iso3\"].isna()]\n",
"if not unmapped.empty:\n",
" print(\"Unmapped country labels:\")\n",
" print(unmapped[\"RegistrarAccount - Country\"].unique())\n",
"\n",
"country_accounts = country_accounts.dropna(subset=[\"iso3\"])\n",
"\n",
"# ------------------------------------------------------------\n",
"# 4. Discretize by order of magnitude (explicit labels)\n",
"# ------------------------------------------------------------\n",
"\n",
"bins = [0, 10, 50, 200, 1000, np.inf]\n",
"labels = [\n",
" \"Very low (19)\",\n",
" \"Low (1049)\",\n",
" \"Medium (50199)\",\n",
" \"High (200999)\",\n",
" \"Very high (≥ 1,000)\"\n",
"]\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"country_accounts[\"account_bin\"] = pd.cut(\n",
" country_accounts[\"n_accounts\"],\n",
" bins=bins,\n",
" labels=labels,\n",
" include_lowest=True\n",
2025-11-28 17:29:51 +01:00
")\n",
"\n",
2026-02-01 22:06:59 +01:00
"# ------------------------------------------------------------\n",
"# 5. World map — AUM coverage (active accounts only)\n",
"# ------------------------------------------------------------\n",
"\n",
"fig = px.choropleth(\n",
" country_accounts,\n",
" locations=\"iso3\",\n",
" color=\"account_bin\",\n",
" hover_name=\"RegistrarAccount - Country\",\n",
" hover_data={\"n_accounts\": True},\n",
" category_orders={\"account_bin\": labels},\n",
" color_discrete_sequence=px.colors.sequential.OrRd,\n",
" #title=\"Geographic distribution of registrar accounts with observed flows\"\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"fig.update_layout(\n",
" margin=dict(l=0, r=0, t=40, b=0),\n",
" legend_title_text=\"Number of registrar accounts<br>with flow activity\",\n",
" geo=dict(\n",
" showframe=False,\n",
" showcoastlines=True,\n",
" coastlinecolor=\"rgba(0,0,0,0.3)\",\n",
" projection_type=\"natural earth\"\n",
" )\n",
2025-11-28 17:29:51 +01:00
")\n",
"\n",
2026-02-01 22:06:59 +01:00
"\n",
"fig.show()\n"
2025-11-28 17:29:51 +01:00
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 14,
"id": "18dd9229-825c-4bcf-b802-cecb1d51871e",
2025-11-28 17:29:51 +01:00
"metadata": {},
"outputs": [
{
2026-02-01 22:06:59 +01:00
"name": "stdout",
"output_type": "stream",
"text": [
"Global raw totals (flows):\n",
" - sum netflows €: -18256951755.077896\n",
" - sum abs netflows €: 283031932185.3349\n",
" - n rows: 2574461\n",
" - n accounts: 6842\n",
"\n",
"Accounts with >1 country label in STOCKS (should be low): 136\n",
"\n",
"Top accounts with multi-country labels (examples):\n",
" Registrar Account - ID country n_countries_in_stocks\n",
"8724 406197 SWITZERLAND 16\n",
"6933 366024 SWEDEN 14\n",
"12030 420207 LUXEMBOURG 13\n",
"12499 OFF DISTRIBUTION LUXEMBOURG 13\n",
"3539 200127410 SWITZERLAND 12\n",
"5534 348454 SWITZERLAND 11\n",
"6352 365376 SWITZERLAND 11\n",
"5470 307388 SWITZERLAND 9\n",
"36 13463 SWITZERLAND 8\n",
"12400 422778 LUXEMBOURG 7\n",
"\n",
"Top 15 countries by total_abs_flows (robust):\n",
" country total_abs_flows total_signed_flows n_accounts_active \\\n",
"13 ITALY 4.263941e+10 -3.201953e+09 478 \n",
"26 SPAIN 4.099818e+10 1.575358e+09 330 \n",
"6 FRANCE 3.488683e+10 -4.092174e+09 2627 \n",
"17 LUXEMBOURG 3.100404e+10 -1.909475e+09 310 \n",
"7 GERMANY 1.502239e+10 -4.128081e+09 80 \n",
"1 BELGIUM 1.437804e+10 -3.546591e+09 145 \n",
"28 SWITZERLAND 1.141769e+10 -2.356821e+09 275 \n",
"31 UNITED KINGDOM 3.397122e+09 -1.094136e+08 605 \n",
"27 SWEDEN 1.038465e+09 8.745456e+07 37 \n",
"14 JAPAN 6.973177e+08 -2.703144e+08 2 \n",
"20 MONACO 6.387020e+08 -9.208744e+07 12 \n",
"21 NETHERLANDS 5.477981e+08 -2.581311e+08 32 \n",
"23 PORTUGAL 5.187647e+08 3.011212e+07 14 \n",
"0 AUSTRIA 3.782388e+08 2.282761e+07 9 \n",
"32 UNITED STATES 3.641362e+08 3.272825e+07 1161 \n",
"\n",
" n_flow_days share_multicountry_accounts avg_abs_flows_per_active_account \n",
"13 2758 0.022587 8.920378e+07 \n",
"26 2718 0.054746 1.242369e+08 \n",
"6 2724 0.034566 1.328010e+07 \n",
"17 2719 0.574681 1.000130e+08 \n",
"7 2715 0.156396 1.877799e+08 \n",
"1 2721 0.049440 9.915890e+07 \n",
"28 2720 0.605867 4.151887e+07 \n",
"31 2748 0.001728 5.615078e+06 \n",
"27 2658 0.585541 2.806663e+07 \n",
"14 1629 0.000000 3.486588e+08 \n",
"20 1863 0.596097 5.322517e+07 \n",
"21 2611 0.173396 1.711869e+07 \n",
"23 1433 0.000000 3.705462e+07 \n",
"0 2118 0.647902 4.202653e+07 \n",
"32 1116 0.000000 3.136401e+05 \n",
"\n",
"France vs UK diagnostics:\n",
" country total_abs_flows total_signed_flows n_accounts_active \\\n",
"6 FRANCE 3.488683e+10 -4.092174e+09 2627 \n",
"31 UNITED KINGDOM 3.397122e+09 -1.094136e+08 605 \n",
"\n",
" n_flow_days share_multicountry_accounts avg_abs_flows_per_active_account \n",
"6 2724 0.034566 1.328010e+07 \n",
"31 2748 0.001728 5.615078e+06 \n",
"\n",
"France: top 10 accounts driving flows:\n",
" country Registrar Account - ID acc_abs_flows\n",
"2797 FRANCE PRIVATE CLIENT 2.379152e+09\n",
"2695 FRANCE 418652 1.162739e+09\n",
"787 FRANCE 200127454 8.858410e+08\n",
"1265 FRANCE 365596 7.407851e+08\n",
"875 FRANCE 200127809 7.233502e+08\n",
"1540 FRANCE 405760 7.060290e+08\n",
"877 FRANCE 200127811 5.845121e+08\n",
"2648 FRANCE 417622 5.822746e+08\n",
"271 FRANCE 200002327 5.160009e+08\n",
"1593 FRANCE 406163 5.046222e+08\n",
"\n",
"UK: top 10 accounts driving flows:\n",
" country Registrar Account - ID acc_abs_flows\n",
"4971 UNITED KINGDOM 200000225 3.112362e+08\n",
"5127 UNITED KINGDOM 200009976 2.835532e+08\n",
"5150 UNITED KINGDOM 200048865 2.719247e+08\n",
"5454 UNITED KINGDOM 365486 1.840476e+08\n",
"5161 UNITED KINGDOM 200072503 1.233866e+08\n",
"5200 UNITED KINGDOM 200102162 7.662158e+07\n",
"5039 UNITED KINGDOM 200001522 6.594899e+07\n",
"5165 UNITED KINGDOM 200079169 5.913266e+07\n",
"5230 UNITED KINGDOM 200127603 5.872504e+07\n",
"5260 UNITED KINGDOM 200128363 5.599629e+07\n"
]
2025-11-28 17:29:51 +01:00
}
],
"source": [
2026-02-01 22:06:59 +01:00
"import pandas as pd\n",
"import numpy as np\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# ============================================================\n",
"# 0) QUICK SANITY: global totals\n",
"# ============================================================\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"print(\"Global raw totals (flows):\")\n",
"print(\" - sum netflows €:\", flows[\"Value € - NetFlows\"].sum())\n",
"print(\" - sum abs netflows €:\", flows[\"Value € - NetFlows\"].abs().sum())\n",
"print(\" - n rows:\", len(flows))\n",
"print(\" - n accounts:\", flows[\"Registrar Account - ID\"].nunique())\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# ============================================================\n",
"# 1) DEFINE ACTIVE ACCOUNTS (≥ 1 non-zero flow)\n",
"# ============================================================\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"active_accounts = (\n",
" flows.loc[flows[\"Value € - NetFlows\"] != 0, \"Registrar Account - ID\"]\n",
" .unique()\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"flows_active = flows[flows[\"Registrar Account - ID\"].isin(active_accounts)].copy()\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# ============================================================\n",
"# 2) COUNTRY MAPPING FROM STOCKS + DIAGNOSTIC\n",
"# Check if accounts are multi-country in STOCKS (bad sign)\n",
"# ============================================================\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"acc_country_counts = (\n",
" stocks.groupby(\"Registrar Account - ID\")[\"RegistrarAccount - Country\"]\n",
" .nunique()\n",
" .rename(\"n_countries_in_stocks\")\n",
" .reset_index()\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"print(\"\\nAccounts with >1 country label in STOCKS (should be low):\",\n",
" (acc_country_counts[\"n_countries_in_stocks\"] > 1).sum())\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# Use mode for mapping (as you did), but now we will measure if it's messy\n",
"account_country = (\n",
" stocks.groupby(\"Registrar Account - ID\")[\"RegistrarAccount - Country\"]\n",
" .agg(lambda x: x.mode().iloc[0])\n",
" .reset_index()\n",
" .rename(columns={\"RegistrarAccount - Country\": \"country\"})\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# Merge diagnostics\n",
"account_country = account_country.merge(acc_country_counts, on=\"Registrar Account - ID\", how=\"left\")\n",
"\n",
"# Look at the worst offenders\n",
"print(\"\\nTop accounts with multi-country labels (examples):\")\n",
"print(account_country.sort_values(\"n_countries_in_stocks\", ascending=False).head(10))\n",
"\n",
"# ============================================================\n",
"# 3) CLEAN FLOW AGGREGATION TO AVOID DOUBLE COUNTING\n",
"# Key step: aggregate flows first, then take abs, then sum.\n",
"# Two options:\n",
"# - Option A: Account x Date (recommended for \"activity intensity\")\n",
"# - Option B: Account x ISIN x Date (keeps instrument dimension)\n",
"# ============================================================\n",
"\n",
"# Option A (recommended): aggregate per account-date\n",
"flows_acc_day = (\n",
" flows_active\n",
" .groupby([\"Registrar Account - ID\", \"Centralisation Date\"], as_index=False)[\"Value € - NetFlows\"]\n",
" .sum()\n",
")\n",
"\n",
"flows_acc_day[\"abs_flow\"] = flows_acc_day[\"Value € - NetFlows\"].abs()\n",
"\n",
"# Attach country\n",
"flows_acc_day = flows_acc_day.merge(\n",
" account_country[[\"Registrar Account - ID\", \"country\", \"n_countries_in_stocks\"]],\n",
" on=\"Registrar Account - ID\",\n",
" how=\"left\"\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# ============================================================\n",
"# 4) COUNTRY TOTALS (robust)\n",
"# ============================================================\n",
"\n",
"country_flows = (\n",
" flows_acc_day\n",
" .groupby(\"country\", as_index=False)\n",
" .agg(\n",
" total_abs_flows=(\"abs_flow\", \"sum\"),\n",
" total_signed_flows=(\"Value € - NetFlows\", \"sum\"),\n",
" n_accounts_active=(\"Registrar Account - ID\", \"nunique\"),\n",
" n_flow_days=(\"Centralisation Date\", \"nunique\"),\n",
" share_multicountry_accounts=(\"n_countries_in_stocks\", lambda x: (x > 1).mean())\n",
" )\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# Helpful ratios\n",
"country_flows[\"avg_abs_flows_per_active_account\"] = country_flows[\"total_abs_flows\"] / country_flows[\"n_accounts_active\"].replace(0, np.nan)\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# Sort\n",
"country_flows = country_flows.sort_values(\"total_abs_flows\", ascending=False)\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"print(\"\\nTop 15 countries by total_abs_flows (robust):\")\n",
"print(country_flows.head(15))\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# ============================================================\n",
"# 5) FOCUS CHECK: France vs UK\n",
"# ============================================================\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"focus = country_flows[country_flows[\"country\"].isin([\"FRANCE\", \"UNITED KINGDOM\", \"UK\", \"UNITED-KINGDOM\"])].copy()\n",
"print(\"\\nFrance vs UK diagnostics:\")\n",
"print(focus)\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# ============================================================\n",
"# 6) EXTRA CHECK: is France driven by a few accounts?\n",
"# ============================================================\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"flows_acc = (\n",
" flows_acc_day\n",
" .groupby([\"country\", \"Registrar Account - ID\"], as_index=False)\n",
" .agg(acc_abs_flows=(\"abs_flow\", \"sum\"))\n",
2025-11-28 17:29:51 +01:00
")\n",
"\n",
2026-02-01 22:06:59 +01:00
"fr = flows_acc[flows_acc[\"country\"] == \"FRANCE\"].sort_values(\"acc_abs_flows\", ascending=False)\n",
"uk = flows_acc[flows_acc[\"country\"] == \"UNITED KINGDOM\"].sort_values(\"acc_abs_flows\", ascending=False)\n",
"\n",
"print(\"\\nFrance: top 10 accounts driving flows:\")\n",
"print(fr.head(10))\n",
"\n",
"print(\"\\nUK: top 10 accounts driving flows:\")\n",
"print(uk.head(10))\n"
2025-11-28 17:29:51 +01:00
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 19,
"id": "99b7401c-ca9a-4571-96ed-f2db30ce8c70",
2025-11-28 17:29:51 +01:00
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
2026-02-01 22:06:59 +01:00
"colorscale": [
[
0,
"rgb(255,247,236)"
],
[
1,
"rgb(255,247,236)"
]
],
2025-11-28 17:29:51 +01:00
"customdata": {
2026-02-01 22:06:59 +01:00
"bdata": "FYwKLqpit0EAAAAAAAAiQGUZ4roebJJBAAAAAAAAGEDNzMw807BzQQAAAAAAAABASL/9UpVXh0EAAAAAAAAIQBSuR7kxgXJBAAAAAAAAGEDOqs9FS4hIQQAAAAAAABRALSEfkBN7V0EAAAAAAAAIQD0K1xM/qF9BAAAAAAAAFEBIv33rkcdiQQAAAAAAACJAkML1DAuTdEEAAAAAAADwP3zysIRm8l1BAAAAAAAACEDVeOk+uyVjQQAAAAAAAAhAH4XrUbguaEAAAAAAAADwPyv2lw2cmHRBAAAAAAAAHEDD9SgEUFp6QQAAAAAAAABAio7knVBbgEEAAAAAAAAUQBb7y9wrxXtBAAAAAAAAGEBI4WootzK3QQAAAAAAJJJAR5sCXqc1c0HpoosuuuhHQEebAl6nNXNB6aKLLrroR0BHmwJepzVzQemiiy666EdAR5sCXqc1c0HpoosuuuhHQEebAl6nNXNB6aKLLrroR0BHmwJepzVzQemiiy666EdAR5sCXqc1c0HpoosuuuhHQEebAl6nNXNB6aKLLrroR0BHmwJepzVzQemiiy666EdAR5sCXqc1c0HpoosuuuhHQEebAl6nNXNB6aKLLrroR0A=",
2025-11-28 17:29:51 +01:00
"dtype": "f8",
2026-02-01 22:06:59 +01:00
"shape": "29, 2"
2025-11-28 17:29:51 +01:00
},
2026-02-01 22:06:59 +01:00
"geo": "geo",
"hovertemplate": "<b>%{hovertext}</b><br><br>flow_bin=< €0.5bn<br>iso3=%{location}<br>total_abs_flows=%{customdata[0]:,.0f}<br>n_accounts_active=%{customdata[1]}<extra></extra>",
"hovertext": [
"AUSTRIA",
"CANADA",
"CZECH REPUBLIC",
2025-11-28 17:29:51 +01:00
"DENMARK",
"FINLAND",
2026-02-01 22:06:59 +01:00
"GREECE",
"HONG KONG",
"IRELAND",
"ISRAEL",
"LITHUANIA",
"MALTA",
2025-11-28 17:29:51 +01:00
"MAURITIUS",
2026-02-01 22:06:59 +01:00
"POLAND",
"SINGAPORE",
"SOUTH AFRICA",
"TAIWAN",
2025-11-28 17:29:51 +01:00
"UNITED ARAB EMIRATES",
2026-02-01 22:06:59 +01:00
"UNITED STATES",
"ARGENTINA",
"BRAZIL",
"CHILE",
"COLOMBIA",
"MEXICO",
"PERU",
"URUGUAY",
"PARAGUAY",
"BOLIVIA",
"ECUADOR",
"VENEZUELA"
],
"locations": [
"AUT",
"CAN",
"CZE",
"DNK",
"FIN",
"GRC",
"HKG",
"IRL",
"ISR",
"LTU",
"MLT",
"MUS",
"POL",
"SGP",
"ZAF",
"TWN",
"ARE",
"USA",
"ARG",
"BRA",
"CHL",
"COL",
"MEX",
"PER",
"URY",
"PRY",
"BOL",
"ECU",
"VEN"
],
"name": "< €0.5bn",
"showlegend": true,
"showscale": false,
"type": "choropleth",
"z": [
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1
]
},
{
"colorscale": [
[
0,
"rgb(254,232,200)"
],
[
1,
"rgb(254,232,200)"
]
],
"customdata": {
"bdata": "Ns0TkRzIxEEAAAAAAAAAQJEPAmNj5MRBAAAAAAAAKEC1FYsI+fzAQQAAAAAAAEBA/yG9A875vkEAAAAAAAAsQKkT5JkbTdJBAAAAAACAQkA=",
"dtype": "f8",
"shape": "5, 2"
},
"geo": "geo",
"hovertemplate": "<b>%{hovertext}</b><br><br>flow_bin=€0.52bn<br>iso3=%{location}<br>total_abs_flows=%{customdata[0]:,.0f}<br>n_accounts_active=%{customdata[1]}<extra></extra>",
"hovertext": [
"JAPAN",
2025-11-28 17:29:51 +01:00
"MONACO",
"NETHERLANDS",
2026-02-01 22:06:59 +01:00
"PORTUGAL",
"SWEDEN"
],
"locations": [
"JPN",
"MCO",
"NLD",
"PRT",
"SWE"
],
"name": "€0.52bn",
"showlegend": true,
"showscale": false,
"type": "choropleth",
"z": [
1,
1,
1,
1,
1
]
},
{
"colorscale": [
[
0,
"rgb(253,212,158)"
],
[
1,
"rgb(253,212,158)"
]
],
"customdata": {
"bdata": "F0gETJOl6kEAAAAAAPCCQA==",
"dtype": "f8",
"shape": "1, 2"
},
"geo": "geo",
"hovertemplate": "<b>%{hovertext}</b><br><br>flow_bin=€210bn<br>iso3=%{location}<br>total_abs_flows=%{customdata[0]:,.0f}<br>n_accounts_active=%{customdata[1]}<extra></extra>",
"hovertext": [
"UNITED KINGDOM"
2025-11-28 17:29:51 +01:00
],
2026-02-01 22:06:59 +01:00
"locations": [
"GBR"
],
"name": "€210bn",
"showlegend": true,
"showscale": false,
"type": "choropleth",
"z": [
1
]
},
{
"colorscale": [
[
0,
"rgb(253,187,132)"
],
[
1,
"rgb(253,187,132)"
]
],
"customdata": {
"bdata": "ZaolLP4xEEIAAAAAACBiQKLFTu7dQRJCAAAAAAAAVEAUUAcz/6kLQgAAAAAAMHFA",
"dtype": "f8",
"shape": "3, 2"
},
"geo": "geo",
"hovertemplate": "<b>%{hovertext}</b><br><br>flow_bin=€1030bn<br>iso3=%{location}<br>total_abs_flows=%{customdata[0]:,.0f}<br>n_accounts_active=%{customdata[1]}<extra></extra>",
"hovertext": [
"BELGIUM",
"GERMANY",
"SWITZERLAND"
],
"locations": [
"BEL",
"DEU",
"CHE"
],
"name": "€1030bn",
"showlegend": true,
"showscale": false,
"type": "choropleth",
"z": [
1,
1,
1
]
},
{
"colorscale": [
[
0,
"rgb(252,141,89)"
],
[
1,
"rgb(252,141,89)"
]
],
"customdata": {
"bdata": "msh6L4kiJkIAAAAAAI6kQH/bLt2K+itCAAAAAADgfUDdRClAIQMuQgAAAAAAYHNAqZNwtKlCJ0IAAAAAAKB0QA==",
"dtype": "f8",
"shape": "4, 2"
2025-11-28 17:29:51 +01:00
},
2026-02-01 22:06:59 +01:00
"geo": "geo",
"hovertemplate": "<b>%{hovertext}</b><br><br>flow_bin=> €30bn<br>iso3=%{location}<br>total_abs_flows=%{customdata[0]:,.0f}<br>n_accounts_active=%{customdata[1]}<extra></extra>",
"hovertext": [
"FRANCE",
"ITALY",
"LUXEMBOURG",
"SPAIN"
],
"locations": [
"FRA",
"ITA",
"LUX",
"ESP"
],
"name": "> €30bn",
"showlegend": true,
"showscale": false,
"type": "choropleth",
"z": [
1,
1,
1,
1
]
2025-11-28 17:29:51 +01:00
}
],
"layout": {
2026-02-01 22:06:59 +01:00
"geo": {
"center": {},
"coastlinecolor": "rgba(0,0,0,0.3)",
"domain": {
"x": [
0,
1
],
"y": [
0,
1
]
},
"projection": {
"type": "natural earth"
},
"showcoastlines": true,
"showframe": false
},
2025-11-28 17:29:51 +01:00
"legend": {
2026-02-01 22:06:59 +01:00
"title": {
"text": "Total absolute net flows (€)"
},
2025-11-28 17:29:51 +01:00
"tracegroupgap": 0
},
2026-02-01 22:06:59 +01:00
"margin": {
"b": 0,
"l": 0,
"r": 0,
"t": 40
},
2025-11-28 17:29:51 +01:00
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermap": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermap"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
}
}
},
2026-02-01 22:06:59 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAyoAAAFoCAYAAAC4xg+JAAAQAElEQVR4AexdBUBVSRf+Ht3dSJeUgqiI3d0dv6vu2mutuebatbau7eoaa3c3FioKWKACiqgg3R3yz7n6WFBQUBDQQefeOzNnzpz57n3vzTdnZq5EDv/jCHAEOAIcAY4AR4AjwBHgCHAEOALlDAEJ8D+OAEeghBHg6jgCHAGOAEeAI8AR4AhwBL4WAU5UvhZBXp4jwBHgCHAESh8BXgNHgCPAEeAI/HAIcKLyw91y3mCOAEeAI8AR4AhwBDgCAMeAI1DeEeBEpbzfIW4fR4AjwBHgCHAEOAIcAY4AR+AHRKACEpUf8C7xJnMEOAIcAY4AR4AjwBHgCHAEfjAEOFH5wW44by5HoEAEeCJHgCPAEeAIcAQ4AhyBcoYAJyrl7IZwczgCHAGOAEfg+0CAt4IjwBHgCHAEvg4BTlS+Dj9emiPAEeAIcAQ4AhwBjgBH4NsgwGv5wRDgROUHu+G8uRwBjgBHgCPAEeAIcAQ4AhyBioAAJyrf4i7xOjgCHAGOAEeAI8AR4AhwBDgCHIFiIcCJSrHg4sIcAY5AeUGA28ER4AhwBDgCHAGOwPeNACcq3/f95a3jCHAEOAIcAY5AURHgchwBjgBHoFwhwIlKubod3BiOAEeAI8AR4AhwBDgCHIHvBwHekq9BgBOVr0GPl+UIcAQ4AhwBjgBHgCPAEeAIcARKBQFOVEoF1oqvlLeAI8AR4AhwBDgCHAGOAEeAI1CWCHCiUpbo87o5AhyBHwkB3laOAEeAI8AR4AhwBIqBACcqxQCLi3IEOAIcAY4AR4AjUJ4Q4LZwBDgC3zMCnKh8z3eXt40jwBHgCHAEOAIcAY4AR4AjUBwEypEsJyrl6GZwUzgCHAGOAEeAI8AR4AhwBDgCHIF3CHCi8g4Hfqz4CPAWcAQ4AhwBjgBHgCPAEeAIfEcIcKLyHd1M3hSOAEeAI1CyCHBtHAGOAEeAI8ARKDsEOFEpO+x5zRwBjgBHgCPAEeAI/GgI8PZyBDgCRUaAE5UiQ8UFOQIcAY4AR4AjwBHgCHAEOAIcgW+FQFGJyreyh9fDEeAIcAQ4AhwBjgBHgCPAEeAIcATAiQp/CDgCZYYAr5gjwBHgCHAEOAIcAY4AR6AwBDhRKQwZns4R4AhwBDgCFQ8BbjFHgCPAEeAIfDcIcKLy3dxK3hCOAEeAI8AR4AhwBDgCJY8A18gRKCsEOFEpK+R5vRwBjgBHgCPAEeAIcAQ4AhwBjkChCHzHRKXQNvMMjgBHgCPAEeAIcAQ4AhwBjgBHoJwjwIlKOb9B3DyOQLlCgBvDEeAIcAQ4AhwBjgBH4BshwInKNwKaV8MR4AhwBDgCHIGCEOBpHAGOAEeAI1AwApyoFIwLT+UIcAQ4AhwBjgBHgCPAEaiYCHCrvxMEOFH5Tm4kbwZHgCPAEeAIcAQ4AhwBjgBH4HtCgBOV8nQ3uS0cAY4AR4AjwBHgCHAEOAIcAY6AgAAnKgIM/MAR4Ah8rwjwdnEEOAIcAY4AR4AjUDER4ESlYt43bjVHgCPAEeAIcATKCgFeL0eAI8AR+CYIcKLyTWDmlXAEOAIcAY4AR4AjwBHgCHAECkOApxeEACcqBaHC0zgCHAGOAEeAI8AR4AhwBDgCHIEyRYATlTKFv+JXzlvAEeAIcAQ4AhwBjgBHgCPAESgNBDhRKQ1UuU6OAEeAI/DlCPCSHAGOAEeAI8AR4AgwBDhRYSDw/xwBjgBHgCPAEeAIfM8I8LZxBDgCFREBTlQq4l3jNnMEOAIcAY4AR4AjwBHgCHAEyhKBb1A3JyrfAGReBUeAI8AR4AhwBDgCHAGOAEeAI1A8BDhRKR5eXLriI8BbwBHgCHAEOAIcAY4AR4AjUAEQ4ESlAtwkbiJHgCPAESjfCHDrOAIcAY4AR4AjUPIIcKJS8phyjRwBjgBHgCPAEeAIcAS+DgFemiPAEQAnKvwh4AhwBDgCHAGOAEeAI/CDI3DZwwcte09EZHTcFyNBZUkH6fpiJZ8puGX3Kfz82yKkpKZ9RvL7yKb22jfsDwpTFmwCxctj++mek41kX1GRL0qZkiYqRbWNy3EEfhgEIiMjkZKSIrQ3LS0NMTExoLSIiAhQEDK+wSE1NRXx8fGIiooSbMjKysqt9dGjR/D29saDBw9w79493PXyxu07d3HrticLdL6D25534eV9D/cfPMTTp0/x4sULQVeuEn7BEeAIcAQ4Al+FgLijTx2+wgJ1Vj9VCXX+iCyQrk/J/Sh5RGioY1+cDnRpYlMce3yfvsC+Y5exb8NM+Lpvw/zJg0rTtC/WTc/aojW7MXZId/zSq3U+PZRHz2Pe51l8LxrVdsaa+aOxedcJUFvzFXwf4UTlPRD8xBEoKgJENh4+fIhLly4JnfuQkBBs27YN06dPx4ABA9CzRw/8Onw4unfvjrG//YYxY0aja9cuGDjwF3Tq1Akzpk/D4EGDMGTIEPzyy89o27YtOrN0kh86dAj69evHyozBoUOHcOLECcTFxX1kWmZmpkB+iHhQppgIRUdH4/Xr1wgLCwMRE09PT4F4/PPPP/h1xGhcuX4Ld+/54rbXAxw7eRYHDh3Ffha2bv8XDx4/R9pbWWRJKkEkqwppBQ3IKmlBXkWLnTUhpaCOHGklpGZJITopG8Gh0bjv64/T5y7gzNnzOMPON254kDk8cAQ4AhwBjsAXIKCtqYYz/y4WOqW+rGNKnTgjAx24H1yRm1ZeO6tf0Fxe5DMIRETHwlBPC2bGep+RLNvsY+c8YKCriV4dG+czhEhzwy5j0K1dw9znl55rJQW5XGJCZKVRHWfsOnQ+X1lxhBMVMRL8zBH4AIFp06YJZGLUqFGYOnWqQEBat26N3r17YfGihTh86ADW/rUG8+bOQXpKAjq0boTJY4dh+aI/MP+PCVi5eBbGjvwFu7asxqlDO7F5zWKcPrwTa5bOweE9W3B492bs27EBe7evw+a1SzB/5iS4ulRBiyb1IUI2du7ciVWrVmH79h24cu0GIwKXcOjoCezZfxhHT5zB2YtX4eF5D3v2HcKkSZMxdfoMrF2/AT6P/PE4IBjnLl4RyMiOf/cJZTOZB8XG3hlVnGuiajVX1KrbGLXqNUWtuk0wbtJ0NG/ZBpWMjGFgYAh9fQPo6elDl4XIyAhcv3oZF8+exKljh3Du9DEcP7QPRw7sxZFDB/Ho4SPBy3LT4xZued4RCFJiYiIyMjIEROPi4vDs2TMEBATA398fwcHBApEisiUI8ANHoCwQ4HVyBCowAuRVEY9Qu7Udntvpo1HpKfM34VVoBKiDSDIkC/ZHeSRLaeIgHtlm2UX6X1QdEZGxwjSywuohm8R5dKa42ACxx4HSKdBoPI3Ki/PznsWyH7aD9FEg2bkrduC2z2Ms27BPmD5FGFA7KI/0kn6qhwJ5Xkgn5RUUxPInLtwEyVIZCuK6xGXEcpRHgWTFej9lj7g8namTP2LKSsH2Gq2GIq/dlJ83UP1UD4W8clQn1U35YnnSS20mGymNzhSndIoTNqSDdFGga0qjvIIC1XHD8yHq1HSEgrxcrgjpLczL0qNDY9jbmObK9uncDF4P/HOf49wMdsGJCgOB//+xEaCpWOvXr8fMmTPx008/Ma9Ifzx+/BgvWIf6xYsX0FRTRu+ubbBs4R84xMjFoX83Y8fmVVi9ZA42//UnViyaiVbNG8K4kiGsrSygr6cLVVUVdtZBJQP9QsFNTklBQGCQQCg2bt2FCVPn4MiJc/DzD4K2jiE6d+6CPv/rjxp1mkBT1xiWtlVRvVYD1G/cErXrN4Vr7QaMcNSEU4066PvLcDRq1gat2neHtY09bOyqoIZbAwz59TcMHDqShV+xcOkaKCsr57NHQkICkpKS+dI+jMjKysLWzgENGjdH+07d0b13fzRs2hxKKiowMTWDQxUXVDKxgLqGFiQkpXDq7AVcv3kXx06dw5Hjp9m1F15HJOJlWBwjUYG4fPUm9h88jOvXb3xY1Q8Vz87OBhE6sTeMvGOhoaFISEjIxeHVq1c4d+4cyDP2/Plz5OTkCHnp6emIjY0Vpt5
},
"metadata": {},
"output_type": "display_data"
},
{
"ename": "RuntimeError",
"evalue": "\n\nKaleido requires Google Chrome to be installed.\n\nEither download and install Chrome yourself following Google's instructions for your operating system,\nor install it from your terminal by running:\n\n $ plotly_get_chrome\n\n",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mChromeNotFoundError\u001b[39m Traceback (most recent call last)",
"\u001b[31mChromeNotFoundError\u001b[39m: ",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[31mChromeNotFoundError\u001b[39m Traceback (most recent call last)",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/plotly/io/_kaleido.py:398\u001b[39m, in \u001b[36mto_image\u001b[39m\u001b[34m(fig, format, width, height, scale, validate, engine)\u001b[39m\n\u001b[32m 388\u001b[39m height = (\n\u001b[32m 389\u001b[39m height\n\u001b[32m 390\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m fig_dict.get(\u001b[33m\"\u001b[39m\u001b[33mlayout\u001b[39m\u001b[33m\"\u001b[39m, {}).get(\u001b[33m\"\u001b[39m\u001b[33mheight\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m (...)\u001b[39m\u001b[32m 395\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m defaults.default_height\n\u001b[32m 396\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m398\u001b[39m img_bytes = \u001b[43mkaleido\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcalc_fig_sync\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 399\u001b[39m \u001b[43m \u001b[49m\u001b[43mfig_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 400\u001b[39m \u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mdict\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[32m 401\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mdefaults\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdefault_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 402\u001b[39m \u001b[43m \u001b[49m\u001b[43mwidth\u001b[49m\u001b[43m=\u001b[49m\u001b[43mwidth\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 403\u001b[39m \u001b[43m \u001b[49m\u001b[43mheight\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 404\u001b[39m \u001b[43m \u001b[49m\u001b[43mscale\u001b[49m\u001b[43m=\u001b[49m\u001b[43mscale\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mdefaults\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdefault_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 405\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 406\u001b[39m \u001b[43m \u001b[49m\u001b[43mtopojson\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdefaults\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtopojson\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 407\u001b[39m \u001b[43m \u001b[49m\u001b[43mkopts\u001b[49m\u001b[43m=\u001b[49m\u001b[43mkopts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 408\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 409\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ChromeNotFoundError:\n",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/kaleido/__init__.py:171\u001b[39m, in \u001b[36mcalc_fig_sync\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_sync_server\u001b[49m\u001b[43m.\u001b[49m\u001b[43moneshot_async_run\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcalc_fig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m=\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m=\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/kaleido/_sync_server.py:131\u001b[39m, in \u001b[36moneshot_async_run\u001b[39m\u001b[34m(func, args, kwargs)\u001b[39m\n\u001b[32m 130\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(res, \u001b[38;5;167;01mBaseException\u001b[39;00m):\n\u001b[32m--> \u001b[39m\u001b[32m131\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m res\n\u001b[32m 132\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/kaleido/_sync_server.py:122\u001b[39m, in \u001b[36moneshot_async_run.<locals>.run\u001b[39m\u001b[34m(func, q, *args, **kwargs)\u001b[39m\n\u001b[32m 121\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m122\u001b[39m q.put(\u001b[43masyncio\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 123\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e: \u001b[38;5;66;03m# noqa: BLE001\u001b[39;00m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/asyncio/runners.py:195\u001b[39m, in \u001b[36mrun\u001b[39m\u001b[34m(main, debug, loop_factory)\u001b[39m\n\u001b[32m 194\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m Runner(debug=debug, loop_factory=loop_factory) \u001b[38;5;28;01mas\u001b[39;00m runner:\n\u001b[32m--> \u001b[39m\u001b[32m195\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrunner\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmain\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/asyncio/runners.py:118\u001b[39m, in \u001b[36mRunner.run\u001b[39m\u001b[34m(self, coro, context)\u001b[39m\n\u001b[32m 117\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m118\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_loop\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun_until_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 119\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m exceptions.CancelledError:\n",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/asyncio/base_events.py:725\u001b[39m, in \u001b[36mBaseEventLoop.run_until_complete\u001b[39m\u001b[34m(self, future)\u001b[39m\n\u001b[32m 723\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[33m'\u001b[39m\u001b[33mEvent loop stopped before Future completed.\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m725\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfuture\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/kaleido/__init__.py:101\u001b[39m, in \u001b[36mcalc_fig\u001b[39m\u001b[34m(fig, path, opts, topojson, kopts)\u001b[39m\n\u001b[32m 100\u001b[39m kopts[\u001b[33m\"\u001b[39m\u001b[33mn\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[32m1\u001b[39m \u001b[38;5;66;03m# should we force this?\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m101\u001b[39m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mKaleido\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkopts\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m k:\n\u001b[32m 102\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m k.calc_fig(\n\u001b[32m 103\u001b[39m fig,\n\u001b[32m 104\u001b[39m path=path,\n\u001b[32m 105\u001b[39m opts=opts,\n\u001b[32m 106\u001b[39m topojson=topojson,\n\u001b[32m 107\u001b[39m )\n",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/kaleido/kaleido.py:164\u001b[39m, in \u001b[36mKaleido.__init__\u001b[39m\u001b[34m(self, page_generator, n, timeout, width, height, stepper, plotlyjs, mathjax, *args, **kwargs)\u001b[39m\n\u001b[32m 163\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ChromeNotFoundError:\n\u001b[32m--> \u001b[39m\u001b[32m164\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m ChromeNotFoundError(\n\u001b[32m 165\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mKaleido v1 and later requires Chrome to be installed. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 166\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mTo install Chrome, use the CLI command `kaleido_get_chrome`, \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 167\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mor from Python, use either `kaleido.get_chrome()` \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 168\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mor `kaleido.get_chrome_sync()`.\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 169\u001b[39m ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mChromeNotFoundError\u001b[39;00m\n\u001b[32m 171\u001b[39m \u001b[38;5;66;03m# do this during open because it requires close\u001b[39;00m\n",
"\u001b[31mChromeNotFoundError\u001b[39m: Kaleido v1 and later requires Chrome to be installed. To install Chrome, use the CLI command `kaleido_get_chrome`, or from Python, use either `kaleido.get_chrome()` or `kaleido.get_chrome_sync()`.",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[31mRuntimeError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[19]\u001b[39m\u001b[32m, line 178\u001b[39m\n\u001b[32m 166\u001b[39m fig.update_layout(\n\u001b[32m 167\u001b[39m margin=\u001b[38;5;28mdict\u001b[39m(l=\u001b[32m0\u001b[39m, r=\u001b[32m0\u001b[39m, t=\u001b[32m40\u001b[39m, b=\u001b[32m0\u001b[39m),\n\u001b[32m 168\u001b[39m legend_title_text=\u001b[33m\"\u001b[39m\u001b[33mTotal absolute net flows (€)\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 174\u001b[39m )\n\u001b[32m 175\u001b[39m )\n\u001b[32m 177\u001b[39m fig.show()\n\u001b[32m--> \u001b[39m\u001b[32m178\u001b[39m \u001b[43mfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwrite_image\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtotal_flow_intensity_world.png\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mscale\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m3\u001b[39;49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/plotly/basedatatypes.py:3895\u001b[39m, in \u001b[36mBaseFigure.write_image\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 3891\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m kwargs.get(\u001b[33m\"\u001b[39m\u001b[33mengine\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m 3892\u001b[39m warnings.warn(\n\u001b[32m 3893\u001b[39m ENGINE_PARAM_DEPRECATION_MSG, \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m, stacklevel=\u001b[32m2\u001b[39m\n\u001b[32m 3894\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m3895\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpio\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwrite_image\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/plotly/io/_kaleido.py:528\u001b[39m, in \u001b[36mwrite_image\u001b[39m\u001b[34m(fig, file, format, scale, width, height, validate, engine)\u001b[39m\n\u001b[32m 524\u001b[39m \u001b[38;5;28mformat\u001b[39m = infer_format(path, \u001b[38;5;28mformat\u001b[39m)\n\u001b[32m 526\u001b[39m \u001b[38;5;66;03m# Request image\u001b[39;00m\n\u001b[32m 527\u001b[39m \u001b[38;5;66;03m# Do this first so we don't create a file if image conversion fails\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m528\u001b[39m img_data = \u001b[43mto_image\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 529\u001b[39m \u001b[43m \u001b[49m\u001b[43mfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 530\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 531\u001b[39m \u001b[43m \u001b[49m\u001b[43mscale\u001b[49m\u001b[43m=\u001b[49m\u001b[43mscale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 532\u001b[39m \u001b[43m \u001b[49m\u001b[43mwidth\u001b[49m\u001b[43m=\u001b[49m\u001b[43mwidth\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 533\u001b[39m \u001b[43m \u001b[49m\u001b[43mheight\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 534\u001b[39m \u001b[43m \u001b[49m\u001b[43mvalidate\u001b[49m\u001b[43m=\u001b[49m\u001b[43mvalidate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 535\u001b[39m \u001b[43m \u001b[49m\u001b[43mengine\u001b[49m\u001b[43m=\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 536\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 538\u001b[39m \u001b[38;5;66;03m# Open file\u001b[39;00m\n\u001b[32m 539\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 540\u001b[39m \u001b[38;5;66;03m# We previously failed to make sense of `file` as a pathlib object.\u001b[39;00m\n\u001b[32m 541\u001b[39m \u001b[38;5;66;03m# Attempt to write to `file` as an open file descriptor.\u001b[39;00m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/plotly/io/_kaleido.py:410\u001b[39m, in \u001b[36mto_image\u001b[39m\u001b[34m(fig, format, width, height, scale, validate, engine)\u001b[39m\n\u001b[32m 398\u001b[39m img_bytes = kaleido.calc_fig_sync(\n\u001b[32m 399\u001b[39m fig_dict,\n\u001b[32m 400\u001b[39m opts=\u001b[38;5;28mdict\u001b[39m(\n\u001b[32m (...)\u001b[39m\u001b[32m 407\u001b[39m kopts=kopts,\n\u001b[32m 408\u001b[39m )\n\u001b[32m 409\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ChromeNotFoundError:\n\u001b[32m--> \u001b[39m\u001b[32m410\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(PLOTLY_GET_CHROME_ERROR_MSG)\n\u001b[32m 412\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 413\u001b[39m \u001b[38;5;66;03m# Kaleido v0\u001b[39;00m\n\u001b[32m 414\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ENABLE_KALEIDO_V0_DEPRECATION_WARNINGS:\n",
"\u001b[31mRuntimeError\u001b[39m: \n\nKaleido requires Google Chrome to be installed.\n\nEither download and install Chrome yourself following Google's instructions for your operating system,\nor install it from your terminal by running:\n\n $ plotly_get_chrome\n\n"
]
}
],
"source": [
"# ============================================================\n",
"# GEOGRAPHIC COVERAGE — TOTAL FLOW INTENSITY (LATAM + USA FIXED)\n",
"# ============================================================\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.express as px\n",
"import pycountry\n",
"\n",
"# ------------------------------------------------------------\n",
"# Helper: country name → ISO-3\n",
"# ------------------------------------------------------------\n",
"\n",
"def country_to_iso3(name):\n",
" try:\n",
" return pycountry.countries.lookup(name).alpha_3\n",
" except:\n",
" return None\n",
"\n",
"# ------------------------------------------------------------\n",
"# 0. Harmonize country labels (USA + special buckets)\n",
"# ------------------------------------------------------------\n",
"\n",
"stocks[\"RegistrarAccount - Country\"] = stocks[\"RegistrarAccount - Country\"].replace({\n",
" \"US OFFSHORE\": \"UNITED STATES\"\n",
"})\n",
"\n",
"flows[\"RegistrarAccount - Country\"] = flows[\"RegistrarAccount - Country\"].replace({\n",
" \"US OFFSHORE\": \"UNITED STATES\"\n",
"})\n",
"\n",
"# ------------------------------------------------------------\n",
"# 1. Aggregate flows at Account × Date level (robust)\n",
"# ------------------------------------------------------------\n",
"\n",
"flows_acc_day = (\n",
" flows\n",
" .assign(abs_flow=lambda x: x[\"Value € - NetFlows\"].abs())\n",
" .groupby([\"Registrar Account - ID\", \"Centralisation Date\"], as_index=False)\n",
" .agg(abs_flow=(\"abs_flow\", \"sum\"))\n",
")\n",
"\n",
"# ------------------------------------------------------------\n",
"# 2. Map each Registrar Account to a country (from STOCKS)\n",
"# ------------------------------------------------------------\n",
"\n",
"account_country = (\n",
" stocks\n",
" .groupby(\"Registrar Account - ID\")[\"RegistrarAccount - Country\"]\n",
" .agg(lambda x: x.mode().iloc[0])\n",
" .reset_index()\n",
")\n",
"\n",
"flows_acc_day = flows_acc_day.merge(\n",
" account_country,\n",
" on=\"Registrar Account - ID\",\n",
" how=\"left\"\n",
")\n",
"\n",
"# ------------------------------------------------------------\n",
"# 3. Aggregate TOTAL absolute flows per country\n",
"# ------------------------------------------------------------\n",
"\n",
"country_flows = (\n",
" flows_acc_day\n",
" .groupby(\"RegistrarAccount - Country\", as_index=False)\n",
" .agg(\n",
" total_abs_flows=(\"abs_flow\", \"sum\"),\n",
" n_accounts_active=(\"Registrar Account - ID\", \"nunique\")\n",
" )\n",
")\n",
"\n",
"# ------------------------------------------------------------\n",
"# 4. Handle LATAM explicitly (visual redistribution)\n",
"# ------------------------------------------------------------\n",
"\n",
"latam_countries = [\n",
" \"ARGENTINA\", \"BRAZIL\", \"CHILE\", \"COLOMBIA\", \"MEXICO\",\n",
" \"PERU\", \"URUGUAY\", \"PARAGUAY\", \"BOLIVIA\",\n",
" \"ECUADOR\", \"VENEZUELA\"\n",
"]\n",
"\n",
"latam_row = country_flows[\n",
" country_flows[\"RegistrarAccount - Country\"] == \"LATAM\"\n",
"]\n",
"\n",
"if not latam_row.empty:\n",
" latam_flow = latam_row[\"total_abs_flows\"].iloc[0]\n",
" latam_accounts = latam_row[\"n_accounts_active\"].iloc[0]\n",
"\n",
" latam_df = pd.DataFrame({\n",
" \"RegistrarAccount - Country\": latam_countries,\n",
" \"total_abs_flows\": latam_flow / len(latam_countries),\n",
" \"n_accounts_active\": latam_accounts / len(latam_countries)\n",
" })\n",
"\n",
" country_flows = pd.concat(\n",
" [\n",
" country_flows[\n",
" ~country_flows[\"RegistrarAccount - Country\"].isin(\n",
" [\"LATAM\", \"INTERNATIONAL\", \"UNKNOWN\"]\n",
" )\n",
" ],\n",
" latam_df\n",
" ],\n",
" ignore_index=True\n",
" )\n",
"\n",
"else:\n",
" country_flows = country_flows[\n",
" ~country_flows[\"RegistrarAccount - Country\"].isin(\n",
" [\"INTERNATIONAL\", \"UNKNOWN\"]\n",
" )\n",
" ]\n",
"\n",
"# ------------------------------------------------------------\n",
"# 5. Convert country names to ISO-3 (with diagnostics)\n",
"# ------------------------------------------------------------\n",
"\n",
"country_flows[\"iso3\"] = country_flows[\"RegistrarAccount - Country\"].apply(country_to_iso3)\n",
"\n",
"unmapped = country_flows[country_flows[\"iso3\"].isna()]\n",
"if not unmapped.empty:\n",
" print(\"Unmapped country labels:\")\n",
" print(unmapped[\"RegistrarAccount - Country\"].unique())\n",
"\n",
"country_flows = country_flows.dropna(subset=[\"iso3\"])\n",
"\n",
"# ------------------------------------------------------------\n",
"# 6. Discretize flows (order-of-magnitude legend)\n",
"# ------------------------------------------------------------\n",
"\n",
"bins = [0, 5e8, 2e9, 1e10, 3e10, np.inf]\n",
"labels = [\n",
" \"< €0.5bn\",\n",
" \"€0.52bn\",\n",
" \"€210bn\",\n",
" \"€1030bn\",\n",
" \"> €30bn\"\n",
"]\n",
"\n",
"country_flows[\"flow_bin\"] = pd.cut(\n",
" country_flows[\"total_abs_flows\"],\n",
" bins=bins,\n",
" labels=labels,\n",
" include_lowest=True\n",
")\n",
"\n",
"# ------------------------------------------------------------\n",
"# 7. World map — total flow intensity\n",
"# ------------------------------------------------------------\n",
"\n",
"fig = px.choropleth(\n",
" country_flows,\n",
" locations=\"iso3\",\n",
" color=\"flow_bin\",\n",
" hover_name=\"RegistrarAccount - Country\",\n",
" hover_data={\n",
" \"total_abs_flows\": \":,.0f\",\n",
" \"n_accounts_active\": True\n",
" },\n",
" category_orders={\"flow_bin\": labels},\n",
" color_discrete_sequence=px.colors.sequential.OrRd\n",
")\n",
"\n",
"fig.update_layout(\n",
" margin=dict(l=0, r=0, t=40, b=0),\n",
" legend_title_text=\"Total absolute net flows (€)\",\n",
" geo=dict(\n",
" showframe=False,\n",
" showcoastlines=True,\n",
" coastlinecolor=\"rgba(0,0,0,0.3)\",\n",
" projection_type=\"natural earth\"\n",
" )\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "36b93cfb-d7f5-4ba4-a86b-024247bddc5a",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>n_accounts</th>\n",
" <th>iso3</th>\n",
" <th>account_bin</th>\n",
" <th>total_abs_flows</th>\n",
" <th>n_accounts_active</th>\n",
" <th>share_accounts</th>\n",
" <th>share_flows</th>\n",
" <th>flow_per_account</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>LUXEMBOURG</td>\n",
" <td>331.0</td>\n",
" <td>LUX</td>\n",
" <td>High (200999)</td>\n",
" <td>6.445076e+10</td>\n",
" <td>310.0</td>\n",
" <td>0.047619</td>\n",
" <td>0.228139</td>\n",
" <td>1.947153e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>ITALY</td>\n",
" <td>487.0</td>\n",
" <td>ITA</td>\n",
" <td>High (200999)</td>\n",
" <td>6.008376e+10</td>\n",
" <td>478.0</td>\n",
" <td>0.070062</td>\n",
" <td>0.212681</td>\n",
" <td>1.233753e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>SPAIN</td>\n",
" <td>340.0</td>\n",
" <td>ESP</td>\n",
" <td>High (200999)</td>\n",
" <td>4.995133e+10</td>\n",
" <td>330.0</td>\n",
" <td>0.048914</td>\n",
" <td>0.176814</td>\n",
" <td>1.469157e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>FRANCE</td>\n",
" <td>2672.0</td>\n",
" <td>FRA</td>\n",
" <td>Very high (≥ 1,000)</td>\n",
" <td>4.753435e+10</td>\n",
" <td>2631.0</td>\n",
" <td>0.384405</td>\n",
" <td>0.168259</td>\n",
" <td>1.778980e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>GERMANY</td>\n",
" <td>90.0</td>\n",
" <td>DEU</td>\n",
" <td>Medium (50199)</td>\n",
" <td>1.960362e+10</td>\n",
" <td>80.0</td>\n",
" <td>0.012948</td>\n",
" <td>0.069392</td>\n",
" <td>2.178180e+08</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" RegistrarAccount - Country n_accounts iso3 account_bin \\\n",
"15 LUXEMBOURG 331.0 LUX High (200999) \n",
"12 ITALY 487.0 ITA High (200999) \n",
"24 SPAIN 340.0 ESP High (200999) \n",
"6 FRANCE 2672.0 FRA Very high (≥ 1,000) \n",
"7 GERMANY 90.0 DEU Medium (50199) \n",
"\n",
" total_abs_flows n_accounts_active share_accounts share_flows \\\n",
"15 6.445076e+10 310.0 0.047619 0.228139 \n",
"12 6.008376e+10 478.0 0.070062 0.212681 \n",
"24 4.995133e+10 330.0 0.048914 0.176814 \n",
"6 4.753435e+10 2631.0 0.384405 0.168259 \n",
"7 1.960362e+10 80.0 0.012948 0.069392 \n",
"\n",
" flow_per_account \n",
"15 1.947153e+08 \n",
"12 1.233753e+08 \n",
"24 1.469157e+08 \n",
"6 1.778980e+07 \n",
"7 2.178180e+08 "
]
2025-11-28 17:29:51 +01:00
},
2026-02-01 22:06:59 +01:00
"execution_count": 22,
2025-11-28 17:29:51 +01:00
"metadata": {},
2026-02-01 22:06:59 +01:00
"output_type": "execute_result"
2025-11-28 17:29:51 +01:00
}
],
"source": [
2026-02-01 22:06:59 +01:00
"# ------------------------------------------------------------\n",
"# 8. Merge accounts & flows at country level\n",
"# ------------------------------------------------------------\n",
"\n",
"country_summary = (\n",
" country_accounts\n",
" .merge(\n",
" country_flows[[\n",
" \"RegistrarAccount - Country\",\n",
" \"total_abs_flows\",\n",
" \"n_accounts_active\"\n",
" ]],\n",
" on=\"RegistrarAccount - Country\",\n",
" how=\"inner\"\n",
" )\n",
2025-11-28 17:29:51 +01:00
")\n",
"\n",
2026-02-01 22:06:59 +01:00
"# Totals (for shares)\n",
"total_accounts = country_summary[\"n_accounts\"].sum()\n",
"total_flows = country_summary[\"total_abs_flows\"].sum()\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"country_summary[\"share_accounts\"] = country_summary[\"n_accounts\"] / total_accounts\n",
"country_summary[\"share_flows\"] = country_summary[\"total_abs_flows\"] / total_flows\n",
"country_summary[\"flow_per_account\"] = (\n",
" country_summary[\"total_abs_flows\"] / country_summary[\"n_accounts\"]\n",
2025-11-28 17:29:51 +01:00
")\n",
"\n",
2026-02-01 22:06:59 +01:00
"country_summary.sort_values(\n",
" \"total_abs_flows\", ascending=False\n",
").head()\n"
2025-11-28 17:29:51 +01:00
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": 23,
"id": "fa687277-05fe-405e-967d-9ba91dce2a95",
2025-11-28 17:29:51 +01:00
"metadata": {},
"outputs": [
{
2026-02-01 22:06:59 +01:00
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>n_accounts</th>\n",
" <th>total_abs_flows</th>\n",
" <th>share_accounts</th>\n",
" <th>share_flows</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>UNITED STATES</td>\n",
" <td>1166.0</td>\n",
" <td>3.891997e+08</td>\n",
" <td>0.167746</td>\n",
" <td>0.001378</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>UNITED KINGDOM</td>\n",
" <td>622.0</td>\n",
" <td>3.576470e+09</td>\n",
" <td>0.089484</td>\n",
" <td>0.012660</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" RegistrarAccount - Country n_accounts total_abs_flows share_accounts \\\n",
"30 UNITED STATES 1166.0 3.891997e+08 0.167746 \n",
"29 UNITED KINGDOM 622.0 3.576470e+09 0.089484 \n",
"\n",
" share_flows \n",
"30 0.001378 \n",
"29 0.012660 "
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
2025-11-28 17:29:51 +01:00
}
],
"source": [
2026-02-01 22:06:59 +01:00
"# Countries with many accounts but low flow intensity\n",
"low_flow_high_accounts = (\n",
" country_summary\n",
" .query(\"share_accounts > 0.05 and share_flows < 0.05\")\n",
" .sort_values(\"n_accounts\", ascending=False)\n",
")\n",
"\n",
"low_flow_high_accounts[\n",
" [\n",
" \"RegistrarAccount - Country\",\n",
" \"n_accounts\",\n",
" \"total_abs_flows\",\n",
" \"share_accounts\",\n",
" \"share_flows\"\n",
" ]\n",
"]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bbebadf0-19a3-426a-bfb1-96b48a94832f",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# =========================\n",
"# STYLE — PUBLICATION\n",
"# =========================\n",
"\n",
"sns.set_theme(\n",
" style=\"whitegrid\",\n",
" context=\"paper\",\n",
" font_scale=1.2\n",
")\n",
"\n",
"# =========================\n",
"# DATA PREP\n",
"# =========================\n",
"\n",
"stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"])\n",
"flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"])\n",
"\n",
"snapshot_date = stocks[\"Centralisation Date\"].max()\n",
"\n",
"aum = (\n",
" stocks.loc[stocks[\"Centralisation Date\"] == snapshot_date]\n",
" .groupby(\"Registrar Account - ID\")[\"Value - AUM €\"]\n",
" .sum()\n",
" .rename(\"aum_eur\")\n",
2025-11-28 17:29:51 +01:00
")\n",
"\n",
2026-02-01 22:06:59 +01:00
"flow_activity = (\n",
" flows\n",
" .groupby(\"Registrar Account - ID\")[\"Value € - NetFlows\"]\n",
" .apply(lambda x: x.abs().mean())\n",
" .rename(\"avg_abs_flow\")\n",
")\n",
"\n",
"df = pd.concat([aum, flow_activity], axis=1).dropna()\n",
"df = df[(df[\"aum_eur\"] > 0) & (df[\"avg_abs_flow\"] > 0)]\n",
"\n",
"# =========================\n",
"# ECDF FUNCTION\n",
"# =========================\n",
"\n",
"def ecdf(x):\n",
" x = np.sort(x)\n",
" y = np.arange(1, len(x) + 1) / len(x)\n",
" return x, y\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"x_aum, y_aum = ecdf(df[\"aum_eur\"])\n",
"x_flow, y_flow = ecdf(df[\"avg_abs_flow\"])\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# =========================\n",
"# PLOT\n",
"# =========================\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"fig, ax = plt.subplots(figsize=(7.5, 5))\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"ax.plot(x_aum, y_aum, label=\"Assets under management (AUM)\", linewidth=2)\n",
"ax.plot(x_flow, y_flow, label=\"Average absolute net flows\", linewidth=2)\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# Percentiles to annotate\n",
"percentiles = [0.5, 0.75, 0.9]\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"for p in percentiles:\n",
" q_aum = np.quantile(df[\"aum_eur\"], p)\n",
" q_flow = np.quantile(df[\"avg_abs_flow\"], p)\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
" ax.scatter(q_aum, p, s=25)\n",
" ax.scatter(q_flow, p, s=25)\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
" ax.text(\n",
" q_aum, p,\n",
" f\"{int(p*100)}%\",\n",
" ha=\"left\", va=\"bottom\", fontsize=9\n",
" )\n",
"\n",
"ax.set_xscale(\"log\")\n",
"ax.set_xlabel(\"€ (log scale)\")\n",
"ax.set_ylabel(\"Cumulative share of registrar accounts\")\n",
"ax.set_title(\"Empirical Distributions of Account Size and Flow Activity\")\n",
"ax.legend(frameon=False)\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"sns.despine()\n",
"plt.tight_layout()\n",
"plt.show()\n"
2025-11-28 17:29:51 +01:00
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": null,
"id": "9d647926-4bea-415d-9eb4-9745d76565e7",
2025-11-28 17:29:51 +01:00
"metadata": {},
2026-02-01 22:06:59 +01:00
"outputs": [],
2025-11-28 17:29:51 +01:00
"source": [
2026-02-01 22:06:59 +01:00
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# =========================\n",
"# STYLE — PUBLICATION\n",
"# =========================\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"sns.set_theme(\n",
" style=\"whitegrid\",\n",
" context=\"paper\",\n",
" font_scale=1.2\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# =========================\n",
"# DATA PREP\n",
"# =========================\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"])\n",
"flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"])\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"snapshot_date = stocks[\"Centralisation Date\"].max()\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# AUM snapshot\n",
"aum = (\n",
" stocks.loc[stocks[\"Centralisation Date\"] == snapshot_date]\n",
" .groupby(\"Registrar Account - ID\")[\"Value - AUM €\"]\n",
" .sum()\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# Flow activity\n",
"flow_activity = (\n",
" flows\n",
" .groupby(\"Registrar Account - ID\")[\"Value € - NetFlows\"]\n",
" .apply(lambda x: x.abs().mean())\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"df = pd.concat([aum, flow_activity], axis=1)\n",
"df.columns = [\"aum_eur\", \"avg_abs_flow\"]\n",
"df = df.dropna()\n",
"df = df[(df[\"aum_eur\"] > 0) & (df[\"avg_abs_flow\"] > 0)]\n",
"\n",
"# =========================\n",
"# QUANTILES\n",
"# =========================\n",
"\n",
"quantiles = [0.5, 0.75, 0.9, 0.95, 0.99]\n",
"\n",
"rows = []\n",
"for q in quantiles:\n",
" rows.append({\n",
" \"Percentile\": f\"{int(q*100)}%\",\n",
" \"Variable\": \"Assets under management (AUM)\",\n",
" \"Value\": df[\"aum_eur\"].quantile(q)\n",
" })\n",
" rows.append({\n",
" \"Percentile\": f\"{int(q*100)}%\",\n",
" \"Variable\": \"Average absolute net flows\",\n",
" \"Value\": df[\"avg_abs_flow\"].quantile(q)\n",
" })\n",
"\n",
"q_df = pd.DataFrame(rows)\n",
"\n",
"# =========================\n",
"# PLOT\n",
"# =========================\n",
"\n",
"plt.figure(figsize=(8, 5))\n",
"\n",
"sns.barplot(\n",
" data=q_df,\n",
" x=\"Percentile\",\n",
" y=\"Value\",\n",
" hue=\"Variable\"\n",
")\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.ylabel(\"€ (log scale)\")\n",
"plt.xlabel(\"Registrar account percentile\")\n",
"#plt.title(\"Account Size vs Flow Activity Across the Distribution\")\n",
"plt.legend(frameon=False)\n",
"\n",
"sns.despine()\n",
"plt.tight_layout()\n",
"\n",
"plt.savefig(\n",
" \"aum_vs_flow_activity_concentration_top10.png\",\n",
" dpi=300,\n",
" bbox_inches=\"tight\"\n",
")\n",
"\n",
"plt.show()"
2025-11-28 17:29:51 +01:00
]
},
{
"cell_type": "code",
2026-02-01 22:06:59 +01:00
"execution_count": null,
"id": "dc2aed5b-67a1-453d-9d74-d47a25135658",
2025-11-28 17:29:51 +01:00
"metadata": {},
2026-02-01 22:06:59 +01:00
"outputs": [],
2025-11-28 17:29:51 +01:00
"source": [
2026-02-01 22:06:59 +01:00
"# ------------------------------------------------------------\n",
"# 5. Convert country names to ISO-3\n",
"# ------------------------------------------------------------\n",
"\n",
"def country_to_iso3(name):\n",
" try:\n",
" return pycountry.countries.lookup(name).alpha_3\n",
" except:\n",
" return None\n",
"\n",
"country_flows[\"iso3\"] = country_flows[\"country\"].apply(country_to_iso3)\n",
"\n",
"# Diagnostic: unmapped countries\n",
"unmapped = country_flows[country_flows[\"iso3\"].isna()]\n",
"if not unmapped.empty:\n",
" print(\"Unmapped country labels:\")\n",
" print(unmapped[\"country\"].unique())\n",
"\n",
"country_flows = country_flows.dropna(subset=[\"iso3\"])\n",
"\n",
"# ------------------------------------------------------------\n",
"# 6. Discretize total absolute flows (order of magnitude)\n",
"# ------------------------------------------------------------\n",
"\n",
"bins = [0, 5e8, 2e9, 1e10, 3e10, np.inf]\n",
"labels = [\n",
" \"< €0.5bn\",\n",
" \"€0.52bn\",\n",
" \"€210bn\",\n",
" \"€1030bn\",\n",
" \"> €30bn\"\n",
2025-11-28 17:29:51 +01:00
"]\n",
"\n",
"\n",
2026-02-01 22:06:59 +01:00
"country_flows[\"flow_bin\"] = pd.cut(\n",
" country_flows[\"total_abs_flows\"],\n",
" bins=bins,\n",
" labels=labels,\n",
" include_lowest=True\n",
")\n",
"\n",
"# ------------------------------------------------------------\n",
"# 7. World map — total absolute net flows\n",
"# ------------------------------------------------------------\n",
"\n",
"fig = px.choropleth(\n",
" country_flows,\n",
" locations=\"iso3\",\n",
" color=\"flow_bin\",\n",
" hover_name=\"country\",\n",
" hover_data={\n",
" \"total_abs_flows\": \":,.0f\",\n",
" \"n_accounts_active\": True\n",
" },\n",
" category_orders={\"flow_bin\": labels},\n",
" color_discrete_sequence=px.colors.sequential.OrRd,\n",
" title=\"Geographic distribution of total absolute net flows (€)\"\n",
2025-11-28 17:29:51 +01:00
")\n",
"\n",
2026-02-01 22:06:59 +01:00
"fig.update_layout(\n",
" margin=dict(l=0, r=0, t=60, b=0),\n",
" legend_title_text=\"Total absolute net flows (€)\",\n",
" geo=dict(\n",
" showframe=False,\n",
" showcoastlines=True,\n",
" coastlinecolor=\"rgba(0,0,0,0.3)\",\n",
" projection_type=\"natural earth\"\n",
" )\n",
2025-11-28 17:29:51 +01:00
")\n",
"\n",
2026-02-01 22:06:59 +01:00
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a37023a0-388c-4883-a193-36d4c3eeead2",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# =========================\n",
"# STYLE\n",
"# =========================\n",
"sns.set_theme(style=\"whitegrid\", context=\"paper\", font_scale=1.2)\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# =========================\n",
"# DATA PREP\n",
"# =========================\n",
"stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"])\n",
"flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"])\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"snapshot_date = stocks[\"Centralisation Date\"].max()\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# AUM snapshot par account\n",
"aum = (\n",
" stocks.loc[stocks[\"Centralisation Date\"] == snapshot_date]\n",
" .groupby(\"Registrar Account - ID\")[\"Value - AUM €\"]\n",
" .sum()\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"# Flow activity totale par account\n",
"flows_activity = (\n",
" flows\n",
" .groupby(\"Registrar Account - ID\")[\"Value € - NetFlows\"]\n",
" .apply(lambda x: x.abs().sum())\n",
2025-11-28 17:29:51 +01:00
")\n",
"\n",
2026-02-01 22:06:59 +01:00
"df = pd.concat([aum, flows_activity], axis=1)\n",
"df.columns = [\"aum_eur\", \"abs_flows\"]\n",
"df = df.dropna()\n",
"df = df[(df[\"aum_eur\"] > 0) & (df[\"abs_flows\"] > 0)]\n",
"\n",
"# =========================\n",
"# TOP 10 % VS BOTTOM 90 %\n",
"# =========================\n",
"top_pct = 0.10\n",
"df_sorted = df.sort_values(\"aum_eur\", ascending=False)\n",
"cutoff = int(len(df_sorted) * top_pct)\n",
"\n",
"top = df_sorted.iloc[:cutoff]\n",
"bottom = df_sorted.iloc[cutoff:]\n",
"\n",
"aum_top = top[\"aum_eur\"].sum()\n",
"aum_bottom = bottom[\"aum_eur\"].sum()\n",
"\n",
"flows_top = top[\"abs_flows\"].sum()\n",
"flows_bottom = bottom[\"abs_flows\"].sum()\n",
"\n",
"# Normalisation\n",
"aum_total = aum_top + aum_bottom\n",
"flows_total = flows_top + flows_bottom\n",
"\n",
"plot_df = pd.DataFrame({\n",
" \"Metric\": [\"AUM\", \"Flow activity\"],\n",
" \"Top 10%\": [aum_top / aum_total, flows_top / flows_total],\n",
" \"Bottom 90%\": [aum_bottom / aum_total, flows_bottom / flows_total],\n",
"})\n",
"\n",
"# =========================\n",
"# PLOT — STACKED BAR\n",
"# =========================\n",
"fig, ax = plt.subplots(figsize=(6, 4))\n",
"\n",
"ax.bar(\n",
" plot_df[\"Metric\"],\n",
" plot_df[\"Top 10%\"],\n",
" label=\"Top 10% of registrar accounts\",\n",
" color=\"steelblue\"\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"ax.bar(\n",
" plot_df[\"Metric\"],\n",
" plot_df[\"Bottom 90%\"],\n",
" bottom=plot_df[\"Top 10%\"],\n",
" label=\"Bottom 90% of registrar accounts\",\n",
" color=\"lightgrey\"\n",
")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"ax.set_ylim(0, 1)\n",
"ax.set_ylabel(\"Share of total\")\n",
"ax.set_title(\"Concentration of AUM and Flow Activity\\nTop 10% vs Bottom 90% of Accounts\")\n",
2025-11-28 17:29:51 +01:00
"\n",
2026-02-01 22:06:59 +01:00
"ax.legend(frameon=False)\n",
"sns.despine()\n",
"plt.tight_layout()\n",
"plt.show()\n"
]
},
{
"cell_type": "markdown",
"id": "7f64f406-6f06-4d56-9684-834aaccefe5c",
"metadata": {},
"source": [
"# MERGE"
2025-11-28 17:29:51 +01:00
]
},
{
"cell_type": "code",
"execution_count": null,
2026-02-01 22:06:59 +01:00
"id": "e9bb67ab-9029-4ace-b960-b3d6e0b8683c",
"metadata": {},
"outputs": [],
"source": [
"# 1. Merge behavior (flows) with static diversification (stocks)\n",
"client_master = client_behavior.merge(\n",
" account_static,\n",
" on=\"Registrar Account - ID\",\n",
" how=\"left\"\n",
")\n",
"\n",
"# 2. Add geographic info\n",
"client_master = client_master.merge(\n",
" geo,\n",
" on=\"Registrar Account - ID\",\n",
" how=\"left\"\n",
")\n",
"\n",
"# 3. Create additional engineered features\n",
"client_master[\"log_total_aum\"] = np.log1p(client_master[\"total_aum\"].clip(lower=0))\n",
"client_master[\"log_median_aum\"] = np.log1p(client_master[\"median_aum\"].clip(lower=0))\n",
"\n",
"\n",
"# 4. Replace NaN flow volatility with 0 (inactive accounts)\n",
"client_master[\"flow_volatility\"] = client_master[\"flow_volatility\"].fillna(0)\n",
"\n",
"# 5. Fill missing diversification metrics with 0 (for accounts without stocks)\n",
"client_master[[\"n_isin_held\",\"n_funds_held\",\"n_asset_types\",\"n_strategies\"]] = \\\n",
" client_master[[\"n_isin_held\",\"n_funds_held\",\"n_asset_types\",\"n_strategies\"]].fillna(0)\n",
"\n",
"# 6. Fill missing geography as “UNKNOWN”\n",
"client_master[\"country\"] = client_master[\"country\"].fillna(\"UNKNOWN\")\n",
"client_master[\"region\"] = client_master[\"region\"].fillna(\"UNKNOWN\")\n",
"\n",
"# 7. Export\n",
"client_master.to_csv(\"client_master.csv\", index=False)\n",
"\n",
"print(client_master.head())\n",
"print(client_master.describe(include='all'))\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "ea64d610-7816-4ead-8af5-33999fee87b4",
2025-11-28 17:29:51 +01:00
"metadata": {},
"outputs": [],
2026-02-01 22:06:59 +01:00
"source": [
"stocks.to_csv('stocks.csv')\n",
"flows.to_csv('flows.csv')"
]
2025-11-28 17:29:51 +01:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2026-02-01 22:06:59 +01:00
"version": "3.13.11"
2025-11-28 17:29:51 +01:00
}
},
2025-11-14 16:27:55 +01:00
"nbformat": 4,
"nbformat_minor": 5
}