Project_Carmignac/data_exploration/dataloader.ipynb

2831 lines
599 KiB
Plaintext
Raw Normal View History

2025-12-05 09:49:17 +01:00
{
"cells": [
2025-12-05 11:03:19 +01:00
{
"cell_type": "markdown",
"id": "2b62d0d6",
"metadata": {},
"source": [
"# Imports and setup"
]
},
2025-12-05 09:49:17 +01:00
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 30,
2025-12-05 09:49:17 +01:00
"id": "126c8a80-d9ad-4816-84f0-0c3d580f62c8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: openpyxl in /opt/python/lib/python3.13/site-packages (3.1.5)\n",
2025-12-05 11:03:19 +01:00
"Requirement already satisfied: et-xmlfile in /opt/python/lib/python3.13/site-packages (from openpyxl) (2.0.0)\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.3\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
2025-12-05 09:49:17 +01:00
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
2025-12-05 11:03:19 +01:00
"!pip install openpyxl\n",
"import os\n",
"import s3fs\n",
"import seaborn as sns\n",
"import plotly.express as px"
2025-12-05 09:49:17 +01:00
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 2,
2025-12-05 09:49:17 +01:00
"id": "ff2261fb-9516-4410-b42d-3acc8dc1a460",
"metadata": {},
"outputs": [],
"source": [
"# S3 client for the endpoint below. Credentials are read from environment\n",
"# variables; a missing variable raises KeyError.\n",
"fs = s3fs.S3FileSystem(\n",
"    key=os.environ[\"AWS_ACCESS_KEY_ID\"],\n",
"    secret=os.environ[\"AWS_SECRET_ACCESS_KEY\"],\n",
"    token=os.environ[\"AWS_SESSION_TOKEN\"],\n",
"    client_kwargs=dict(endpoint_url='https://'+'minio-simple.lab.groupe-genes.fr'),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "3d36f3f0-bd40-4a83-96d1-b46d75f5a4c5",
"metadata": {},
"source": [
2025-12-05 11:03:19 +01:00
"# Exploration"
2025-12-05 09:49:17 +01:00
]
},
{
"cell_type": "markdown",
"id": "eaf5c5a0-eb1c-4242-b893-7600e6def109",
"metadata": {},
"source": [
"Helper functions"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": null,
2025-12-05 09:49:17 +01:00
"id": "60e2035c-c2f0-4c51-97df-102e67ba96ee",
"metadata": {},
"outputs": [],
"source": [
"def plot_account(account_id, isin=None):\n",
"    \"\"\"\n",
"    Plot the evolution of the summed `Quantity - AUM` for one registrar account.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    account_id : value compared with the `Registrar Account - ID` column.\n",
"    isin : optional; when given, only rows whose `Product - Isin` equals it are kept.\n",
"\n",
"    Notes\n",
"    -----\n",
"    Reads the notebook-level DataFrame `merged`, which is not created in this cell\n",
"    and must exist before the function is called.\n",
"    Prints a message and returns None when no row matches; otherwise shows the figure.\n",
"    \"\"\"\n",
"\n",
"    # Build one boolean mask and filter once. Boolean indexing already returns a new\n",
"    # frame, so the full `merged` table never has to be copied.\n",
"    keep = merged[\"Registrar Account - ID\"] == account_id\n",
"    if isin is not None:\n",
"        keep = keep & (merged[\"Product - Isin\"] == isin)\n",
"    df = merged[keep]\n",
"\n",
"    if df.empty:\n",
"        print(f\"No data found for account {account_id}\")\n",
"        return\n",
"\n",
"    # One point per centralisation date, in chronological order.\n",
"    df_plot = (\n",
"        df.groupby(\"Centralisation Date\")[\"Quantity - AUM\"]\n",
"        .sum()\n",
"        .reset_index()\n",
"        .sort_values(\"Centralisation Date\")\n",
"    )\n",
"\n",
"    # Plot\n",
"    plt.figure(figsize=(12, 4))\n",
"    plt.plot(df_plot[\"Centralisation Date\"], df_plot[\"Quantity - AUM\"], marker='o')\n",
"    plt.title(f\"Stock Evolution for Account {account_id}\", fontsize=14)\n",
"    plt.xlabel(\"Date\")\n",
"    plt.ylabel(\"Total AUM\")\n",
"    plt.grid(True)\n",
"    plt.show()\n"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 4,
2025-12-05 09:49:17 +01:00
"id": "37e008b1-32d4-44be-9d23-1b90a5a26f89",
"metadata": {},
"outputs": [],
"source": [
"# 2. BASIC INSPECTION\n",
"\n",
"def quick_info(df, name):\n",
"    \"\"\"Print a text summary of `df` under a banner titled with `name`.\n",
"\n",
"    Printed in order: shape, column names, dtypes, percentage of missing values\n",
"    per column (descending), the first 5 rows, and the number of distinct values\n",
"    per column (descending). Returns None.\n",
"    \"\"\"\n",
"    banner = \"=\" * 80\n",
"    print(\"\\n\" + banner)\n",
"    print(f\"DATASET : {name}\")\n",
"    print(banner)\n",
"\n",
"    print(\"\\nShape :\", df.shape)\n",
"    print(\"\\nColumns :\", df.columns.tolist())\n",
"    print(\"\\nDtypes :\\n\", df.dtypes)\n",
"\n",
"    # Share of missing cells per column, expressed in percent.\n",
"    missing_pct = df.isna().mean().sort_values(ascending=False) * 100\n",
"    print(\"\\nMissing values (%) :\\n\", missing_pct)\n",
"\n",
"    print(\"\\nSample rows:\\n\", df.head(5))\n",
"\n",
"    distinct_counts = df.nunique().sort_values(ascending=False)\n",
"    print(\"\\nUnique values per column:\\n\", distinct_counts)"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 7,
2025-12-05 09:49:17 +01:00
"id": "e67a99ea-ddf4-4627-8f48-ec183c671acb",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2025-12-05 11:03:19 +01:00
"/tmp/ipykernel_5991/19230119.py:2: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" flows = pd.read_csv(f, sep=\";\")\n",
"/tmp/ipykernel_5991/19230119.py:5: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" stocks = pd.read_csv(f, sep=\";\")\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"================================================================================\n",
"DATASET : STOCKS\n",
"================================================================================\n",
"\n",
"Shape : (4880297, 18)\n",
"\n",
"Columns : ['Agreement - Code', 'Company - Id', 'Company - Ultimate Parent Id', 'Registrar Account - ID', 'Registrar Account - Region', 'RegistrarAccount - Country', 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status', 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type', 'Product - Shareclass Currency', 'Product - Isin', 'Centralisation Date', 'Quantity - AUM', 'Value - AUM CCY', 'Value - AUM €']\n",
"\n",
"Dtypes :\n",
" Agreement - Code object\n",
"Company - Id object\n",
"Company - Ultimate Parent Id object\n",
"Registrar Account - ID object\n",
"Registrar Account - Region object\n",
"RegistrarAccount - Country object\n",
"Product - Asset Type object\n",
"Product - Strategy object\n",
"Product - Legal Status object\n",
"Product - Is Dedie ? object\n",
"Product - Fund object\n",
"Product - Shareclass Type object\n",
"Product - Shareclass Currency object\n",
"Product - Isin object\n",
"Centralisation Date object\n",
"Quantity - AUM float64\n",
"Value - AUM CCY float64\n",
"Value - AUM € float64\n",
"dtype: object\n",
"\n",
"Missing values (%) :\n",
" Product - Asset Type 6.471553\n",
"Company - Id 2.330801\n",
"Company - Ultimate Parent Id 2.330801\n",
"Product - Strategy 0.001537\n",
"Product - Shareclass Type 0.000717\n",
"Agreement - Code 0.000000\n",
"RegistrarAccount - Country 0.000000\n",
"Registrar Account - Region 0.000000\n",
"Product - Legal Status 0.000000\n",
"Registrar Account - ID 0.000000\n",
"Product - Is Dedie ? 0.000000\n",
"Product - Fund 0.000000\n",
"Product - Shareclass Currency 0.000000\n",
"Product - Isin 0.000000\n",
"Centralisation Date 0.000000\n",
"Quantity - AUM 0.000000\n",
"Value - AUM CCY 0.000000\n",
"Value - AUM € 0.000000\n",
"dtype: float64\n",
"\n",
"Sample rows:\n",
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 3 166.0 166.0 \n",
"1 3 166.0 166.0 \n",
"2 3 166.0 166.0 \n",
"3 3 166.0 166.0 \n",
"4 3 166.0 166.0 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200000647 France \n",
"1 200000647 France \n",
"2 200000647 France \n",
"3 200000647 France \n",
"4 200000647 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Diversified Patrimoine \n",
"1 France Diversified Patrimoine \n",
"2 France Diversified Patrimoine \n",
"3 France Diversified Patrimoine \n",
"4 France Diversified Patrimoine \n",
"\n",
" Product - Legal Status Product - Is Dedie ? Product - Fund \\\n",
"0 FCP NO Carmignac Patrimoine \n",
"1 FCP NO Carmignac Patrimoine \n",
"2 FCP NO Carmignac Patrimoine \n",
"3 FCP NO Carmignac Patrimoine \n",
"4 FCP NO Carmignac Patrimoine \n",
"\n",
" Product - Shareclass Type Product - Shareclass Currency Product - Isin \\\n",
"0 A EUR FR0010135103 \n",
"1 A EUR FR0010135103 \n",
"2 A EUR FR0010135103 \n",
"3 A EUR FR0010135103 \n",
"4 A EUR FR0010135103 \n",
"\n",
" Centralisation Date Quantity - AUM Value - AUM CCY Value - AUM € \n",
"0 2015-03-31 35.368 24648.6666 24648.6666 \n",
"1 2015-11-30 35.368 22413.0553 22413.0553 \n",
"2 2015-12-31 35.368 22051.2406 22051.2406 \n",
"3 2016-03-31 35.368 21626.1173 21626.1173 \n",
"4 2016-11-30 35.368 22489.4502 22489.4502 \n",
"\n",
"Unique values per column:\n",
" Value - AUM € 1697923\n",
"Value - AUM CCY 1689620\n",
"Quantity - AUM 554404\n",
"Registrar Account - ID 15532\n",
"Agreement - Code 2521\n",
"Company - Id 1970\n",
"Company - Ultimate Parent Id 1392\n",
"Product - Isin 491\n",
"Centralisation Date 130\n",
"Product - Fund 74\n",
"Product - Strategy 52\n",
"RegistrarAccount - Country 39\n",
"Registrar Account - Region 15\n",
"Product - Shareclass Type 11\n",
"Product - Legal Status 6\n",
"Product - Shareclass Currency 6\n",
"Product - Asset Type 5\n",
"Product - Is Dedie ? 2\n",
"dtype: int64\n",
"\n",
"================================================================================\n",
"DATASET : FLOWS\n",
"================================================================================\n",
"\n",
"Shape : (2574461, 24)\n",
"\n",
"Columns : ['Agreement - Code', 'Company - Id', 'Company - Ultimate Parent Id', 'Registrar Account - ID', 'Registrar Account - Region', 'RegistrarAccount - Country', 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status', 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type', 'Product - Shareclass Currency', 'Product - Isin', 'Centralisation Date', 'Quantity - Subscription', 'Quantity - Redemption', 'Quantity - NetFlows', 'Value Ccy - Subscription', 'Value Ccy - Redemption', 'Value Ccy - NetFlows', 'Value € - Subscription', 'Value € - Redemption', 'Value € - NetFlows']\n",
"\n",
"Dtypes :\n",
" Agreement - Code object\n",
"Company - Id object\n",
"Company - Ultimate Parent Id object\n",
"Registrar Account - ID object\n",
"Registrar Account - Region object\n",
"RegistrarAccount - Country object\n",
"Product - Asset Type object\n",
"Product - Strategy object\n",
"Product - Legal Status object\n",
"Product - Is Dedie ? object\n",
"Product - Fund object\n",
"Product - Shareclass Type object\n",
"Product - Shareclass Currency object\n",
"Product - Isin object\n",
"Centralisation Date object\n",
"Quantity - Subscription float64\n",
"Quantity - Redemption float64\n",
"Quantity - NetFlows float64\n",
"Value Ccy - Subscription float64\n",
"Value Ccy - Redemption float64\n",
"Value Ccy - NetFlows float64\n",
"Value € - Subscription float64\n",
"Value € - Redemption float64\n",
"Value € - NetFlows float64\n",
"dtype: object\n",
"\n",
"Missing values (%) :\n",
" Product - Asset Type 0.079589\n",
"Company - Id 0.059818\n",
"Company - Ultimate Parent Id 0.059818\n",
"Product - Strategy 0.000233\n",
"Product - Shareclass Type 0.000078\n",
"Registrar Account - ID 0.000000\n",
"RegistrarAccount - Country 0.000000\n",
"Agreement - Code 0.000000\n",
"Registrar Account - Region 0.000000\n",
"Product - Legal Status 0.000000\n",
"Product - Is Dedie ? 0.000000\n",
"Product - Fund 0.000000\n",
"Product - Shareclass Currency 0.000000\n",
"Product - Isin 0.000000\n",
"Centralisation Date 0.000000\n",
"Quantity - Subscription 0.000000\n",
"Quantity - Redemption 0.000000\n",
"Quantity - NetFlows 0.000000\n",
"Value Ccy - Subscription 0.000000\n",
"Value Ccy - Redemption 0.000000\n",
"Value Ccy - NetFlows 0.000000\n",
"Value € - Subscription 0.000000\n",
"Value € - Redemption 0.000000\n",
"Value € - NetFlows 0.000000\n",
"dtype: float64\n",
"\n",
"Sample rows:\n",
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200127202 France \n",
"1 406533 France \n",
"2 406533 France \n",
"3 406533 France \n",
"4 406533 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Equity Investissement \n",
"1 France Diversified Patrimoine \n",
"2 France Equity Investissement \n",
"3 France Equity Investissement \n",
"4 France Equity Investissement \n",
"\n",
" Product - Legal Status Product - Is Dedie ? ... Centralisation Date \\\n",
"0 SICAV NO ... 2020-11-05 \n",
"1 FCP NO ... 2015-03-09 \n",
"2 FCP NO ... 2016-10-26 \n",
"3 FCP NO ... 2018-10-18 \n",
"4 FCP NO ... 2019-04-08 \n",
"\n",
" Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n",
"0 1636.00 0.000 1636.000 \n",
"1 144.69 0.000 144.690 \n",
"2 0.00 -8.321 -8.321 \n",
"3 0.00 -22.083 -22.083 \n",
"4 0.00 -465.992 -465.992 \n",
"\n",
" Value Ccy - Subscription Value Ccy - Redemption Value Ccy - NetFlows \\\n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 \n",
"\n",
" Value € - Subscription Value € - Redemption Value € - NetFlows \n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 \n",
"\n",
"[5 rows x 24 columns]\n",
"\n",
"Unique values per column:\n",
" Value € - NetFlows 2018916\n",
"Value Ccy - NetFlows 1972319\n",
"Value € - Redemption 1323531\n",
"Value Ccy - Redemption 1296468\n",
"Value € - Subscription 955890\n",
"Value Ccy - Subscription 926633\n",
"Quantity - NetFlows 667586\n",
"Quantity - Redemption 374378\n",
"Quantity - Subscription 359661\n",
"Registrar Account - ID 9805\n",
"Centralisation Date 2780\n",
"Company - Id 1929\n",
"Agreement - Code 1626\n",
"Company - Ultimate Parent Id 1283\n",
"Product - Isin 474\n",
"Product - Fund 70\n",
"Product - Strategy 49\n",
"RegistrarAccount - Country 34\n",
"Registrar Account - Region 15\n",
"Product - Shareclass Type 10\n",
"Product - Shareclass Currency 6\n",
"Product - Legal Status 6\n",
"Product - Asset Type 5\n",
"Product - Is Dedie ? 2\n",
"dtype: int64\n",
"\n",
"================================================================================\n",
"DATASET : NAV/PRICES\n",
"================================================================================\n",
"\n",
"Shape : (30333, 13)\n",
"\n",
"Columns : ['NavDate', 'LegalForm', 'Cod', 'PortfolioName', 'PTFCurrency', 'PortfolioAum_Eur', 'ShareClassIsin', 'ShareClassName', 'ShareClassCurrency', 'ShareClassPrice', 'NumberOfShares', 'ShareClassAumLocalCur', 'ShareClassAum_EUR']\n",
"\n",
"Dtypes :\n",
" 0\n",
"NavDate object\n",
"LegalForm object\n",
"Cod object\n",
"PortfolioName object\n",
"PTFCurrency object\n",
"PortfolioAum_Eur object\n",
"ShareClassIsin object\n",
"ShareClassName object\n",
"ShareClassCurrency object\n",
"ShareClassPrice object\n",
"NumberOfShares object\n",
"ShareClassAumLocalCur object\n",
"ShareClassAum_EUR object\n",
"dtype: object\n",
"\n",
"Missing values (%) :\n",
" 0\n",
"NavDate 0.0\n",
"LegalForm 0.0\n",
"Cod 0.0\n",
"PortfolioName 0.0\n",
"PTFCurrency 0.0\n",
"PortfolioAum_Eur 0.0\n",
"ShareClassIsin 0.0\n",
"ShareClassName 0.0\n",
"ShareClassCurrency 0.0\n",
"ShareClassPrice 0.0\n",
"NumberOfShares 0.0\n",
"ShareClassAumLocalCur 0.0\n",
"ShareClassAum_EUR 0.0\n",
"dtype: float64\n",
"\n",
"Sample rows:\n",
" 0 NavDate LegalForm Cod PortfolioName \\\n",
"0 31/12/2009 SICAV CC Carmignac Portfolio Climate Transition \n",
"1 31/12/2009 SICAV CFB Carmignac Portfolio Flexible Bond \n",
"2 31/12/2009 FCP CCT Carmignac Court Terme \n",
"3 31/12/2009 FCP CE Carmignac Emergents \n",
"4 31/12/2009 SICAV CAD Carmignac Portfolio Asia Discovery \n",
"\n",
"0 PTFCurrency PortfolioAum_Eur ShareClassIsin ShareClassName \\\n",
"0 EUR 941059600 LU0164455502 A EUR ACC \n",
"1 EUR 57063272.31 LU0336084032 A EUR ACC \n",
"2 EUR 788828666.5 FR0010149161 A EUR ACC \n",
"3 EUR 1508087050 FR0010149302 A EUR ACC \n",
"4 EUR 149490224.2 LU0336083810 A EUR ACC \n",
"\n",
"0 ShareClassCurrency ShareClassPrice NumberOfShares ShareClassAumLocalCur \\\n",
"0 EUR 287.21 3276555.83 941059600 \n",
"1 EUR 1016.833 56118.62745 57063272.31 \n",
"2 EUR 3687.84 213899.9161 788828666.5 \n",
"3 EUR 559.82 2693878.478 1508087050 \n",
"4 EUR 884.9 168934.5962 149490224.2 \n",
"\n",
"0 ShareClassAum_EUR \n",
"0 941059600 \n",
"1 57063272.31 \n",
"2 788828666.5 \n",
"3 1508087050 \n",
"4 149490224.2 \n",
"\n",
"Unique values per column:\n",
" 0\n",
"ShareClassAum_EUR 30211\n",
"ShareClassAumLocalCur 30032\n",
"NumberOfShares 28910\n",
"ShareClassPrice 14747\n",
"PortfolioAum_Eur 5505\n",
"ShareClassIsin 416\n",
"NavDate 210\n",
"ShareClassName 90\n",
"Cod 55\n",
"PortfolioName 55\n",
"LegalForm 6\n",
"ShareClassCurrency 6\n",
"PTFCurrency 2\n",
"dtype: int64\n"
2025-12-05 09:49:17 +01:00
]
}
],
"source": [
"# Identifier columns must be read as text. Letting pandas infer their type chunk by\n",
"# chunk produces mixed int/float/str values (DtypeWarning on columns 0-3), so the same\n",
"# identifier can look different in the two files (e.g. \"003\" vs 3, \"166\" vs 166.0).\n",
"ID_COLUMNS = [\n",
"    \"Agreement - Code\",\n",
"    \"Company - Id\",\n",
"    \"Company - Ultimate Parent Id\",\n",
"    \"Registrar Account - ID\",\n",
"]\n",
"ID_DTYPES = {id_col: str for id_col in ID_COLUMNS}\n",
"\n",
"# Flows extract (semicolon-separated CSV)\n",
"with fs.open('projet-bdc-data//carmignac/Flows ENSAE V2 -20251105.csv', 'rb') as f:\n",
"    flows = pd.read_csv(f, sep=\";\", dtype=ID_DTYPES)\n",
"\n",
"# Stocks (AUM) extract, same layout for the identifier columns\n",
"with fs.open('projet-bdc-data//carmignac/AUM ENSAE V2 -20251105.csv', 'rb') as f:\n",
"    stocks = pd.read_csv(f, sep=\";\", dtype=ID_DTYPES)\n",
"\n",
"# NAV workbook: read without a header, then split the first column on commas to\n",
"# rebuild the fields; the first row supplies the column names.\n",
"with fs.open('projet-bdc-data/carmignac/Monthly AUM and NAV since 2010.xlsx', 'rb') as f:\n",
"    nav_raw = pd.read_excel(f, header=None, engine=\"openpyxl\")\n",
"nav = nav_raw[0].str.split(\",\", expand=True)\n",
"# .tolist() keeps the labels but avoids naming the columns axis after row label 0.\n",
"nav.columns = nav.iloc[0].tolist()\n",
"nav = nav[1:].reset_index(drop=True)\n",
"\n",
"quick_info(stocks, \"STOCKS\")\n",
"quick_info(flows, \"FLOWS\")\n",
"quick_info(nav, \"NAV/PRICES\")"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 8,
2025-12-05 09:49:17 +01:00
"id": "9bc92c9f-216c-475e-bfb8-edc1a4e839f6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Date conversion done.\n",
"NAV numeric conversion done.\n",
"String normalization done.\n",
"\n",
"ISIN missing in FLOWS but present in STOCKS : 17\n",
"\n",
"ISIN missing in STOCKS but present in FLOWS : 0\n",
"\n",
"ISIN missing in NAV but present in FLOWS : 67\n",
"\n",
"ISIN missing in NAV but present in STOCKS : 76\n",
"\n",
"Accounts in STOCKS but NEVER in FLOWS : 5777\n",
"\n",
"Accounts in FLOWS but NEVER in STOCKS : 118\n",
"\n",
"CLIENT BEHAVIOR (first 5 rows):\n",
" Registrar Account - ID n_days n_transactions total_netflows mean_flow \\\n",
"0 100000028 3 3 -109.238 -36.412667 \n",
"1 100000042 1 1 -660.115 -660.115000 \n",
"2 100000065 1 1 -174.646 -174.646000 \n",
"3 100000069 65 73 -7479.755 -102.462397 \n",
"4 100000073 1 1 -133.402 -133.402000 \n",
"\n",
" std_flow total_subscription total_redemption churn_ratio \n",
"0 49.280511 0.000 -109.238 -1.092380e+11 \n",
"1 NaN 0.000 -660.115 -6.601150e+11 \n",
"2 NaN 0.000 -174.646 -1.746460e+11 \n",
"3 2168.971331 33320.402 -40800.157 -1.224480e+00 \n",
"4 NaN 0.000 -133.402 -1.334020e+11 \n",
"\n",
"FUND BEHAVIOR (first 5 rows):\n",
" Product - Isin n_accounts n_days total_netflows vol_flows\n",
"0 FR0010135103 2690 2723 -2.571327e+07 2622.609244\n",
"1 FR0010147603 733 2719 -2.562187e+06 1206.248205\n",
"2 FR0010148981 1841 2722 -3.609440e+06 1051.069183\n",
"3 FR0010148999 454 2306 -7.130297e+05 1265.364138\n",
"4 FR0010149112 934 2000 -9.438901e+05 1834.961721\n"
]
}
],
"source": [
"# 1. CLEAN DATES (the files use different date formats)\n",
"\n",
"# Both CSV extracts: let pandas infer the format; unparseable values become NaT.\n",
"for _frame in (stocks, flows):\n",
"    _frame[\"Centralisation Date\"] = pd.to_datetime(\n",
"        _frame[\"Centralisation Date\"], errors=\"coerce\"\n",
"    )\n",
"\n",
"# NAV file: day/month/year strings.\n",
"nav[\"NavDate\"] = pd.to_datetime(nav[\"NavDate\"], errors=\"coerce\", format=\"%d/%m/%Y\")\n",
"\n",
"print(\"Date conversion done.\")\n",
"\n",
"# 2. CLEAN NUMERIC COLUMNS FOR NAV FILE\n",
"\n",
"num_cols = [\n",
"    \"PortfolioAum_Eur\",\n",
"    \"ShareClassPrice\",\n",
"    \"NumberOfShares\",\n",
"    \"ShareClassAumLocalCur\",\n",
"    \"ShareClassAum_EUR\",\n",
"]\n",
"\n",
"for col in num_cols:\n",
"    # Text -> decimal point instead of decimal comma -> no spaces -> float\n",
"    _as_text = nav[col].astype(str)\n",
"    _with_dot = _as_text.str.replace(\",\", \".\", regex=False)\n",
"    nav[col] = _with_dot.str.replace(\" \", \"\").astype(float)\n",
"\n",
"print(\"NAV numeric conversion done.\")\n",
"\n",
"# 3. STANDARDIZE STRINGS FOR JOIN KEYS\n",
"\n",
"def norm(df):\n",
"    \"\"\"Strip and upper-case every object-dtype column of `df`, in place.\n",
"\n",
"    Missing values stay missing: a plain astype(str) would turn them into the\n",
"    literal text \"NAN\", which would then be counted as a real category by\n",
"    nunique() / groupby() and would match across files as if it were a key.\n",
"\n",
"    Returns the same DataFrame object that was passed in.\n",
"    \"\"\"\n",
"    for col in df.columns:\n",
"        if df[col].dtype == \"object\":\n",
"            cleaned = df[col].astype(str).str.strip().str.upper()\n",
"            # Keep the cleaned text only where the original cell held a value.\n",
"            df[col] = cleaned.where(df[col].notna())\n",
"    return df\n",
"\n",
"stocks = norm(stocks)\n",
"flows = norm(flows)\n",
"nav = norm(nav)\n",
"\n",
"print(\"String normalization done.\")\n",
"\n",
"\n",
"# 4. ANALYSE RELATIONS ACROSS FILES\n",
"\n",
"# Distinct ISIN codes found in each dataset\n",
"isin_stocks = set(stocks[\"Product - Isin\"].unique())\n",
"isin_flows = set(flows[\"Product - Isin\"].unique())\n",
"isin_nav = set(nav[\"ShareClassIsin\"].unique())\n",
"\n",
"# Size of each one-sided difference between the ISIN sets\n",
"for _label, _gap in (\n",
"    (\"\\nISIN missing in FLOWS but present in STOCKS :\", isin_stocks.difference(isin_flows)),\n",
"    (\"\\nISIN missing in STOCKS but present in FLOWS :\", isin_flows.difference(isin_stocks)),\n",
"    (\"\\nISIN missing in NAV but present in FLOWS :\", isin_flows.difference(isin_nav)),\n",
"    (\"\\nISIN missing in NAV but present in STOCKS :\", isin_stocks.difference(isin_nav)),\n",
"):\n",
"    print(_label, len(_gap))\n",
"\n",
"\n",
"# 5. CLIENTS: STOCKS VS FLOWS\n",
"\n",
"# Distinct registrar accounts found in each dataset\n",
"acc_stocks = set(stocks[\"Registrar Account - ID\"].unique())\n",
"acc_flows = set(flows[\"Registrar Account - ID\"].unique())\n",
"\n",
"print(\"\\nAccounts in STOCKS but NEVER in FLOWS :\", len(acc_stocks.difference(acc_flows)))\n",
"print(\"\\nAccounts in FLOWS but NEVER in STOCKS :\", len(acc_flows.difference(acc_stocks)))\n",
"\n",
"\n",
"# 6. CLIENT ACTIVITY METRICS (DETAILED)\n",
"\n",
"# One row per registrar account, aggregated over all of its flow records.\n",
"client_behavior = flows.groupby(\"Registrar Account - ID\", as_index=False).agg(\n",
"    n_days=pd.NamedAgg(column=\"Centralisation Date\", aggfunc=\"nunique\"),\n",
"    n_transactions=pd.NamedAgg(column=\"Quantity - NetFlows\", aggfunc=\"count\"),\n",
"    total_netflows=pd.NamedAgg(column=\"Quantity - NetFlows\", aggfunc=\"sum\"),\n",
"    mean_flow=pd.NamedAgg(column=\"Quantity - NetFlows\", aggfunc=\"mean\"),\n",
"    std_flow=pd.NamedAgg(column=\"Quantity - NetFlows\", aggfunc=\"std\"),\n",
"    total_subscription=pd.NamedAgg(column=\"Quantity - Subscription\", aggfunc=\"sum\"),\n",
"    total_redemption=pd.NamedAgg(column=\"Quantity - Redemption\", aggfunc=\"sum\"),\n",
")\n",
"\n",
"# Add churn metric: redeemed quantity over subscribed quantity.\n",
"# The 1e-9 offset only avoids a division by zero when nothing was subscribed.\n",
"client_behavior[\"churn_ratio\"] = client_behavior[\"total_redemption\"].div(\n",
"    client_behavior[\"total_subscription\"].add(1e-9)\n",
")\n",
"\n",
"print(\"\\nCLIENT BEHAVIOR (first 5 rows):\\n\", client_behavior.head())\n",
"\n",
"\n",
"# 7. FUNDS ACTIVITY METRICS\n",
"\n",
"# One row per ISIN: distinct accounts, distinct flow dates, net quantity, flow dispersion.\n",
"fund_behavior = (\n",
"    flows.groupby(\"Product - Isin\")\n",
"    .agg(\n",
"        n_accounts=pd.NamedAgg(column=\"Registrar Account - ID\", aggfunc=\"nunique\"),\n",
"        n_days=pd.NamedAgg(column=\"Centralisation Date\", aggfunc=\"nunique\"),\n",
"        total_netflows=pd.NamedAgg(column=\"Quantity - NetFlows\", aggfunc=\"sum\"),\n",
"        vol_flows=pd.NamedAgg(column=\"Quantity - NetFlows\", aggfunc=\"std\"),\n",
"    )\n",
"    .reset_index()\n",
")\n",
"\n",
"print(\"\\nFUND BEHAVIOR (first 5 rows):\\n\", fund_behavior.head())\n",
"\n",
"\n",
"# 8. SAVE INTERMEDIATE\n",
"\n",
2025-12-05 11:03:19 +01:00
"# to_csv does not create missing folders, so make sure the output folder exists.\n",
"os.makedirs(\"csv_outputs\", exist_ok=True)\n",
"client_behavior.to_csv(\"csv_outputs/client_behavior.csv\", index=False)\n",
"fund_behavior.to_csv(\"csv_outputs/fund_behavior.csv\", index=False)"
2025-12-05 09:49:17 +01:00
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 9,
2025-12-05 09:49:17 +01:00
"id": "afb51598-3a7b-41f2-8d25-5b4b8bfb1c8a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FULL usable ISIN : 407\n",
"Stocks only ISIN : 17\n",
"Flows only ISIN : 0\n",
"Missing NAV : 76\n",
"All ISIN groups saved into 4 separate files.\n"
]
}
],
"source": [
"# Split the ISIN universe according to which datasets contain each code.\n",
"valid_full = isin_stocks.intersection(isin_flows, isin_nav)\n",
"stocks_only = isin_stocks.difference(isin_flows)\n",
"flows_only = isin_flows.difference(isin_stocks)\n",
"missing_nav = isin_stocks.union(isin_flows).difference(isin_nav)\n",
"\n",
"print(\"FULL usable ISIN :\", len(valid_full))\n",
"print(\"Stocks only ISIN :\", len(stocks_only))\n",
"print(\"Flows only ISIN :\", len(flows_only))\n",
"print(\"Missing NAV :\", len(missing_nav))\n",
"\n",
2025-12-05 11:03:19 +01:00
"# Write each ISIN group sorted, so the CSV content is the same from one run to the\n",
"# next (iterating a set of strings has no stable order between interpreter runs).\n",
"# to_csv does not create missing folders, hence the makedirs call.\n",
"os.makedirs(\"csv_outputs\", exist_ok=True)\n",
"for _csv_path, _isin_group in (\n",
"    (\"csv_outputs/isin_full.csv\", valid_full),\n",
"    (\"csv_outputs/isin_stocks_only.csv\", stocks_only),\n",
"    (\"csv_outputs/isin_flows_only.csv\", flows_only),\n",
"    (\"csv_outputs/isin_missing_nav.csv\", missing_nav),\n",
"):\n",
"    pd.DataFrame({\"isin\": sorted(_isin_group)}).to_csv(_csv_path, index=False)\n",
2025-12-05 09:49:17 +01:00
"\n",
"print(\"All ISIN groups saved into 4 separate files.\")\n",
"\n"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 10,
2025-12-05 09:49:17 +01:00
"id": "61e0c71a-a1c6-4ed8-ba15-b7a9badc4d4a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Registrar Account - ID n_days n_transactions total_netflows mean_flow \\\n",
"0 100000028 3 3 -109.238 -36.412667 \n",
"1 100000042 1 1 -660.115 -660.115000 \n",
"2 100000065 1 1 -174.646 -174.646000 \n",
"3 100000069 65 73 -7479.755 -102.462397 \n",
"4 100000073 1 1 -133.402 -133.402000 \n",
"\n",
" std_flow total_subscription total_redemption churn_ratio \\\n",
"0 49.280511 0.000 -109.238 -1.092380e+08 \n",
"1 NaN 0.000 -660.115 -6.601150e+08 \n",
"2 NaN 0.000 -174.646 -1.746460e+08 \n",
"3 2168.971331 33320.402 -40800.157 -1.224480e+00 \n",
"4 NaN 0.000 -133.402 -1.334020e+08 \n",
"\n",
" churn_flag activity_score flow_volatility inertia_ratio \n",
"0 0 1.386294 49.280511 0.998921 \n",
"1 0 0.693147 0.000000 0.999640 \n",
"2 0 0.693147 0.000000 0.999640 \n",
"3 0 4.304065 2168.971331 0.976619 \n",
"4 0 0.693147 0.000000 0.999640 \n"
]
}
],
"source": [
"# Small offset that only prevents a division by zero when nothing was subscribed.\n",
"eps = 1e-6\n",
"\n",
"# Signed ratio: redemptions are stored as negative quantities, so the ratio is\n",
"# negative, and it becomes very large in magnitude when total_subscription is 0.\n",
"client_behavior[\"churn_ratio\"] = (\n",
"    client_behavior[\"total_redemption\"] /\n",
"    (client_behavior[\"total_subscription\"] + eps)\n",
")\n",
"\n",
"# 1 when the account redeemed more than it subscribed. Magnitudes are compared\n",
"# because redemptions are negative: comparing the signed totals would almost\n",
"# never be true and would leave the flag at 0 for nearly every account.\n",
"client_behavior[\"churn_flag\"] = (\n",
"    client_behavior[\"total_redemption\"].abs() > client_behavior[\"total_subscription\"]\n",
").astype(int)\n",
"\n",
"# log(1 + number of flow records)\n",
"client_behavior[\"activity_score\"] = np.log1p(client_behavior[\"n_transactions\"])\n",
"\n",
"# std_flow is NaN for accounts with a single record; treat that as zero dispersion.\n",
"client_behavior[\"flow_volatility\"] = client_behavior[\"std_flow\"].fillna(0)\n",
"\n",
"# Share of all distinct flow dates on which the account did NOT trade.\n",
"client_behavior[\"inertia_ratio\"] = (\n",
"    1 - client_behavior[\"n_days\"] / flows[\"Centralisation Date\"].nunique()\n",
")\n",
"\n",
"print(client_behavior.head())\n",
"\n",
2025-12-05 11:03:19 +01:00
"client_behavior.to_csv(\"csv_outputs/client_behavior.csv\", index=False)\n"
2025-12-05 09:49:17 +01:00
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 11,
2025-12-05 09:49:17 +01:00
"id": "8ee7e911-eb73-4846-b545-661140411c1b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2025-12-05 11:03:19 +01:00
"/tmp/ipykernel_5991/1645623303.py:17: RuntimeWarning: invalid value encountered in scalar divide\n",
2025-12-05 09:49:17 +01:00
" .apply(lambda x: x[\"Value - AUM €\"].max() / x[\"Value - AUM €\"].sum()) \\\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Registrar Account - ID n_isin_held n_funds_held n_asset_types \\\n",
"0 100000014 1 1 1 \n",
"1 100000016 2 2 2 \n",
"2 100000028 1 1 1 \n",
"3 100000038 3 3 2 \n",
"4 100000042 1 1 1 \n",
"\n",
" n_strategies total_aum median_aum concentration_ratio \n",
"0 1 0.0000 0.0 NaN \n",
"1 2 0.0000 0.0 NaN \n",
"2 1 126236.2184 0.0 1.0 \n",
"3 3 0.0000 0.0 NaN \n",
"4 1 446362.9015 0.0 1.0 \n",
" n_isin_held n_funds_held n_asset_types n_strategies total_aum \\\n",
"count 12501.000000 12501.000000 12501.000000 12501.000000 1.250100e+04 \n",
"mean 5.514759 4.408367 2.082473 4.109271 4.218474e+08 \n",
"std 10.434698 5.472756 1.254048 4.714800 5.618341e+09 \n",
"min 1.000000 1.000000 1.000000 1.000000 -2.586805e+08 \n",
"25% 1.000000 1.000000 1.000000 1.000000 0.000000e+00 \n",
"50% 2.000000 2.000000 2.000000 2.000000 2.587605e+05 \n",
"75% 6.000000 5.000000 3.000000 5.000000 8.817014e+06 \n",
"max 469.000000 67.000000 6.000000 48.000000 4.780234e+11 \n",
"\n",
" median_aum concentration_ratio \n",
"count 1.250100e+04 7708.000000 \n",
"mean 2.573991e+05 0.790503 \n",
"std 3.487976e+06 0.261535 \n",
"min -2.317333e+06 -2.591840 \n",
"25% 0.000000e+00 0.576503 \n",
"50% 0.000000e+00 0.972159 \n",
"75% 1.474502e+02 1.000000 \n",
"max 2.215373e+08 2.983529 \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2025-12-05 11:03:19 +01:00
"/tmp/ipykernel_5991/1645623303.py:17: FutureWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
2025-12-05 09:49:17 +01:00
" .apply(lambda x: x[\"Value - AUM €\"].max() / x[\"Value - AUM €\"].sum()) \\\n"
]
}
],
"source": [
"# Diversification per account: distinct products held and AUM summary\n",
"# (the sum and median run over every stock row of the account).\n",
"account_div = stocks.groupby(\"Registrar Account - ID\").agg(\n",
"    n_isin_held=(\"Product - Isin\", \"nunique\"),\n",
"    n_funds_held=(\"Product - Fund\", \"nunique\"),\n",
"    n_asset_types=(\"Product - Asset Type\", \"nunique\"),\n",
"    n_strategies=(\"Product - Strategy\", \"nunique\"),\n",
"    total_aum=(\"Value - AUM €\", \"sum\"),\n",
"    median_aum=(\"Value - AUM €\", \"median\")\n",
").reset_index()\n",
"\n",
"# Concentration ratio per account = AUM of its largest fund / AUM of all its funds\n",
"aum_by_account_fund = stocks.groupby(\n",
"    [\"Registrar Account - ID\", \"Product - Fund\"]\n",
")[\"Value - AUM €\"].sum().reset_index()\n",
"\n",
"# Vectorised max / sum per account. This gives the same values as a per-group\n",
"# lambda, without the groupby.apply FutureWarning or one Python call per account.\n",
"# The ratio is NaN when an account's total is 0, and can fall outside [0, 1]\n",
"# when some AUM values are negative.\n",
"_fund_aum = aum_by_account_fund.groupby(\"Registrar Account - ID\")[\"Value - AUM €\"]\n",
"concentration = (_fund_aum.max() / _fund_aum.sum()).reset_index(name=\"concentration_ratio\")\n",
"\n",
"# Merge diversification + concentration\n",
"account_static = account_div.merge(concentration, on=\"Registrar Account - ID\", how=\"left\")\n",
"\n",
"print(account_static.head())\n",
"print(account_static.describe())\n"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 12,
2025-12-05 09:49:17 +01:00
"id": "76f6fa0d-9d7a-4145-af1c-986d83947f91",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Registrar Account - ID country region\n",
"0 100000014 SWITZERLAND SWITZERLAND\n",
"1 100000016 UNITED KINGDOM UNITED KINGDOM\n",
"2 100000028 UNITED KINGDOM UNITED KINGDOM\n",
"3 100000038 SWITZERLAND SWITZERLAND\n",
"4 100000042 UNITED KINGDOM UNITED KINGDOM\n"
]
}
],
"source": [
"# Geographic info per account: the most frequent country and region among the\n",
"# account's stock rows (first entry of Series.mode()).\n",
"geo = (\n",
"    stocks.groupby(\"Registrar Account - ID\")\n",
"    .agg(\n",
"        country=pd.NamedAgg(\n",
"            column=\"RegistrarAccount - Country\", aggfunc=lambda values: values.mode().iloc[0]\n",
"        ),\n",
"        region=pd.NamedAgg(\n",
"            column=\"Registrar Account - Region\", aggfunc=lambda values: values.mode().iloc[0]\n",
"        ),\n",
"    )\n",
"    .reset_index()\n",
")\n",
"\n",
"print(geo.head())\n"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": null,
2025-12-05 09:49:17 +01:00
"id": "e9bb67ab-9029-4ace-b960-b3d6e0b8683c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Registrar Account - ID n_days n_transactions total_netflows mean_flow \\\n",
"0 100000028 3 3 -109.238 -36.412667 \n",
"1 100000042 1 1 -660.115 -660.115000 \n",
"2 100000065 1 1 -174.646 -174.646000 \n",
"3 100000069 65 73 -7479.755 -102.462397 \n",
"4 100000073 1 1 -133.402 -133.402000 \n",
"\n",
" std_flow total_subscription total_redemption churn_ratio \\\n",
"0 49.280511 0.000 -109.238 -1.092380e+08 \n",
"1 NaN 0.000 -660.115 -6.601150e+08 \n",
"2 NaN 0.000 -174.646 -1.746460e+08 \n",
"3 2168.971331 33320.402 -40800.157 -1.224480e+00 \n",
"4 NaN 0.000 -133.402 -1.334020e+08 \n",
"\n",
" churn_flag ... n_funds_held n_asset_types n_strategies total_aum \\\n",
"0 0 ... 1.0 1.0 1.0 126236.2184 \n",
"1 0 ... 1.0 1.0 1.0 446362.9015 \n",
"2 0 ... 1.0 1.0 1.0 488743.4240 \n",
"3 0 ... 0.0 0.0 0.0 NaN \n",
"4 0 ... 2.0 2.0 2.0 373322.8948 \n",
"\n",
" median_aum concentration_ratio country region \\\n",
"0 0.0 1.0 UNITED KINGDOM UNITED KINGDOM \n",
"1 0.0 1.0 UNITED KINGDOM UNITED KINGDOM \n",
"2 0.0 1.0 UNITED KINGDOM UNITED KINGDOM \n",
"3 NaN NaN UNKNOWN UNKNOWN \n",
"4 0.0 1.0 UNITED KINGDOM UNITED KINGDOM \n",
"\n",
" log_total_aum log_median_aum \n",
"0 11.745918 0.0 \n",
"1 13.008890 0.0 \n",
"2 13.099595 0.0 \n",
"3 NaN NaN \n",
"4 12.830202 0.0 \n",
"\n",
"[5 rows x 24 columns]\n",
" Registrar Account - ID n_days n_transactions total_netflows \\\n",
"count 6842 6842.000000 6842.000000 6.842000e+03 \n",
"unique 6842 NaN NaN NaN \n",
"top 100000028 NaN NaN NaN \n",
"freq 1 NaN NaN NaN \n",
"mean NaN 122.731804 376.273166 3.426362e+04 \n",
"std NaN 363.726141 1972.817028 1.644676e+06 \n",
"min NaN 1.000000 1.000000 -9.982301e+06 \n",
"25% NaN 2.000000 2.000000 -3.448661e+02 \n",
"50% NaN 5.000000 6.000000 -1.116000e+00 \n",
"75% NaN 27.000000 42.000000 4.220087e+01 \n",
"max NaN 2715.000000 53314.000000 1.319043e+08 \n",
"\n",
" mean_flow std_flow total_subscription total_redemption \\\n",
"count 6842.000000 5.696000e+03 6.842000e+03 6.842000e+03 \n",
"unique NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN \n",
"mean 420.503483 6.035653e+03 1.565831e+05 -1.223195e+05 \n",
"std 15548.555778 7.652710e+04 4.204641e+06 2.600731e+06 \n",
"min -333474.890000 0.000000e+00 -3.931320e+02 -2.069900e+08 \n",
"25% -35.658375 5.643245e+01 0.000000e+00 -6.968600e+03 \n",
"50% -0.067287 2.479222e+02 3.393685e+02 -4.493603e+02 \n",
"75% 2.235534 9.869604e+02 6.000000e+03 -7.275400e+01 \n",
"max 871531.706418 4.697263e+06 3.388942e+08 0.000000e+00 \n",
"\n",
" churn_ratio churn_flag ... n_funds_held n_asset_types \\\n",
"count 6.842000e+03 6842.000000 ... 6842.000000 6842.000000 \n",
"unique NaN NaN ... NaN NaN \n",
"top NaN NaN ... NaN NaN \n",
"freq NaN NaN ... NaN NaN \n",
"mean -7.445486e+08 0.000146 ... 5.054224 2.114294 \n",
"std 1.163193e+10 0.012090 ... 6.457703 1.342230 \n",
"min -5.212597e+11 0.000000 ... 0.000000 0.000000 \n",
"25% -2.279500e+06 0.000000 ... 1.000000 1.000000 \n",
"50% -1.048786e+00 0.000000 ... 2.000000 2.000000 \n",
"75% -9.333542e-01 0.000000 ... 7.000000 3.000000 \n",
"max 3.225589e+01 1.000000 ... 67.000000 6.000000 \n",
"\n",
" n_strategies total_aum median_aum concentration_ratio \\\n",
"count 6842.000000 6.724000e+03 6.724000e+03 6586.000000 \n",
"unique NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN \n",
"mean 4.631102 7.136776e+08 4.051506e+05 0.782903 \n",
"std 5.493014 7.438256e+09 4.121668e+06 0.267426 \n",
"min 0.000000 -9.151116e+07 -2.317333e+06 -2.591840 \n",
"25% 1.000000 5.107611e+05 0.000000e+00 0.561855 \n",
"50% 2.000000 4.221523e+06 0.000000e+00 0.965881 \n",
"75% 6.000000 3.987106e+07 2.531545e+04 1.000000 \n",
"max 48.000000 4.780234e+11 2.215373e+08 2.983529 \n",
"\n",
" country region log_total_aum log_median_aum \n",
"count 6842 6842 6724.000000 6724.000000 \n",
"unique 34 16 NaN NaN \n",
"top FRANCE FRANCE NaN NaN \n",
"freq 2631 2643 NaN NaN \n",
"mean NaN NaN 15.046065 4.392450 \n",
"std NaN NaN 4.320148 5.462132 \n",
"min NaN NaN 0.000000 0.000000 \n",
"25% NaN NaN 13.143657 0.000000 \n",
"50% NaN NaN 15.255707 0.000000 \n",
"75% NaN NaN 17.501160 10.139210 \n",
"max NaN NaN 26.892926 19.216101 \n",
"\n",
"[11 rows x 24 columns]\n"
]
}
],
"source": [
"# 1. Merge behavior (flows) with static diversification (stocks)\n",
"client_master = client_behavior.merge(\n",
" account_static,\n",
" on=\"Registrar Account - ID\",\n",
" how=\"left\"\n",
")\n",
"\n",
"# 2. Add geographic info\n",
"client_master = client_master.merge(\n",
" geo,\n",
" on=\"Registrar Account - ID\",\n",
" how=\"left\"\n",
")\n",
"\n",
"# 3. Create additional engineered features\n",
"client_master[\"log_total_aum\"] = np.log1p(client_master[\"total_aum\"].clip(lower=0))\n",
"client_master[\"log_median_aum\"] = np.log1p(client_master[\"median_aum\"].clip(lower=0))\n",
"\n",
"\n",
"# 4. Replace NaN flow volatility with 0 (inactive accounts)\n",
"client_master[\"flow_volatility\"] = client_master[\"flow_volatility\"].fillna(0)\n",
"\n",
"# 5. Fill missing diversification metrics with 0 (for accounts without stocks)\n",
"client_master[[\"n_isin_held\",\"n_funds_held\",\"n_asset_types\",\"n_strategies\"]] = \\\n",
" client_master[[\"n_isin_held\",\"n_funds_held\",\"n_asset_types\",\"n_strategies\"]].fillna(0)\n",
"\n",
"# 6. Fill missing geography as “UNKNOWN”\n",
"client_master[\"country\"] = client_master[\"country\"].fillna(\"UNKNOWN\")\n",
"client_master[\"region\"] = client_master[\"region\"].fillna(\"UNKNOWN\")\n",
"\n",
"# 7. Export\n",
2025-12-05 11:03:19 +01:00
"client_master.to_csv(\"csv_outputs/client_master.csv\", index=False)\n",
2025-12-05 09:49:17 +01:00
"\n",
"print(client_master.head())\n",
"print(client_master.describe(include='all'))\n"
]
},
{
"cell_type": "markdown",
"id": "fb1e98a5-6ab4-4371-ba45-6558ff38c839",
"metadata": {},
"source": [
2025-12-05 11:03:19 +01:00
"# Détection des ruptures"
2025-12-05 09:49:17 +01:00
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 16,
2025-12-05 09:49:17 +01:00
"id": "6bdd8077-c8e0-451d-a7b8-15a2705ad196",
"metadata": {},
"outputs": [],
"source": [
"# --- 1. PREPARE STOCKS ---\n",
"stocks_clean = stocks[[\n",
" \"Registrar Account - ID\", \"Product - Isin\", \n",
" \"Centralisation Date\", \"Quantity - AUM\"\n",
"]].copy()\n",
"\n",
"stocks_clean[\"Centralisation Date\"] = pd.to_datetime(stocks_clean[\"Centralisation Date\"])\n",
"stocks_clean = stocks_clean.sort_values([\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"])\n",
"\n",
"# --- 2. PREPARE FLOWS ---\n",
"flows_clean = flows[[\n",
" \"Registrar Account - ID\", \"Product - Isin\", \n",
" \"Centralisation Date\", \"Quantity - NetFlows\"\n",
"]].copy()\n",
"\n",
"flows_clean[\"Centralisation Date\"] = pd.to_datetime(flows_clean[\"Centralisation Date\"])\n",
"\n",
"# Aggregate flows per day to avoid duplicates\n",
"flows_clean = flows_clean.groupby(\n",
" [\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"]\n",
")[\"Quantity - NetFlows\"].sum().reset_index()\n",
"\n",
"# --- 3. MERGE STOCKS WITH FLOWS ---\n",
"merged = stocks_clean.merge(\n",
" flows_clean,\n",
" on=[\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"],\n",
" how=\"left\"\n",
")\n",
"\n",
"merged[\"Quantity - NetFlows\"] = merged[\"Quantity - NetFlows\"].fillna(0)\n",
"\n",
"# --- 4. SHIFT STOCKS TO COMPARE t vs t+1 ---\n",
"merged[\"prev_stock\"] = merged.groupby(\n",
" [\"Registrar Account - ID\", \"Product - Isin\"]\n",
")[\"Quantity - AUM\"].shift(1)\n",
"\n",
"# SHIFT NET FLOWS FROM PREVIOUS DATE\n",
"merged[\"prev_netflows\"] = merged.groupby(\n",
" [\"Registrar Account - ID\", \"Product - Isin\"]\n",
")[\"Quantity - NetFlows\"].shift(1).fillna(0)\n",
"\n",
"# Expected stock\n",
"merged[\"expected_stock\"] = merged[\"prev_stock\"] + merged[\"prev_netflows\"]\n",
"\n",
"# --- 5. COMPUTE GAP BETWEEN EXPECTED AND REAL ---\n",
"merged[\"gap\"] = merged[\"Quantity - AUM\"] - merged[\"expected_stock\"]\n",
"\n",
"# tolerance for numerical noise\n",
"TOL = 1e-6\n",
"merged[\"rupture_flag\"] = (merged[\"prev_stock\"].notna()) & (merged[\"gap\"].abs() > TOL)\n",
"\n",
"# --- 6. AGGREGATE BY CLIENT TO DETECT BIG ISSUES ---\n",
"rupture_summary = merged.groupby(\"Registrar Account - ID\").agg(\n",
" n_ruptures=(\"rupture_flag\", \"sum\"),\n",
" total_obs=(\"rupture_flag\", \"count\"),\n",
" rupture_ratio=(\"rupture_flag\", \"mean\"),\n",
" max_gap=(\"gap\", lambda x: x.abs().max())\n",
").reset_index()\n",
"\n",
"# Sort by biggest anomalies\n",
"rupture_summary = rupture_summary.sort_values(\"rupture_ratio\", ascending=False)\n",
"\n",
"rupture_summary.head(10)\n",
"\n",
"rupture_summary_asc = rupture_summary.sort_values(\"rupture_ratio\", ascending=True)\n",
2025-12-05 11:03:19 +01:00
"rupture_summary_asc.to_csv('csv_outputs/rupture.csv')\n",
"merged.to_csv('csv_outputs/merged.csv')"
2025-12-05 09:49:17 +01:00
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": null,
2025-12-05 09:49:17 +01:00
"id": "71cd67aa-f4b9-489e-b928-defeca459cb6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_ruptures</th>\n",
" <th>total_obs</th>\n",
" <th>rupture_ratio</th>\n",
" <th>max_gap</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>165</th>\n",
" <td>200000331</td>\n",
" <td>0</td>\n",
" <td>160</td>\n",
" <td>0.000000</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>182</th>\n",
" <td>200000361</td>\n",
" <td>0</td>\n",
" <td>80</td>\n",
" <td>0.000000</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12198</th>\n",
" <td>422302</td>\n",
" <td>0</td>\n",
" <td>240</td>\n",
" <td>0.000000</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12197</th>\n",
" <td>422299</td>\n",
" <td>0</td>\n",
" <td>80</td>\n",
" <td>0.000000</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12191</th>\n",
" <td>422288</td>\n",
" <td>0</td>\n",
" <td>1200</td>\n",
" <td>0.000000</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6522</th>\n",
" <td>365568</td>\n",
" <td>237</td>\n",
" <td>240</td>\n",
" <td>0.987500</td>\n",
" <td>16596.971</td>\n",
" </tr>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>200000407</td>\n",
" <td>79</td>\n",
" <td>80</td>\n",
" <td>0.987500</td>\n",
" <td>63893.601</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6884</th>\n",
" <td>365966</td>\n",
" <td>79</td>\n",
" <td>80</td>\n",
" <td>0.987500</td>\n",
" <td>2673.873</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7039</th>\n",
" <td>366351</td>\n",
" <td>258</td>\n",
" <td>260</td>\n",
" <td>0.992308</td>\n",
" <td>1998.948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>603</th>\n",
" <td>200001928</td>\n",
" <td>645</td>\n",
" <td>650</td>\n",
" <td>0.992308</td>\n",
" <td>110779.418</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>12501 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_ruptures total_obs rupture_ratio max_gap\n",
"165 200000331 0 160 0.000000 0.000\n",
"182 200000361 0 80 0.000000 0.000\n",
"12198 422302 0 240 0.000000 0.000\n",
"12197 422299 0 80 0.000000 0.000\n",
"12191 422288 0 1200 0.000000 0.000\n",
"... ... ... ... ... ...\n",
"6522 365568 237 240 0.987500 16596.971\n",
"197 200000407 79 80 0.987500 63893.601\n",
"6884 365966 79 80 0.987500 2673.873\n",
"7039 366351 258 260 0.992308 1998.948\n",
"603 200001928 645 650 0.992308 110779.418\n",
"\n",
"[12501 rows x 5 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rupture_summary_asc"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 17,
2025-12-05 09:49:17 +01:00
"id": "72332a7e-0ab0-474b-aac7-b52ebbba7a8b",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+kAAAGKCAYAAABwwgCiAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAgTVJREFUeJzt3Xd8U+X+B/BPkqbpoBMoLauUDbLBQsUBSBliERFFEEERFQRFUH+KIqUuHOBGvMq6XkSGC1AsVKAsK5VRpJTdQhU7KKWbpiF5fn9gQtOMJm1m83m/Xtxrz3lO8pzm6cn5nmd8JUIIASIiIiIiIiJyOqmzK0BERERERERE1zFIJyIiIiIiInIRDNKJiIiIiIiIXASDdCIiIiIiIiIXwSCdiIiIiIiIyEUwSCciIiIiIiJyEQzSiYiIiIiIiFwEg3QiIiIiIiIiF8EgnYiIiIiIiMhFMEgnImrAFi5cCIlEguTkZGdXxWbOnz8PiUSCRx55xK7vM2jQIEgkEru+h7W2b9+OgQMHIiQkBBKJBGPGjHF2lYiIiMjGGKQTEdVBeXk53nrrLfTp0weNGjWCQqFAy5Ytcdttt2HevHk4d+6cXvk2bdqgTZs2zqmsDbRp0wYSicTsv/Pnzzu7mlZxtwcY58+fxz333IPMzEw8+uijiI+Px4MPPui0+ggh0L59e0gkEowaNcpp9XAVdW1P+/btw3PPPYe+ffuicePG8PHxQefOnfHiiy+iqKjI5HF//PEH7rrrLgQHB8Pf3x8DBgzAhg0bTJbPycnBY489hoiICPj4+KBTp0548803oVKpjJZXKpV47bXX0KFDB/j4+KB58+Z44oknkJ+fb/I9vv76a0RHR8Pf3x8hISG4++67cfjwYaNl16xZgyeffBL9+vWDQqGARCLB6tWrTb42ABw4cAD33HMPmjRpAoVCgQ4dOmDBggW4evWqQdm0tDS8+uqrGDBgAMLCwqBQKNC2bVs89dRTuHjxosn32LVrF+666y60atUKvr6+aNeuHSZOnIijR4+arRsRkS15ObsCRETuprS0FLfeeiv+/PNPtG/fHpMmTULjxo1RUFCA1NRUvP3222jXrh3atWvn7KralEwmw/z5803uDw4OdlxlHOCrr75CRUWFs6uh8+uvv6KyshJLlizBxIkTnV0dJCcn49y5c5BIJNi2bRv++ecfNG/e3NnVcjvjxo1DQUEBbr31VkyePFkX6L/77rv49ttv8dtvv6FZs2Z6x+zatQvDhw+Hj48PHnzwQQQEBOC7777D+PHj8ddff+G5557TK5+bm4v+/fvj77//xr333osOHTpg9+7dmD9/PlJTU/Hjjz/qjRrRaDS45557sG3bNgwYMAD33Xcfzpw5g+XLl2PHjh34/fff0bRpU733ePPNNzF//nxERkZi+vTpKC0txbp163DLLbdgx44dGDhwoF75+fPn48KFC2jSpAkiIiJw4cIFs7+n77//HuPHj4dMJsN9992H8PBw7N+/H6+//jp27tyJHTt2QKFQ6MpPnz4dBw4cQHR0NB588EEoFAocOHAAy5Ytw8aNG7F371507txZ7z0++eQTPPPMMwgODsbYsWPRtGlTnD59Ghs3bsS3336LrVu3YujQobV/qERE9SWIiMgqr732mgAgpk2bJjQajcH+zMxMceLECb1tkZGRIjIy0kE1vCE+Pl4AELt27arX60RGRgqFQmGbStVTVlaWACCmTJlSr9ex1e/GURISElyqvg899JAAIJ5//nkBQLz55pvOrpJT1bU9vf322+LixYt62zQajZgxY4YAIJ566im9fSqVSrRr104oFApx5MgR3faioiLRsWNH4e3tLc6fP693zOTJkwUAsWzZMr33ePDBBwUAsXbtWr3yK1euFADEhAkT9K5xy5YtEwDEE088oVf+9OnTwsvLS3Ts2FEUFRXpth85ckQoFArRpUsXoVar9Y5JSkrS1XPRokUCgFi1ap
XR31FFRYVo2rSpkMvl4uDBg3rnMHPmTAFALFq0SO+Yjz/+WJw5c8bgtd5++20BQNx1111626uqqkRgYKAIDAwU2dnZevu+//57AUAMHjzYaP2IiGyNQToRkZVGjhwpAOjdIJuiDSiN/YuPj9cru3LlShEdHS38/f2Fv7+/iI6ONnnTKoQQu3fvFvfcc48ICwsT3t7eomXLluLee+8Ve/fu1ZUxFTgcO3ZMtGjRQgQHB+uVN8WaIH3IkCFCIpEYBApaTz/9tAAgtm/frrfd0vM3FaSbexByxx13iOrPpbU/1/xX/fiax2ipVCqxZMkS0aNHD+Hj4yMCAwPFoEGDxObNmw3Krlq1Shd8bNu2TcTExAhfX18RGhoqJk+eLAoKCozW19j5GvtX/XM9duyYuP/++0XTpk2Ft7e3aNOmjZg9e7bR99D+rq5cuSJmzpwpWrZsKWQymdn2Vt2VK1eEj4+P6Natm6ioqBABAQGiXbt2Rh9aCXE9mFq5cqW49dZbRVBQkPD19RXt27cXTzzxhLhw4YJe2ZKSErFw4ULRvXt34evrKwIDA0WvXr3E/PnzRVVVlV7Zffv2ibvuukuEhIQIhUIhOnXqJBYsWCDKy8uN/g5NPdgBIO644w69bdrPv6qqSsTHx4vIyEjh7e0tOnToIJYuXWq0rLn2ZK1//vlHABA33XST3vZt27YJAOLRRx81OGb16tUCgEhISNBtKykpEQqFQrRt29bg8zl//rzR4DMmJkYAMPgb1mg0om3btsLf319UVFTots+bN08AEP/9738N6vTII48IAGL37t0mz7W2IP3XX38VAMT9999vsO/KlSu637Wp9lfdtWvXhK+vr/D399fbnpOTIwCIW265xeAYpVIpJBKJ6NatW62vT0RkCxzuTkRkpcaNGwMATp8+jV69epktGxwcjPj4eHz44YcAgGeffVa3b9CgQbr/fuaZZ/DJJ5+gRYsWeOyxxwAA3333HR599FEcOXIEH330kd7rfvTRR5gzZw58fX1x7733onXr1rh48SL27duHb7/9FrfeeqvJOu3btw9xcXHw9/fH3r170a1bN8tP3gIPP/wwdu7cia+//hovv/yy3r5r165h3bp1aN68Oe68807ddmvPv760i87t3r0bU6ZM0a0XUNuQfSEExo0bh02bNqFjx46YOXMmysvLsX79eowePRrvv/8+5syZY3Dc5s2b8fPPPyMuLg633HIL9uzZg6+++grnzp3Dvn37zL6ntg0lJycb1Ff7//v27cPw4cNRVVWFcePGoU2bNkhJScFHH32En376Cb///juaNGmi97pKpRJDhgxBWVkZRo8eDS8vL4Nh1aasXbsWlZWVmDx5Mnx9fTFu3DisWrUKu3fv1mvXwPWh0+PHj8e3336LFi1aYMKECQgMDMT58+exYcMGjBw5Eq1btwYA5Ofn44477sDJkyfRq1cvzJgxAxqNBidPnsQ777yD5557TvcZbdy4ERMmTIBCocD48eMRFhaG7du347XXXsO2bduQnJwMHx8fi87HnAkTJiA1NRUjR46ETCbDhg0bMHPmTMjlcjz++OMA6t6ezJHL5QAALy/9WzXtnPdhw4YZHDN8+HBdPbRSUlKgVCoRGxtrsBBiZGQkOnXqhP3790OtVkMmk6GyshIHDhxAp06dEBkZqVdeIpEgNjYW//nPf3Dw4EHcdtttFtVp9erV2L17N26//XYrfgM35ObmAgCioqIM9gUHByMkJAQXLlxAZmZmrdOMJBIJ5HK5we+iWbNmaNKkCdLT0/HXX3+hVatWun0///wzhBB61ywiIrty9lMCIiJ3s2nTJgFABAQEiOeee05s27at1h5Rc728u3fvFgBEly5d9IaKFhYWio4dOwoAYs+ePbrtaWlpQiqViubNm4usrCy919JoNHpDZ2v2pG/atEn4+vqKTp06GfRg1lZ/mUwm4uPjjf6rPoy2pKRE+Pr6iq5duxq8zpYtW3RDpOt6/rboSR
ei9uHJxo7573//q+t1VSqVuu0XLlwQTZo0EV5eXuLcuXO67dqedC8vL7Fv3z7d9mvXrolBgwYJACIlJcXo+9dkqr5
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/8AAAGKCAYAAABacvENAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAffxJREFUeJzt3Xl8jNf+B/DPzCSZSciqshERuwgRFNFSlCSq1LUUraK2Uqlauukiwm21WkpL6/op2osqXSzVhqC2Cq4ltqBthLRkokQykUgyMuf3Rzojk1kyiclMMvm8X6/ca85znuc5T3JEv+c553skQggBIiIiIiIiInJYUns3gIiIiIiIiIiqFoN/IiIiIiIiIgfH4J+IiIiIiIjIwTH4JyIiIiIiInJwDP6JiIiIiIiIHByDfyIiIiIiIiIHx+CfiIiIiIiIyMEx+CciIiIiIiJycAz+iYiIiIiIiBwcg38iIrKbuXPnQiKRYN++ffZuitVcuXIFEokEY8eOrdL79OzZExKJpErvUVG7du3CI488Am9vb0gkEgwaNMjeTSIiIqJ/MPgnInJAeXl5eO+999ChQwfUrVsXcrkcDRs2RPfu3TF79mykpqbq1W/cuDEaN25sn8ZaQePGjSGRSMx+Xblyxd7NrJCaNjBy5coVPPXUU7h8+TKef/55xMXFYcSIEXZrjxACzZo1g0QiQf/+/e3Wjuqisv3p6NGjGDNmDMLCwuDj4wOFQoFmzZph+PDhOH78uNlzf/jhB/Tt2xf16tWDQqFASEgIRo4ciT///NNo/bS0NEycOBHBwcGQy+Xw8/NDr169sHnzZr16ly5dwsSJExEREYH69etDLpejcePGePLJJ7Fnzx6j19YOlhn7Mva778aNG1iwYAGGDh2KkJAQXV0ioprMyd4NICIi68rNzcWjjz6KM2fOoFmzZhg1ahTq1auHmzdv4tixY3j//ffRtGlTNG3a1N5NtSqZTIa3337b5HEvLy/bNcYGvvrqK+Tn59u7GTq7d+9GQUEBFi1ahGeeecbezcG+ffuQmpoKiUSCnTt34vr16wgMDLR3s2qcgwcPIjExEV27dkXv3r3h5uaGy5cvY9u2bdi8eTO+/PJLPPfcc3rnCCEwefJkrFy5Ek2bNsWIESPg7u6O69evY//+/bh69SqCgoL0zklMTNTNFBkwYACaNGmC27dv48yZM9i9ezeGDRumq3v27Fl8//33iIyMRLdu3eDh4YFr165h69at2LFjB/7973/jrbfeMvo8cXFxBmXGfjekpKTgzTffhEQiQfPmzeHm5lat/r4REVUGg38iIgezZMkSnDlzBhMmTMDKlSsN3lalpaWhsLDQTq2rOk5OTpg7d669m2EzjRo1sncT9Fy/fh0Aqk2A/cUXXwAAZs2ahY8++ghr167Fm2++aedW1TyxsbF45ZVXDMrPnTuHhx9+GK+88gpGjRql93vmk08+wcqVK/Hiiy/ik08+gUwm0zv33r17ep/T09MxdOhQNGjQALt37zbo22XrDxgwAEOGDDH43Xb9+nVEREQgPj4eU6dONRrUW/o7onXr1ti/fz8iIiLg7u6OVq1a4dKlSxadS0RUXXHaPxGRg0lKSgIATJ061eg01ZCQELRq1QrA/fXpV69exdWrV/Wmwpb9j+Q1a9agS5cuqFu3LurWrYsuXbpg7dq1Jttx4MABDBo0CH5+fpDL5QgKCsLgwYNx6NChcp/h3LlzaNiwIby9vS2qXxGPP/44pFIprl69avT4tGnTIJFIkJiYqFde0ecvy9zSirLr93v27In4+HgAQK9evYxOTza15v/evXtYvHgxwsPD4erqCk9PT/Tq1Qvbt283qLt27VpIJBKsXbsWu3btQrdu3eDm5oZ69ephzJgxuHXrVrnPpe1D2jeqpdtbeor5uXPn8PTTT8PX1xdyuRwhISGYPn260Xtov1fZ2dmIjY1FUFAQnJycLP5+Z2dn47vvvkNYWB
jmzZsHd3d3rF69GkIIo/WFEFizZg26d+8OLy8vuLm5oXnz5njhhReQnp6uVzc3Nxfx8fFo164d3Nzc4OnpiYiICLzzzjtQq9V6dX/99Vf0799fN12+VatWiIuLM3iDXF6eCIlEgp49e+qVaX/+arUac+fORePGjSGXy9GiRQt89tlnBnXL60+mKBQKo+VhYWFo3bo1bty4AZVKpSu/e/cu4uPj0aRJEyxdutQg8AdKBupKe++996BSqbBixQqjg1pl68vlcqN9PzAwEI888gjUarXJv9+W8vPzQ48ePeDu7v5A1yEiqk745p+IyMHUq1cPAPDbb7+hffv2Zut6eXkhLi4OS5YsAQBMnz5dd6x0sDFt2jR8+umnaNCgAcaPHw8A+O677/D888/j1KlTWLp0qd51ly5dihkzZsDV1RX/+te/0KhRI1y7dg2HDh3Ct99+i0cffdRkmw4dOoQBAwagTp06OHjwIMLCwix/eAs899xz2Lt3L9avX2/wJvjevXvYuHEjAgMD8fjjj+vKK/r8D0obBO7fvx9jxozRBWnlLV0QQmDo0KHYunUrWrRogalTpyIvLw/ffPMNBg4ciMWLF2PGjBkG523btg07duzAgAED0K1bNxw4cABfffUVUlNTyx180fahffv2GbRX+/+HDh1CdHQ0ioqKMHToUDRu3BhJSUlYunQpfvzxRxw5cgQPPfSQ3nULCwvRu3dv3LlzBwMHDoSTkxP8/PzK/d4BwIYNG1BQUIDRo0fD1dUVQ4cOxZo1a7B//36DIFqj0WD48OH49ttv0aBBA4wcORIeHh64cuUKNm3ahH79+ukC0hs3buCxxx7DxYsX0b59e0yZMgUajQYXL17EBx98gFmzZul+Rps3b8bIkSMhl8sxfPhw+Pr6YteuXZg3bx527tyJffv2mQysK2LkyJE4duwY+vXrB5lMhk2bNmHq1KlwdnbGxIkTAVS+P5mTmpqKS5cuISgoCJ6enrryXbt24fbt23j++edRXFyMbdu24bfffoOXlxf69OmDZs2a6V1HCIHNmzejXr166N27N06cOIH9+/dDo9Ggffv26N27N6RSy95V3bp1C0ePHoWbmxuaNGlitM6GDRtw5coVuLm5oX379ujRo4fF1yciqvEEERE5lK1btwoAwt3dXcyaNUvs3LlT3Lx50+w5wcHBIjg42Oix/fv3CwCidevWIjs7W1eelZUlWrRoIQCIAwcO6MqTk5OFVCoVgYGBIi0tTe9aGo1GXLt2Tfc5Li5OABC//PKLru2urq6iZcuW4urVqxY/c3BwsJDJZCIuLs7o1+eff66rq1KphKurqwgNDTW4zvbt2wUA8corr1T6+dPS0gQAMWbMGIM2mvoeP/bYY6LsP8llvzeWnPPll18KAOKxxx4ThYWFuvKrV6+Khx56SDg5OYnU1FRd+Zo1awQA4eTkJA4dOqQrv3fvnujZs6cAIJKSkozevyxT7S0uLhZNmzYVAERCQoLesVdffVUAEOPGjdMrDw4OFgBEdHS0yM/Pt+j+pXXo0EFIpVJdX9u7d68AIEaNGmVQ99NPPxUAxOOPP25wr/z8fHHr1i3d5yFDhggA4s033zS4jlKpFGq1WgghRE5OjvD09BRyuVycPn1aV6e4uFgMHz5cABDz5s3TlZvqM1ran2lp2p9/ly5dRE5Ojq784sWLwsnJSbRs2VKvfnn9qTxHjx4VcXFx4s033xTPPvuscHd3F25ubmLHjh169d555x0BQLz66qu6vx/aL6lUKmbNmqVXPzU1VQAQnTp1EpMmTdKrD0BERESIP//802ibLl26JOLi4sTbb78txo0bp+vjq1atMqir/X6V/WrRooX43//+V+7zt2zZ0uDvGxFRTcPfYkREDmjRokWibt26ev+R27RpUzF16lTx22+/GdQ3F5iOGzdOABDffPONwbH169cbBG9TpkwRAMTq1avLbWfpgGTVqlVCJpOJzp07i7///tvyhxX3g0VTX+
Hh4Xr1R44cKQCIEydO6JU//fTTAoBITk6u9PPbM/jv3bu3ACCOHj1qUP/dd981CDq1wf/o0aMN6muPffLJJ0bvX5a
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/8AAAGKCAYAAABacvENAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAhXRJREFUeJzt3XlcVOX+B/DPzLAMKAOCsqiIiCui4obS6g4uqGX+1LJIza5cKZcWs0ykbteyXNP0tqiVmmnlnijumiiJoiKuiOLCgIoyiLLNPL8/aEbHGZgZBYbl8369uDfO+c45z+EcqO9znuf7SIQQAkRERERERERUbUmt3QAiIiIiIiIiKl9M/omIiIiIiIiqOSb/RERERERERNUck38iIiIiIiKiao7JPxEREREREVE1x+SfiIiIiIiIqJpj8k9ERERERERUzTH5JyIiIiIiIqrmmPwTERERERERVXNM/omIyGpmzJgBiUSCPXv2WLspZebSpUuQSCR4/fXXy/U83bp1g0QiKddzWGr79u14+umnUadOHUgkEgwePNjaTSIiIqJ/MPknIqqGcnNz8d///hcdOnRA7dq1YW9vj4YNG+LZZ5/F1KlTkZKSohffuHFjNG7c2DqNLQONGzeGRCIp9evSpUvWbqZFqlrHyKVLlzBo0CBcvHgRo0aNQlRUFIYPH2619ggh0LRpU0gkEvTv399q7agsHvd5Onz4MMLDwxEQEABXV1fI5XI0bdoUw4YNw5EjR0r97Lp169C7d2+4ublBLpfD19cXI0aMwJUrV4y2zZLf3by8PHz66afw9/eHXC5HnTp10LdvX/z111+ltun48eN4+eWX0aBBA9jb26N+/fro27cvdu/ebTQ+JycHUVFRCAgIgKOjI1xcXNChQwdER0eX/oMjIqqEbKzdACIiKls5OTl45plncOLECTRt2hQjR46Em5sbbt68ifj4eHz++efw8/ODn5+ftZtapmQyGaZNm1bifhcXl4prTAX46aefcO/ePWs3Q2fHjh3Iy8vD7Nmz8fLLL1u7OdizZw9SUlIgkUiwbds2XL9+HfXr17d2s6qc/fv3IzY2Fl27dkWPHj3g6OiIixcvYuPGjVi7di1+/PFHvPrqq3qfEUJg3Lhx+Pbbb+Hn54fhw4fDyckJ169fx969e3H58mV4e3sbnCs8PNxoJ+Sjv7t5eXno2bMnDh48iLZt2yIiIgJ37tzB77//jueffx6///47Bg0aZHCcn376CaNHj4azszMGDBiABg0a4ObNmzhy5AgOHjyI7t2768WnpaWhR48euHjxInr16oX+/fsjPz8fFy5cwO+//46oqCjLf6BERFbE5J+IqJqZN28eTpw4gTfeeAPffvutwdDw1NRU5OfnW6l15cfGxgYzZsywdjMqTKNGjazdBD3Xr18HgEqTYP/www8AgHfeeQdfffUVli9fjg8//NDKrap6IiMj8e677xpsT0pKQufOnfHuu+9i5MiRen9nFixYgG+//Rb//ve/sWDBAshkMr3PFhUVGT3X66+/jm7dupls08KFC3Hw4EEMHToUv/zyi+7406ZNQ4cOHTB27Fj06NEDTk5Ous8kJCRgzJgx6Ny5M/7880/UqVOn1DYVFRVhyJAhuH79Onbu3GnQMVDSNRARVWYc9k9EVM3ExcUBAMaPH290Trivry9atmwJ4MH89MuXL+Py5ct6Q20fTaSXLVuGLl26oHbt2qhduza6dOmC5cuXl9iOffv2YfDgwfDw8IC9vT28vb3x4osv4sCBAyavISkpCQ0bNkSdOnXMirdEz549IZVKcfnyZaP73377bUgkEsTGxuptt/T6H1Xa1IpH5+9369ZNN6y4e/fuunvy8OdLmvNfVFSEOXPmoF27dnBwcICzszO6d++OTZs2GcQuX74cEokEy5cvx/bt2/HUU0/B0dERbm5uCA8Px61bt0xel/YZ0r4Ffbi9Dw8xT0pKwv/93//B3d0d9v
b28PX1xcSJE42eQ/uzunPnDiIjI+Ht7Q0bGxuzf97at8ABAQH45JNP4OTkhKVLl0IIYTReCIFly5bh2WefhYuLCxwdHdGsWTP861//Qlpaml5sTk4OoqOj0bZtWzg6OsLZ2Rnt27fHxx9/jMLCQr3Yv/76C/3799cNl2/ZsiWioqIMRmyYqhMhkUgMkmLt/S8sLMSMGTPQuHFj2Nvbo3nz5vjmm28MYk09TyWRy+VGtwcEBKBVq1bIzMyESqXSbb9//z6io6PRpEkTzJ8/3yDxB4o76p7Ehg0bABRPF3j4+H5+fhg9ejRu3LiB3377Te8zH330EdRqNX7++WeDxN9Ym3777TccOXIE7777rkHiXxbXQERkDfzLRURUzbi5uQEAzp07h8DAwFJjXVxcEBUVhXnz5gEAJk6cqNv3cLLx9ttv4+uvv0aDBg0wZswYAMDvv/+OUaNG4dixY5g/f77ecefPn49JkybBwcEBL7zwAho1aoRr167hwIED+O233/DMM8+U2KYDBw4gLCwMtWrVwv79+xEQEGD+xZvh1Vdfxa5du7By5UqDN8FFRUVYvXo16tevj549e+q2W3r9T0qbBO7du1dvKLSpqQtCCLz00kvYsGEDmjdvjvHjxyM3Nxe//vorBg4ciDlz5mDSpEkGn9u4cSO2bNmCsLAwPPXUU9i3bx9++uknpKSkmOx80T5De/bsMWiv9v8PHDiAkJAQFBQU4KWXXkLjxo0RFxeH+fPnY/PmzTh06BDq1q2rd9z8/Hz06NEDd+/excCBA2FjYwMPDw+TPzsAWLVqFfLy8vDaa6/BwcEBL730EpYtW4a9e/caJNEajQbDhg3Db7/9hgYNGmDEiBFQKBS4dOkS1qxZg759++pGWWRmZuL555/HmTNnEBgYiIiICGg0Gpw5cwZffPEF3nnnHd09Wrt2LUaMGAF7e3sMGzYM7u7u2L59Oz755BNs27YNe/bsKTGxtsSIESMQHx+Pvn37QiaTYc2aNRg/fjxsbW0xduxYAI//PJUmJSUFZ8+ehbe3N5ydnXXbt2/fjtu3b2PUqFFQq9XYuHEjzp07BxcXF/Tq1QtNmzYt8Zj79u3D4cOHIZVK0axZM/Tq1Qu1a9c2iFMqlQCKOzIfpd22a9cujBo1CkBxZ9D27dvRvn17NG3aFHv37kV8fDxsbGzQpUsXPPXUUwbH+fXXXwEAQ4cOxZUrV7BlyxbcuXMHfn5+6Nu3r9F2ERFVeoKIiKqVDRs2CADCyclJvPPOO2Lbtm3i5s2bpX7Gx8dH+Pj4GN23d+9eAUC0atVK3LlzR7c9KytLNG/eXAAQ+/bt021PTEwUUqlU1K9fX6SmpuodS6PRiGvXrum+j4qKEgDE7t27dW13cHAQLVq0EJcvXzb7mn18fIRMJhNRUVFGvxYvXqyLValUwsHBQfj7+xscZ9OmTQKAePfddx/7+lNTUwUAER4ebtDGkn7Gzz//vHj0X8mP/mzM+cyPP/4oAIjnn39e5Ofn67ZfvnxZ1K1bV9jY2IiUlBTd9mXLlgkAwsbGRhw4cEC3vaioSHTr1k0AEHFxcUbP/6iS2qtWq4Wfn58AIGJiYvT2vffeewKAGD16tN52Hx8fAUCEhISIe/fumXX+h3Xo0EFIpVLds7Zr1y4BQIwcOdIg9uuvvxYARM+ePQ3Ode/ePXHr1i3d90OGDBEAxIcffmhwHKVSKQoLC4UQQmRnZwtnZ2dhb28vjh8/rotRq9Vi2LBhAoD45JNPdNtLema0tPf0Ydr736VLF5Gdna3bfubMGWFjYyNatGihF2/qeTLl8OHDIioqSnz44YfilVdeEU5OTsLR0VFs2bJFL+7jjz8WAMR7772n+/3QfkmlUvHOO+8YHFvbtke/XFxcxI8//mgQ37VrVwFAnDp1ymDfxIkTBQARFBSk27Zz504BQAwaNEgMGDDA4Dy9e/fW+90WQghvb28BQCxcuFDY29vrxderV++xf45ERNbE5J+IqB
qaPXu2qF27tt5/sPr5+Ynx48eLc+fOGcSXlpiOHj1aABC//vqrwb6VK1caJG8RERECgFi6dKnJdj6ckHz//fdCJpO
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/8AAAGKCAYAAABacvENAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAhzlJREFUeJzs3Xl8TNffB/DPzGSZJJLJQjYiYhdBrBG1lIZQlFYVpRTlR6m1rWoRdFFqp+VRWxVVWrVURYLaKoSQErGVEEuWEsmEyDZznj/SGcZMkglJJpl83q+X52nu/c6958698vO955zvkQghBIiIiIiIiIjIbElN3QAiIiIiIiIiKllM/omIiIiIiIjMHJN/IiIiIiIiIjPH5J+IiIiIiIjIzDH5JyIiIiIiIjJzTP6JiIiIiIiIzByTfyIiIiIiIiIzx+SfiIiIiIiIyMwx+SciIiIiIiIyc0z+iYjIZGbOnAmJRIJDhw6ZuinF5saNG5BIJHj33XdL9Dwvv/wyJBJJiZ6jqMLCwvDSSy/ByckJEokEvXv3NnWTiIiI6D9M/omIzNCjR4/w1VdfoVmzZqhUqRKsra1RrVo1tGvXDlOnTsW1a9d04mvUqIEaNWqYprHFoEaNGpBIJAX+uXHjhqmbWSTl7cXIjRs30KtXL1y/fh1Dhw5FSEgI+vfvb7L2CCFQu3ZtSCQSdO/e3WTtKCue93k6efIkhgwZAj8/Pzg7O0Mul6N27dro168fTp8+XeBnf/vtN3Tu3BkuLi6Qy+Xw8fHBgAEDcOvWLYNtK8rf3YLiC3rxFhcXhxEjRsDb2xvW1tZwc3NDx44dsW3bNr3Yx48fY+HChWjWrBmcnJzg6OiIJk2a4Msvv0RaWppR3x8RUVliYeoGEBFR8UpPT0fbtm1x7tw51K5dG4MGDYKLiwvu3buHyMhIfP3116hVqxZq1apl6qYWK5lMhmnTpuW739HRsfQaUwo2bNiAjIwMUzdDa//+/cjMzMSCBQvw9ttvm7o5OHToEK5duwaJRIJ9+/bh7t278PT0NHWzyp2jR48iPDwcrVu3RqdOnWBra4vr169j165d2LZtG3744Qe88847Op8RQmDUqFFYtWoVatWqhf79+8Pe3h53797F4cOHcfPmTXh5eemda8iQIQZfQub3d9fb29tgou/v728wPjw8XDsapWfPnqhZsyYePHiAc+fOYf/+/ejbt682NicnBx07dsTJkyfh7++vPc+ff/6JadOm4aeffkJkZCRsbW0NnouIqCxi8k9EZGYWL16Mc+fO4b333sOqVav0hobHxcUhKyvLRK0rORYWFpg5c6apm1Fqqlevbuom6Lh79y4AlJkEe82aNQCAyZMnY/78+Vi/fj0+/fRTE7eq/Bk7diw+/PBDve0xMTFo2bIlPvzwQwwaNEjn98zSpUuxatUqvP/++1i6dClkMpnOZ3Nzcw2e691338XLL79sdNtq1Khh9N/5+Ph4vPnmm6hatSr279+v9/fn2Tb99ttvOHnyJF5//XVs375dZ1/v3r2xc+dO/PLLLxg8eLDR7SUiMjUO+yciMjMREREAgDFjxhicE+7j44P69esDeDI//ebNm7h586bO0Nln/1G9bt06BAQEoFKlSqhUqRICAgKwfv36fNtx5MgR9O7dG25ubrC2toaXlxfeeOMNHDt2rNBriImJQbVq1eDk5GRUfFG88sorkEqluHnzpsH948aNg0QiQXh4uM72ol7/swqaWvHs/P2XX34Zs2bNAgB07NhRe0+e/nx+c/5zc3OxcOFCNGnSBDY2NlAoFOjYsSN2796tF7t+/XpIJBKsX78eYWFhaNOmDWxtbeHi4oIhQ4bg/v37hV6X5hkKCQnRa+/TQ8xjYmLw1ltvwdXVFdbW1vDx8cGECRMMnkPzXaWmpmLs2LHw8vKChYWF0d93amoqfv31V/j5+WH27Nmwt7fH2rVrIYQwGC+EwLp169CuXTs4OjrC1t
YWderUwf/+9z/Ex8frxKanp2PWrFlo3LgxbG1toVAo0LRpU0yfPh05OTk6sX/99Re6d++uHS5fv359hISE6I3YKKxOhEQi0UuKNfc/JycHM2fORI0aNWBtbY26deviu+++04st7HnKj1wuN7jdz88PDRo0QHJyMpRKpXb748ePMWvWLNSsWRNLlizRS/yBvBd1pe2rr76CUqnEypUrDb44e7ZN169fBwB069ZNL1YzjeTff/8tgZYSEZUc9vwTEZkZFxcXAMCVK1fyHf6q4ejoiJCQECxevBgAMGHCBO2+p5ONcePGYdmyZahatSqGDx8OAPj1118xdOhQnD17FkuWLNE57pIlSzBx4kTY2Njg9ddfR/Xq1XHnzh0cO3YMv/zyC9q2bZtvm44dO4aePXvCzs4OR48ehZ+fn/EXb4R33nkHBw8exKZNm/R6gnNzc7FlyxZ4enrilVde0W4v6vW/KE0SePjwYZ2h0IVNXRBC4M0338TOnTtRt25djBkzBo8ePcLPP/+M1157DQsXLsTEiRP1Prdr1y7s2bMHPXv2RJs2bXDkyBFs2LAB165dK/Tli+YZOnTokF57Nf//2LFjCA4ORnZ2Nt58803UqFEDERERWLJkCX7//XecOHEClStX1jluVlYWOnXqhIcPH+K1116DhYUF3NzcCv3uAGDz5s3IzMzE4MGDYWNjgzfffBPr1q3D4cOH9ZJotVqNfv364ZdffkHVqlUxYMAAODg44MaNG9i6dSu6deumTRaTk5PRoUMHXLp0Cf7+/hg9ejTUajUuXbqEuXPnYvLkydp7tG3bNgwYMADW1tbo168fXF1dERYWhtmzZ2Pfvn04dOhQvol1UQwYMACRkZHo1q0bZDIZtm7dijFjxsDS0hIjRowA8PzPU0GuXbuGy5cvw8vLCwqFQrs9LCwMDx48wNChQ6FSqbBr1y5cuXIFjo6OCAoKQu3atfM95pEjR3Dy5ElIpVLUqVMHQUFBqFSpUr7xqampWLVqFe7duwdnZ2e89NJLaNSokV6cEALbtm2Di4sLOnXqhKioKBw+fBhqtRr+/v7o1KkTpFLd/jDN7529e/dqv0eNPXv2QCKRoGPHjkZ9V0REZYYgIiKzsnPnTgFA2Nvbi8mTJ4t9+/aJe/fuFfgZb29v4e3tbXDf4cOHBQDRoEEDkZqaqt2ekpIi6tatKwCII0eOaLdHR0cLqVQqPD09RVxcnM6x1Gq1uHPnjvbnkJAQAUD8+eef2rbb2NiIevXqiZs3bxp9zd7e3kImk4mQkBCDf1asWKGNVSqVwsbGRvj6+uodZ/fu3QKA+PDDD5/7+uPi4gQAMWTIEL025vcdd+jQQTz7P8nPfjfGfOaHH34QAESHDh1EVlaWdvvNmzdF5cqVhYWFhbh27Zp2+7p16wQAYWFhIY4dO6bdnpubK15++WUBQERERBg8/7Pya69KpRK1atUSAERoaKjOvo8++kgAEMOGDdPZ7u3tLQCI4OBgkZGRYdT5n9asWTMhlUq1z9rBgwcFADFo0CC92GXLlgkA4pVXXtE7V0ZGhrh//7725z59+ggA4tNPP9U7TmJiosjJyRFCCJGWliYUCoWwtrYWf//9tzZGpVKJfv36CQBi9uzZ2u35PTMamnv6NM39DwgIEGlpadrtly5dEhYWFqJevXo68YU9T4U5efKkCAkJEZ9++qkYOHCgsLe3F7a2tmLPnj06cdOnTxcAxEcffaT9+6H5I5VKxeTJk/WOrWnbs38cHR3FDz/8kO93YuhP165dRVJSkk7stWvXBADRokULMXLkSL3PNG3aVNy6dUvnM2q1WvTu3VsAEP7+/mLChAliwoQJokmTJsLBwUF8//33z/U9EhGZEpN/IiIztGDBAlGpUiWdf+DWqlVLjBkzRly5ckUvvqDEdNiwYQKA+Pnnn/X2bdq0SS95Gz16tAAg1q5dW2g7n05IVq9eLWQymWjVqpX4999/jb9Y8SRZzO9PkyZNdOIHDB
ggAIioqCid7W+99ZYAIKKjo5/7+k2Z/Hfq1EkAECdPntSL//LLL/WSTk3yP3jwYL14zb6lS5caPP+z8mvvkSNHBAD
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+sAAAGKCAYAAAB0N9CfAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAf7lJREFUeJzt3Xd8U+X+B/BPkrbpbilQuksps5Q9i8qSMlTWVS7iRVBErwgOcPwuLqheBb0gzot6EVARQVEZDqDIKIVCGUUoZbaFMjpoS5vupsnz+6MmEpK0SUdO2n7er1demnOec873JA9pvnmWTAghQERERERERER2Qy51AERERERERERkiMk6ERERERERkZ1hsk5ERERERERkZ5isExEREREREdkZJutEREREREREdobJOhEREREREZGdYbJOREREREREZGeYrBMRERERERHZGSbrRERERERERHaGyToRUTO2ePFiyGQy7N27V+pQGsylS5cgk8nwyCOPNOp1hg8fDplM1qjXsNbOnTtxxx13oFWrVpDJZJg0aZLUIREREVEjYbJORFQHJSUlePvtt9G3b1+4u7tDqVQiKCgId911FxYuXIjU1FSD8u3bt0f79u2lCbYBtG/fHjKZrMbHpUuXpA7TKk3th4xLly5h4sSJSEtLw6OPPopFixbhwQcflCweIQQ6duwImUyGe++9V7I47EVd61N8fDyef/559OvXD61bt4azszO6du2K//u//0NBQYHZ444cOYJ77rkH3t7ecHNzw+DBg/Hdd9+ZLZ+ZmYnHHnsM/v7+cHZ2RpcuXfDWW29BrVYblU1NTcXixYsxYcIEBAYGQiaT1fj5deHCBbz99tsYOnQoAgIC4OTkhODgYMyYMQNnz541e9z27dsxatQoeHt7w8XFBT169MB7770HjUZj9piioiIsWrQIkZGRcHV1hbe3N/r27YuYmJh6v05xcXF44YUXMGLECHh5ednkR0Eiopo4SB0AEVFTU1RUhDvvvBMnT55Ex44dMX36dLRu3Rq5ublITEzE0qVLER4ejvDwcKlDbVAKhQKvvvqq2f3e3t62C8YGvvrqK5SWlkodht6uXbtQXl6O5cuX46GHHpI6HOzduxepqamQyWTYsWMHrl+/joCAAKnDanIeeOAB5Obm4s4778SMGTP0Cf+7776LTZs24eDBg2jXrp3BMXv27MGYMWPg7OyMBx98EB4eHvjhhx8wdepUXLlyBc8//7xB+aysLAwaNAhXr17F5MmT0alTJ+zbtw+vvvoqEhMTsXnzZoNeJPv370dMTAwUCgW6deuGrKysGu/htddew8aNGxEZGYmJEyfC09MTp06dwtdff41NmzZh+/btGDp0qMExH374IZ599ll4enrib3/7G7y9vbFr1y48//zzSEhIwPfff290nYyMDIwcORJpaWkYNWoU7r33XlRUVODixYv44YcfsGjRonq9TqtXr8aXX34JV1dXhISEQKVS1XjfRESNThARkVXeeOMNAUDMnj1baLVao/1paWnizJkzBttCQ0NFaGiojSL8y6JFiwQAsWfPnnqdJzQ0VCiVyoYJqp7S09MFADFz5sx6naehXhtbiYmJsat4//GPfwgA4oUXXhAAxFtvvSV1SJKqa31aunSpuHbtmsE2rVYr5syZIwCIp556ymCfWq0W4eHhQqlUiqSkJP32goIC0blzZ+Hk5CQuXbpkcMyMGTMEALFy5UqDazz44IMCgFi/fr1B+dTUVJGQkCBKS0uFEEIolcoaP7/WrFkjjh8/brT922+/FQBERESEwfZr164JpVIpWrVqZRCrWq0WEydOFADEt99+a3Tf/fv3Fy4uLmL37t1G11Kr1UbPrX2djhw5IpKTk0VVVZVISEhokM8ZIqL6YLJORGSlcePGCQAGXwDN0SWWph6LFi0yKLt69WoxcOBA4ebmJtzc3MTAgQPFmjVrzJ573759YuLEicLX11c4OTmJoK
AgMXnyZLF//359GXMJxKlTp0RgYKDw9vY2KG+ONcn6yJEjhUwmM/oirPP0008LAGLnzp0G2y29f3PJek0/iAwbNkzc+vu07vntj1uPv/0YHbVaLZYvXy569uwpnJ2dhaenpxg+fLjYunWrUdk1a9YIAGLNmjVix44dIioqSri4uAgfHx8xY8YMkZubazJeU/dr6nHr+3rq1CkxZcoU0bZtW+Hk5CTat28vnn32WZPX0L1WN2/eFHPnzhVBQUFCoVDUWN9udfPmTeHs7CwiIyNFaWmp8PDwEOHh4SZ/vBKiOjFcvXq1uPPOO4WXl5dwcXERHTt2FE888YS4fPmyQVmVSiUWL14sevToIVxcXISnp6fo3bu3ePXVV0VlZaVB2fj4eHHPPfeIVq1aCaVSKbp06SJef/11UVJSYvI1NJd4ARDDhg0z2KZ7/ysrK8WiRYtEaGiocHJyEp06dRKffPKJybI11SdrXb9+XQAQ3bt3N9i+Y8cOAUA8+uijRsesXbtWABAxMTH6bSqVSiiVStGhQwej9+fSpUsCgBgxYkSNsdSWrNekc+fOAoC4ceOGftuqVasEAPHiiy8alT9x4oQAIIYOHWqwXZf4v/baaxZd19rX6XZM1onIHrAbPBGRlVq3bg0AOH/+PHr37l1jWW9vbyxatAjvv/8+AOC5557T7xs+fLj+/5955hl89NFHCAwMxGOPPQYA+OGHH/Doo48iKSkJH3zwgcF5P/jgA8yfPx8uLi6YPHkyQkJCcO3aNcTHx2PTpk248847zcYUHx+P8ePHw83NDfv370dkZKTlN2+Bhx9+GLt378Y333yDl19+2WBfVVUVNmzYgICAANx999367dbef33pxqHu27cPM2fO1I/Hra0rvxACDzzwALZs2YLOnTtj7ty5KCkpwcaNGzFhwgS89957mD9/vtFxW7duxS+//ILx48djyJAhiIuLw1dffYXU1FTEx8fXeE1dHdq7d69RvLr/xsfHY8yYMaisrMQDDzyA9u3bIyEhAR988AF+/vlnHDp0CG3atDE4b0VFBUaOHIni4mJMmDABDg4ORt2tzVm/fj3Ky8sxY8YMuLi44IEHHsCaNWuwb98+g3oNAFqtFlOnTsWmTZsQGBiIadOmwdPTE5cuXcJ3332HcePGISQkBACQk5ODYcOG4ezZs+jduzfmzJkDrVaLs2fP4p133sHzzz+vf4++//57TJs2DUqlElOnToWvry927tyJN954Azt27MDevXvh7Oxs0f3UZNq0aUhMTMS4ceOgUCjw3XffYe7cuXB0dMTjjz8OoO71qSaOjo4AAAcHw69qujHxo0ePNjpmzJgx+jh0EhISUFFRgejoaKMJE0NDQ9GlSxccOHAAGo0GCoWizvGaY+o+dN3qw8LCjMrrth08eBAVFRVQKpUAgI0bNwIApkyZgitXruCXX35BQUEBwsPDMW7cOLi7uxucx9rXiYjILkn9awERUVOzZcsWAUB4eHiI559/XuzYsaPWFtKaWn337dsnAIhu3bqJgoIC/fb8/Hx9q1RcXJx++4kTJ4RcLhcBAQEiPT3d4FxardagS+3tLetbtmwRLi4uokuXLkYtmrXFr1AoxKJFi0w+bu1eq1KphIuLi1HXVyGE2LZtm77rdF3vvyFa1oWovduyqWO+/PJLfStsRUWFfvvly5dFmzZthIODg0hNTdVv17WsOzg4iPj4eP32qqoqMXz4cAFAJCQkmLz+7czFq9FoRHh4uAAgtm/fbrDvxRdfFADErFmzDLaHhoYKAGLMmDH6rs7W6Nu3r5DL5fq6tnv3bgFATJ8+3ajsRx99JACIu+++2+hapaWlIi8vT//8/vvvFwDEyy+/bHSerKwsfVfnwsJC4eXlJZRKpfjjjz/0ZTQajZg6daoAIN544w399vq0rA8aNEgUFhbqt589e1Y4ODiILl26GJRv6GEV77zzjsnW5wceeEAAEEePHjV5nLu7uwgODtY///jjjwUAsW
zZMpPl77vvPgHAoN7erq4t64cPHxYAxIABAwy2f/rpp7W2rAMQKSkp+u3BwcECgPj444+FUqk06MHQtm1bo9fd2tf
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+0AAAGKCAYAAAB5KaDYAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAVEpJREFUeJzt3Xd41FXa//HPpE0oKQQISei9CFIFo670qmABBRRERLFhAWxYQPZZRfmhosjKoyLsrqCProWyCkTpLoI0QZqKQVQIiNlUIJlkzu+P7IwOSSAJmZnvMO/XdeWS+db7zNwM3jnne47NGGMEAAAAAAAsJ8TfAQAAAAAAgJJRtAMAAAAAYFEU7QAAAAAAWBRFOwAAAAAAFkXRDgAAAACARVG0AwAAAABgURTtAAAAAABYFEU7AAAAAAAWRdEOAAAAAIBFUbQDACrk6aefls1m09q1a/0dSqU5dOiQbDabbr31Vq/ep0ePHrLZbF69R3mtWrVKl19+uWrUqCGbzaZrr73W3yEBAABRtAOA5eTm5urZZ59Vp06dVL16ddntdtWrV09/+tOfNGXKFB08eNDj+EaNGqlRo0b+CbYSNGrUSDab7aw/hw4d8neY5RJov9A4dOiQrrnmGv3www8aO3aspk2bphEjRvgtHmOMmjVrJpvNpquuuspvcVhFZeVTfn6+OnToIJvNplatWpV63FdffaVBgwYpNjZW1apV06WXXqr33nuv2HG5ubl6++23deONN6pFixaqUqWKYmNj1b17d73zzjuVFtPbb7+tO++8U126dJHdbpfNZtPChQvPet2srCxNmjRJDRs2lN1uV6NGjfTwww8rJyenTHEBgJWE+TsAAMDvsrOzdcUVV2jXrl1q1qyZRo0apZo1a+rEiRPasmWLnnvuOTVt2lRNmzb1d6iVKjQ0VE8++WSp+2NjY30XjA/8/e9/18mTJ/0dhttnn32m06dP64UXXtBNN93k73C0du1aHTx4UDabTStXrtSRI0eUlJTk77AC3vTp0/X999+f9Zg1a9aof//+ioyM1IgRIxQVFaUPPvhAw4cP108//aTJkye7j92wYYNGjx6tmjVrqnfv3ho6dKiOHz+uDz/8UDfddJO++OILvfrqq+cd05NPPqkff/xRtWrVUmJion788cezHp+bm6vu3btr586d6tevn0aOHKkdO3Zo1qxZWrdundavX6/IyMizXgMArISiHQAsZPbs2dq1a5duv/12vf7668WGUKempiovL89P0XlPWFiYnn76aX+H4TMNGjTwdwgejhw5IkmWKYznz58vSZo8ebJmzZqlhQsX6vHHH/dzVIFty5Ytev755/Xyyy9rwoQJJR5TUFCgO+64QyEhIVq/fr06dOggSZo6daq6du2qxx9/XMOGDVPDhg0lSQkJCfrHP/6hG2+8UREREe7rPPvss+rWrZvmzp2rW265RV27dq1wTJL05ptvqnnz5mrYsKGee+45TZky5axtnTlzpnbu3KlHH31Uzz33nHv7Y489pueff14vvfTSOa8BAJZiAACWMXDgQCPJ7Nix45zHpqamGkkl/kybNs3j2Lfeest07drVVKtWzVSrVs107drVLFiwoNRrr1u3zlxzzTUmPj7eREREmHr16pnrrrvObNiwwX3MtGnTjCSzZs0aj3N3795t6tata2JjYz2OL03Dhg2N3W4/53HGGNOrVy9js9nMoUOHStx/3333GUlm1apVHtvL2n7XezpmzJhiMTZs2LDEe3bv3t388Z9T1+szf/54/pnnuDgcDvPCCy+Yiy++2ERGRpro6GjTo0cPs3Tp0mLHLliwwEgyCxYsMCtXrjTJycmmSpUqJi4uztxyyy3mxIkTJcZbUntL+vnj57p7925zww03mNq1a5uIiAjTqFEj88ADD5R4D9d79Z///Mfce++9pl69eiY0NPSs+fZH//nPf0xkZKRp27atOXnypImKijJNmzY1TqezxO
OdTqd56623zBVXXGFiYmJMlSpVTLNmzcz48ePNjz/+6HFsVlaWefrpp027du1MlSpVTHR0tOnQoYN58sknTX5+vsexGzduNIMGDTI1atQwdrvdtGzZ0kydOtXk5uaW+B6emTMukkz37t09trk+//z8fDNt2jTTsGFDExERYZo3b27mzp1b4rFny6dzOXXqlGnVqpXp3r27cTqdRpJp2bJlseNWrlxpJJmxY8cW27dw4UIjyUyfPr1M93z22WeNJPP//t//O6+YzjRjxgx33pfE6XSapKQkU716dZOTk+OxLycnx1SvXt00adKkTG0AAKugpx0ALKRmzZqSpG+//dbdy1Wa2NhYTZs2TbNnz5YkPfjgg+59PXr0cP/5/vvv15w5c1S3bl2NGzdOkvTBBx9o7Nix2rFjh15++WWP67788suaOHGiqlSpouuuu04NGjTQL7/8oo0bN+qf//ynrrjiilJj2rhxowYPHqxq1appw4YNatu2bdkbXwajR4/W6tWrtWjRomI9rwUFBXr33XeVlJSk3r17u7eXt/3nyzWJ3bp16zRmzBj3fAPnGuJvjNGwYcO0ZMkStWjRQvfee69yc3P1f//3fxoyZIhefPFFTZw4sdh5S5cu1b/+9S8NHjxYl112mdavX6+///3vOnjwoDZu3HjWe7pyaO3atcXidf1348aN6t+/v/Lz8zVs2DA1atRImzZt0ssvv6zly5fryy+/VK1atTyum5eXp169eiknJ0dDhgxRWFiY6tSpc873TpIWL16s06dP65ZbblGVKlU0bNgwLViwQOvWrfPIa0lyOp0aPny4/vnPf6pu3boaOXKkoqOjdejQIb333nsaOHCge1TD8ePH1b17d+3fv18dOnTQ3XffLafTqf379+v555/X5MmT3Z/R+++/r5EjR8put2v48OGKj4/XqlWr9Oc//1krV67U2rVrK2V49ciRI7VlyxYNHDhQoaGheu+993TvvfcqPDxcd9xxh6SK59MfPf744zp8+LCWL19+1gkQXc/M9+vXr9i+/v37u+Moi/DwcElFo2jOJ6by+u6773TkyBH1799f1apV89hXrVo1XX755Vq5cqV++ukn1a9fv9LuCwBe5e/fGgAAfrdkyRIjyURFRZnJkyeblStXnrPH9Gy9wOvWrTOSTOvWrU1GRoZ7e3p6umnRooWRZNavX+/evnPnThMSEmKSkpJMamqqx7WcTqf55Zdf3K/P7GlfsmSJqVKlimnZsmWxHs5zxR8aGmqmTZtW4s9rr73mPjYrK8tUqVLFtGnTpth1li1bZiSZhx56qMLtr4yedmNKH4VwtnP+9re/uXtl8/Ly3Nt//PFHU6tWLRMWFmYOHjzo3u7qaQ8LCzMbN250by8oKDA9evQwksymTZtKvP+ZSou3sLDQNG3a1EgyK1as8Nj38MMPG0nmtttu89jesGFDI8n079/fnDx5skz3/6NOnTqZkJAQd66tXr3aSDKjRo0qduycOXOMJNO7d+9i9zp58qT57bff3K+HDh1qJJnHH3+82HXS0tKMw+EwxhiTmZlpYmJijN1uN19//bX7mMLCQjN8+HAjyfz5z392bz+fnvZu3bqZzMxM9/b9+/ebsLCwYj3O58qns1m3bp0JCQkxs2fP9oippF7tYcOGGUlm69atJV6revXqpn79+ue8Z0FBgWnXrp2x2Wxm9+7d5xXTmc7V0758+XIjyUyYMKHE/RMmTDCSzOeff37OewGAVTB7PABYyJAhQ/TCCy/IGKMXXnhB/fv3V61atdSsWTNNmDBB3333Xbmu97e//U1S0ezTMTEx7u01atTQtGnTJMljFub//d//ldPp1F/+8pdiM9LbbLZSn3meP3++rr/+erVr104bN24s9zPbhYWFmj59eok/8+bNcx8XFRWla6+9Vnv37tX27ds9rvGPf/xDkjRq1KgKt9+fXLHOnDnT4/ngBg0aaOLEiSooKNCiRYuKnXfTTTfp8ssvd78ODQ
3VmDFjJBXNAn4+vvjiCx08eFADBw5097S6TJ06VXFxcVq8eLHy8/OLnTtz5kxVqVKlXPfbuXOntm/frt69e7tzrUe
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot_account('200001928')\n",
"plot_account('366351')\n",
"plot_account('365966')\n",
"plot_account('365568')\n",
"plot_account('200129601')\n",
"plot_account('402410')\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "31407450-a833-4fce-8b0b-dba1b1de585f",
"metadata": {},
2025-12-05 11:03:19 +01:00
"outputs": [],
2025-12-05 09:49:17 +01:00
"source": [
"# 1. Prepare stock dataset ISIN-by-ISIN\n",
"stocks_isin = stocks[[\n",
" \"Registrar Account - ID\", \"Product - Isin\",\n",
" \"Centralisation Date\", \"Quantity - AUM\"\n",
"]].copy()\n",
"\n",
"stocks_isin[\"Centralisation Date\"] = pd.to_datetime(stocks_isin[\"Centralisation Date\"])\n",
"stocks_isin = stocks_isin.sort_values(\n",
" [\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"]\n",
")\n",
"\n",
"# 2. Prepare flows dataset ISIN-by-ISIN\n",
"flows_isin = flows[[\n",
" \"Registrar Account - ID\", \"Product - Isin\",\n",
" \"Centralisation Date\", \"Quantity - NetFlows\"\n",
"]].copy()\n",
"\n",
"flows_isin[\"Centralisation Date\"] = pd.to_datetime(flows_isin[\"Centralisation Date\"])\n",
"\n",
"flows_isin = flows_isin.groupby(\n",
" [\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"]\n",
")[\"Quantity - NetFlows\"].sum().reset_index()\n",
"\n",
"# 3. Merge stocks & flows ISIN-by-ISIN\n",
"merged_isin = stocks_isin.merge(\n",
" flows_isin,\n",
" on=[\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"],\n",
" how=\"left\"\n",
")\n",
"\n",
"merged_isin[\"Quantity - NetFlows\"] = merged_isin[\"Quantity - NetFlows\"].fillna(0)\n",
"\n",
"# 4. Compute expected stock per ISIN for each account\n",
"merged_isin[\"prev_stock\"] = merged_isin.groupby(\n",
" [\"Registrar Account - ID\", \"Product - Isin\"]\n",
")[\"Quantity - AUM\"].shift(1)\n",
"\n",
"merged_isin[\"prev_netflows\"] = merged_isin.groupby(\n",
" [\"Registrar Account - ID\", \"Product - Isin\"]\n",
")[\"Quantity - NetFlows\"].shift(1).fillna(0)\n",
"\n",
"merged_isin[\"expected_stock\"] = merged_isin[\"prev_stock\"] + merged_isin[\"prev_netflows\"]\n",
"\n",
"# 5. Detect ruptures ISIN-by-ISIN (no aggregation)\n",
"TOL = 1e-6\n",
"merged_isin[\"gap\"] = merged_isin[\"Quantity - AUM\"] - merged_isin[\"expected_stock\"]\n",
"merged_isin[\"rupture_flag\"] = (\n",
" merged_isin[\"prev_stock\"].notna()\n",
" & (merged_isin[\"gap\"].abs() > TOL)\n",
")\n",
"\n",
"# 6. Summarize ruptures per (Account, ISIN)\n",
"rupture_isin_summary = merged_isin.groupby(\n",
" [\"Registrar Account - ID\", \"Product - Isin\"]\n",
").agg(\n",
" n_ruptures=(\"rupture_flag\", \"sum\"),\n",
" obs=(\"rupture_flag\", \"count\"),\n",
" rupture_ratio=(\"rupture_flag\", \"mean\"),\n",
" max_gap=(\"gap\", lambda x: x.abs().max())\n",
").reset_index()\n",
"\n",
"# Sort by worst ISIN trajectories\n",
"rupture_isin_summary = rupture_isin_summary.sort_values(\n",
" \"rupture_ratio\", ascending=False\n",
")\n",
"\n",
2025-12-05 11:03:19 +01:00
"rupture_isin_summary.head(20)\n",
"\n",
"rupture_isin_summary.to_csv('csv_outputs/rupture_isin_summary.csv')\n"
2025-12-05 09:49:17 +01:00
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 20,
2025-12-05 09:49:17 +01:00
"id": "baa4b6cd-887d-45a6-af27-253a9aa8710f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
2025-12-05 11:03:19 +01:00
"execution_count": 20,
2025-12-05 09:49:17 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Strong ruptures (ratio > 0.5 OR huge max_gap)\n",
"strong = rupture_isin_summary[\n",
" (rupture_isin_summary[\"rupture_ratio\"] > 0.5)\n",
" | (rupture_isin_summary[\"max_gap\"] > 50000)\n",
"]\n",
"\n",
"def find_successors(account_id, isin, window_days=15):\n",
" # Extract rupture dates for (account_id, isin)\n",
" ruptures = merged_isin[\n",
" (merged_isin[\"Registrar Account - ID\"] == account_id)\n",
" & (merged_isin[\"Product - Isin\"] == isin)\n",
" & (merged_isin[\"rupture_flag\"])\n",
" ][\"Centralisation Date\"].unique()\n",
"\n",
" if len(ruptures) == 0:\n",
" return []\n",
"\n",
" candidates = []\n",
"\n",
" for rupture_date in ruptures:\n",
" start = rupture_date - pd.Timedelta(days=window_days)\n",
" end = rupture_date + pd.Timedelta(days=window_days)\n",
"\n",
" # Look for accounts with strong positive jump at the same time\n",
" window_df = merged_isin[\n",
" (merged_isin[\"Centralisation Date\"] >= start)\n",
" & (merged_isin[\"Centralisation Date\"] <= end)\n",
" & (merged_isin[\"Product - Isin\"] == isin)\n",
" ]\n",
"\n",
" # Look for positive gap (jump)\n",
" pos_jumps = window_df[window_df[\"gap\"] > 0]\n",
"\n",
" candidates.extend(pos_jumps[\"Registrar Account - ID\"].unique())\n",
"\n",
" # Remove self\n",
" candidates = [c for c in candidates if c != account_id]\n",
"\n",
" return list(set(candidates))\n",
"\n",
"find_successors(\"200129601\", \"FR0010135103\")\n"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 24,
2025-12-05 09:49:17 +01:00
"id": "0b834da2-f781-476d-84a6-aebb38fb8dac",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCcAAAJ0CAYAAAAh/0nHAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xd4VMXewPHvbnrvoYUk1NB77x2k9yJKERuKckUUC76AoIh6UbwIFlBsiCAgvUhv0nsSeglJIL2Rnuy8f2yyyZJNoYSN+vs8Tx7YU+bMmT1zzuycKRqllEIIIYQQQgghhBDCTLTmjoAQQgghhBBCCCH+3aRyQgghhBBCCCGEEGYllRNCCCGEEEIIIYQwK6mcEEIIIYQQQgghhFlJ5YQQQgghhBBCCCHMSionhBBCCCGEEEIIYVZSOSGEEEIIIYQQQgizksoJIYQQQgghhBBCmJVUTgghhBBCCCGEEMKsLM0dASGEEEIIIR6VkJAQvvvuO1xcXJg8eTJarbyLE0KIvwOpnBBCCCGEEP8IGRkZDBs2jNOnT7NlyxapmBBCiL8RuWOLMsXf3x+NRsOyZcuK3K5Tp05oNBpmzpz5WOIlHlzud7Vnz55SP1bu9XPjxo1SP5Z4NMaNG1eiPF+WzZw5U+5HAoAFCxag0WhYvXq1uaPyrzV16lSOHTvG999/T5cuXcwdnX+NZcuWodFoGDdu3EOF8+yzz2Jpacm5c+ceTcSEEH8rUjkhxEPQaDRoNBpzR0OIMulRFVZF2Xbjxg00Gg3+/v7mjopZRUVFMXPmTJo3b86QIUPMHZ1/pVWrVvG///2Pjz76iCeffNLc0SmWVGwWNHPmTKysrHj11VfNHRUhhBlI5YQQQgizmTt3LsHBwQwaNMjcURHiocyaNYv4+Hj5oWkmSilu3brF119/zZtvvmnu6PzrDBo0iODgYObOnftQ4fj4+PDss8+yZ88e1q9f/4hiJ4T4u5DKCSGEEGZToUIFatWqhYuLi7mjIsQDi4+PZ9myZVSqVIlevXqZOzr/ShqNhilTpvD888+bOyr/Si4uLtSqVYsKFSo8dFgTJkwA4PPPP3/osIQQfy9SOSH+kS5dusQLL7xAtWrVsLW1xcXFhQ4dOvDzzz+b3P7mzZvMmzePLl264Ovri42NDa6urrRr146vv/4anU5ntH1uU8xcud07cv9yxzzI36w9ISGBKVOm4O/vj62tLTVq1GDevHmGsMPCwnjhhReoXLkyNjY2BAQE8L///e+RxBeMm15nZWXx8ccfU7duXezs7PD09GT48OFcuHDhQZKbW7du8cwzz1ChQgXDub377rukpqYWu+/vv/9Or1698PLywtramkqVKvHUU08RFBT0QHEx5UHSqzj5u/R8++23NG3aFAcHB1xdXenduzeHDx82uV/+cTHWrVtHly5dcHd3LzAux4ULFxg/fjx+fn7Y2Njg7u5O165dWblypclw8zcPvnnzJmPGjDF8HzVr1mTmzJlFfh/btm2jb9++eHt7Y21tTcWKFRkxYgTHjx83uX3+sUT2799Pv3798PLyQqvVsmzZMvz9/Rk/fjwAP/zwg1H+6NSpkyGc4sacWLFiBV27dsXd3R0bGxv8/Px45plnuHTpUrHpu3v3bnr06IGbmxt2dnY0adKEH3/8sdA0KEpqaiozZ86kRo0a2NjYUKFCBcaOHUtISEix+544cYLRo0cbrj13d3d69uzJ5s2b7zsexaU7FJ+mhXW3yb88JiaGl19+2RBnPz8/XnvtNeLi4oz2GTduHFWqVAH0+ezee2H+7R42TrGxsfznP/+hWrVq2NjYGF1HADt37mTw4MFUqFABa2trvL29GTRoEH/99ZfJY16+fJlnnnmGKlWqYGNjg6OjI35+fvTp04fvv//e5D6F+f7770lOTubpp582OQBj/vM/c+YMgwcPxsvLCzs7Oxo0aMCCBQvIzs4usF9SUhLffvstgwcPpkaNGj
g4OODg4ED9+vV59913iY+PNxmfkt5nCvMojns/+S//dX369GkGDx6Mp6cnNjY21KlTh//+978opQqN7/1+96DP0//9739p1aoVrq6u2NraEhAQwJtvvklMTEyxaWRKeHg4U6ZMoXbt2tjb2+Pk5ETz5s1ZuHAhWVlZRttqNBpmzZoF6Fvd5M83Je0Klz/dDh8+TJ8+ffDw8MDJyYmOHTuyf/9+w7Zbt26la9euuLm54ejoSPfu3Tl58mShYd/vM6iobnw7duygX79+lCtXDisrK9zc3KhRowZPPfUU+/btK7B9o0aNaNiwIbt37yY4OLhEaSGE+IdQQpQhfn5+ClDff/99kdt17NhRAWrGjBkF1q1cuVLZ2toqQNWqVUsNGjRIdenSRTk4OChAjR8/vsA+s2fPVoCqUqWK6tq1qxo5cqTq2LGjsra2VoAaPHiw0ul0hu3Xrl2rxo4dqwAFqLFjxxr9RUVFKaWU+v777xWgBgwYoGrXrq28vb3VkCFDVI8ePZSdnZ0C1KRJk9SVK1dU+fLlVeXKldXw4cNV586dlYWFhQLURx999NDxVUqp69evK0D5+fmpwYMHKysrK9WtWzc1cuRIVbVqVQUoR0dHdejQoRJ8U3mCg4OVt7e3AlSFChXUsGHDVO/evZWdnZ1q3bq1at26tQLU7t27jfbLzMxUw4cPV4CysbFRbdq0UcOGDVMNGzZUgLKzs1Nbtmy5r7jkXj/Xr19/6PQqTu53/9prrymNRqPatWunRo0aperVq6cAZWlpqdasWVNoHCdNmqQA1axZMzVq1CjVsWNHtW/fPqWUUhs3bjRcwwEBAWrkyJGqS5cuhmvimWeeKRDujBkzFKDGjBmjPDw8VLly5dSwYcNU3759Ddd+27ZtVWpqaoF9p0+frgCl0WhU27Zt1ahRo1SjRo0UoCwsLNTSpUsL7JObB1966SWl1WpVnTp11MiRI1WPHj3U8uXL1euvv67atm2rAFWtWjWj/DF37lxDOLn56N48r9Pp1JgxYwxp2aVLFzVy5EhVs2ZNBSh7e3uT10du+r733ntKo9Gopk2bqpEjR6pWrVoZvrPPPvusuK/XSHJysmF/BwcH1bdvXzVs2DBVrlw55eHhYYinqfvR559/rrRarQJUo0aN1NChQ1W7du0M196sWbPuKy7FpbtShadprtz70tixY00u79+/v6pWrZpydXVVAwcOVIMGDVJubm6G6zEyMtKwz7fffquGDBliSJt774W5HjZOffr0UVWqVFFubm6qf//+atiwYWr06NGG7V5//XUFKK1Wq1q0aKGGDRumWrZsqTQajbKwsFDfffedUbjnzp1Tzs7OhnMaPHiwGjZsmGrdurVydHRUDRs2LPJ7uFeHDh0UoHbs2GFyfe75T5w4Udna2ip/f381YsQI1aNHD8O1MHTo0AL3of379ytAeXl5qXbt2hn28fDwUICqXr26io6OLnC8ktxnivKwx73f/Jd7Xb/11lvK2tpa1a5d23Cfzr3vTZ482WRc7/e7V0qpsLAwVb9+fQUod3d31a1bNzVo0CBD/P39/dWNGzeKTaf89u7da8gn/v7+qn///qpnz56GZT169FAZGRmG7ceOHWt43jVs2NAo33z77bclOmZuuk2dOlVZWlqqxo0bqxEjRhju3zY2NurgwYNq4cKFSqvVqjZt2qjhw4cb7qOOjo7q8uXLBcJ9kGdQYXl42bJlSqPRKI1Go1q2bKlGjBih+vfvr5o0aaIsLCwK/V6nTp2qAPXhhx+WKC2EEP8MUjkhypSHrZw4e/assrGxUba2tmr16tVG627cuGEojPzwww9G644eParOnTtX4DhhYWGGwsPKlSsLrM8tbBUm92ENqH79+qnk5GTDuhMnTihLS0vDD4wXX3xRZWZmGtb/8ccfClDOzs5G+z1ofHMrJwDl6empzpw5Y1iXlZWlXnnlFUPlRVpaWqHndK/mzZsrQA0fPtzoh+/NmzdVtWrVDMe8t3
LinXfeUYBq2bKlunbtmtG6VatWKQsLC+Xm5qbi4uJKHJfCKice9PstSu552dnZqZ07dxqt+/jjjxWgXFxcVEREhMk
"text/plain": [
"<Figure size 1400x700 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df = merged_isin.copy()\n",
"\n",
"# Ajouter année / mois\n",
"df[\"year\"] = df[\"Centralisation Date\"].dt.year\n",
"df[\"month\"] = df[\"Centralisation Date\"].dt.month\n",
"\n",
"# 1. Nombre total de lignes par mois\n",
"total = df.groupby([\"year\", \"month\"]).size().reset_index(name=\"total_lines\")\n",
"\n",
"# 2. Nombre de ruptures par mois\n",
"ruptures = df[df[\"rupture_flag\"]].groupby([\"year\", \"month\"]).size().reset_index(name=\"n_ruptures\")\n",
"\n",
"# 3. Merge pour obtenir total + ruptures\n",
"ratio = total.merge(ruptures, on=[\"year\",\"month\"], how=\"left\")\n",
"ratio[\"n_ruptures\"] = ratio[\"n_ruptures\"].fillna(0)\n",
"\n",
"# 4. Proportion (en %)\n",
"ratio[\"rupture_ratio\"] = ratio[\"n_ruptures\"] / ratio[\"total_lines\"]\n",
"\n",
"# 5. Pivot pour heatmap\n",
"heatmap_ratio = ratio.pivot(index=\"year\", columns=\"month\", values=\"rupture_ratio\").fillna(0)\n",
"\n",
"# 6. Plot\n",
"plt.figure(figsize=(14, 7))\n",
"sns.heatmap(\n",
" heatmap_ratio, \n",
" cmap=\"Reds\",\n",
" linewidths=.3,\n",
" linecolor=\"grey\",\n",
" annot=True,\n",
" fmt=\".2%\",\n",
" cbar_kws={'label': 'Proportion de ruptures'}\n",
")\n",
"\n",
"plt.title(\"Heatmap de la proportion de ruptures (par année et mois)\", fontsize=16)\n",
"plt.xlabel(\"Mois\")\n",
"plt.ylabel(\"Année\")\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 27,
2025-12-05 09:49:17 +01:00
"id": "aa5862ab-ec8e-47f8-8cb0-cd51503efed8",
"metadata": {},
"outputs": [],
"source": [
"df = merged_isin.copy()\n",
"\n",
"# Ajouter year / month au cas où\n",
"df[\"year\"] = df[\"Centralisation Date\"].dt.year\n",
"df[\"month\"] = df[\"Centralisation Date\"].dt.month\n",
"\n",
"# Merge géographique\n",
"df = df.merge(\n",
" geo[[\"Registrar Account - ID\", \"country\"]],\n",
" on=\"Registrar Account - ID\",\n",
" how=\"left\"\n",
")\n",
"\n",
"df[\"country\"] = df[\"country\"].fillna(\"UNKNOWN\")\n",
"\n",
"# Total des lignes par pays\n",
"total_country = df.groupby(\"country\").size().reset_index(name=\"total_obs\")\n",
"\n",
"# Nombre de ruptures\n",
"rupt_country = (\n",
" df[df[\"rupture_flag\"]]\n",
" .groupby(\"country\")\n",
" .size()\n",
" .reset_index(name=\"ruptures\")\n",
")\n",
"\n",
"# Merge + ratios\n",
"country_stats = total_country.merge(rupt_country, on=\"country\", how=\"left\")\n",
"country_stats[\"ruptures\"] = country_stats[\"ruptures\"].fillna(0)\n",
"country_stats[\"rupture_ratio\"] = country_stats[\"ruptures\"] / country_stats[\"total_obs\"]\n",
"\n",
"# Tri (rupture ratio décroissant)\n",
"country_stats = country_stats.sort_values(\"rupture_ratio\", ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "86d2a91c-d8d8-416c-8dc4-dc3f4ae7ca90",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"customdata": {
"bdata": "xU7sxE5sWEAAAAAAAMBvQAAAAAAAQHBA198e2OhdR0AAAAAAABB9QAAAAAAAGI9AlH+9dRU2RUAAAAAAAJjBQAAAAACAvNRAHFtG3vbHQ0AAAAAAAICOQAAAAAAARqNA0F5CewntOUAAAAAAAAA8QAAAAAAAAFtANJzUfXlsOUAAAAAAAAAuQAAAAAAAgE1A4SpTwHwgOUAAAAAAAMBYQAAAAAAAoHhAZluhtRLZN0AAAAAAYMHrQAAAAACgGA1BXn0WV0YbNkAAAAAAuMQCQQAAAACYOSVBPxKbCsIRNkAAAAAAYM3zQAAAAACMbhZBzafGIeilM0AAAAAAQEn3QAAAAAAIoR1By3uP6ZWYMUAAAAAA4MPqQAAAAAB8AxNBNeF+QPqVLkAAAAAAwBLaQAAAAADATwVBb8JdUw+UKkAAAAAAwKHWQAAAAACwSQVBjhDbtOLeKUAAAAAAAOBxQAAAAAAARqFARQMgZFR6KEAAAAAAAMCMQAAAAAAAXb1ATK4gJleQJ0AAAAAAALCbQAAAAAAAYM1A05E9A1HpJkAAAAAAAKBmQAAAAAAAsJhAKTDp1DzIJUAAAAAAAAA2QAAAAAAAQGlAj8DhOzKPJEAAAAAA0FELQQAAAAA1nEBBIPw39b67I0AAAAAAALagQAAAAADAK9VA1sr0lOakIUAAAAAAALCAQAAAAAAApbdAL/vifHPjIEAAAAAAAPixQAAAAABgmepATufRh/2UHUAAAAAAAABXQAAAAAAAcJNAyEIWspCFHEAAAAAAAIBEQAAAAAAA+IFALDDozo3/EUAAAAAAAHCbQAAAAABADuNAYQb4iZOHDkAAAAAAAIBIQAAAAAAAEJRAawqzdyc0DkAAAAAAAFiiQAAAAADgXe5AfscrRvyXDUAAAAAAAIBOQAAAAAAAxJlAeeJY/KtzBEAAAAAAAIBFQAAAAAAASJpAyp2ihTMgBEAAAAAAAABcQAAAAAAAZLFAAAAAAADAAkAAAAAAAGBoQAAAAAAAQMBAC0fmBDyf/j8AAAAAAAAQQAAAAAAAIGpAmb5KGJsF9T8AAAAAAAAiQAAAAAAAaIVAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADZAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGRAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFRA",
"dtype": "f8",
"shape": "37, 3"
},
"hovertemplate": "Pays=%{x}<br>% de ruptures=%{customdata[0]:.2f}<br>Nb de ruptures=%{customdata[1]}<br>Nb d'observations=%{customdata[2]}<extra></extra>",
"legendgroup": "",
"marker": {
"color": "#636efa",
"pattern": {
"shape": ""
}
},
"name": "",
"orientation": "v",
"showlegend": false,
"textposition": "auto",
"type": "bar",
"x": [
"JAPAN",
"TAIWAN",
"SWEDEN",
"DENMARK",
"SOUTH AFRICA",
"LITHUANIA",
"FINLAND",
"GERMANY",
"ITALY",
"LUXEMBOURG",
"SWITZERLAND",
"SPAIN",
"BELGIUM",
"UNITED KINGDOM",
"MAURITIUS",
"PORTUGAL",
"AUSTRIA",
"UNITED ARAB EMIRATES",
"CZECH REPUBLIC",
"FRANCE",
"MONACO",
"ISRAEL",
"NETHERLANDS",
"GREECE",
"CANADA",
"LATAM",
"INTERNATIONAL",
"US OFFSHORE",
"MALTA",
"IRELAND",
"UNKNOWN",
"SINGAPORE",
"POLAND",
"HONG KONG",
"NORWAY",
"SOUTH KOREA",
"UNITED STATES"
],
"xaxis": "x",
"y": {
"bdata": "L/RCL/RC7z/W1t8e2OjdP6kyY96BJts/gO/U1N9R2T97Ce0ltJfQP5zUfXlsRdA/ryWHuMoU0D/+dCBZfobOP+ndyiffS8w/y1RqsbA/zD/yMlDZRybJP2DwmJvehcY/S+xvhS+Twz89SR0rmQLBP624d5KljsA/WGCP5uZUvz/S/YVPZSm+P3SHbeWaU70/uetni5Xhuz+iUrAj5FC6P83wKBs8Qrk/3w1YBqKVtj+OGOV2BJ61P2UjU7227rI/V8T5cQBBsj++60dGqwmnP8NLYQb4iaM/OsmlI5RUoz8T5pZ0ofCiP8ScFfGoLZo/AqHkTtHCmT8AAAAAAACYP2N/GCwcmZM/40X5mXToij8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
"dtype": "f8"
},
"yaxis": "y"
}
],
"layout": {
"bargap": 0.2,
"barmode": "relative",
"legend": {
"tracegroupgap": 0
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermap": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermap"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Proportion de ruptures par pays (avec volumes au survol)"
},
"xaxis": {
"anchor": "y",
"domain": [
0,
1
],
"tickangle": -45,
"title": {
"text": "Pays"
}
},
"yaxis": {
"anchor": "x",
"domain": [
0,
1
],
"tickformat": ".1%",
"title": {
"text": "Proportion de ruptures"
}
}
}
2025-12-05 11:03:19 +01:00
}
2025-12-05 09:49:17 +01:00
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# On ajoute une colonne en % pour laffichage\n",
"country_stats_plot = country_stats.copy()\n",
"country_stats_plot[\"rupture_pct\"] = country_stats_plot[\"rupture_ratio\"] * 100\n",
"\n",
"# Tri décroissant par proportion de ruptures\n",
"country_stats_plot = country_stats_plot.sort_values(\"rupture_ratio\", ascending=False)\n",
"\n",
"fig = px.bar(\n",
" country_stats_plot,\n",
" x=\"country\",\n",
" y=\"rupture_ratio\",\n",
" hover_data={\n",
" \"rupture_pct\": ':.2f',\n",
" \"ruptures\": True,\n",
" \"total_obs\": True,\n",
" \"rupture_ratio\": False, # on cache la version décimale\n",
" },\n",
" labels={\n",
" \"country\": \"Pays\",\n",
" \"rupture_ratio\": \"Proportion de ruptures\",\n",
" \"rupture_pct\": \"% de ruptures\",\n",
" \"ruptures\": \"Nb de ruptures\",\n",
" \"total_obs\": \"Nb d'observations\"\n",
" },\n",
" title=\"Proportion de ruptures par pays (avec volumes au survol)\"\n",
")\n",
"\n",
"# Format en %\n",
"fig.update_yaxes(tickformat=\".1%\")\n",
"\n",
"fig.update_layout(\n",
" xaxis_tickangle=-45,\n",
" bargap=0.2\n",
")\n",
"\n",
"fig.show()\n"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 32,
2025-12-05 09:49:17 +01:00
"id": "e1c114db-5fbd-4cd3-a897-b9d4c96053fd",
"metadata": {},
"outputs": [],
"source": [
"df[df['country']=='JAPAN'].to_csv('Japan.csv')"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 33,
2025-12-05 09:49:17 +01:00
"id": "95bc353d-e883-4989-aaca-1b3c9b51ee5a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"=== BASIC NUMERIC STATS ===\n",
"count 12501.000000\n",
"mean 0.069464\n",
"std 0.137360\n",
"min 0.000000\n",
"1% 0.000000\n",
"5% 0.000000\n",
"10% 0.000000\n",
"25% 0.000000\n",
"50% 0.013699\n",
"75% 0.069767\n",
"90% 0.213497\n",
"95% 0.333333\n",
"99% 0.720000\n",
"max 0.992308\n",
"Name: rupture_ratio, dtype: float64\n",
"rupture_bucket\n",
"00.1% 12\n",
"0.11% 619\n",
"15% 3078\n",
"510% 1189\n",
"1025% 1463\n",
"2550% 703\n",
"50100% 311\n",
"0% 5126\n",
"Name: count, dtype: int64\n",
"\n",
"=== DISTRIBUTION (PERCENT) ===\n",
"rupture_bucket\n",
"00.1% 0.10\n",
"0.11% 4.95\n",
"15% 24.62\n",
"510% 9.51\n",
"1025% 11.70\n",
"2550% 5.62\n",
"50100% 2.49\n",
"0% 41.00\n",
"Name: count, dtype: float64\n",
"\n",
"Comptes avec 0 rupture = 5126 (41.00%)\n",
"Comptes avec rupture_ratio > 75% = 99 (0.79%)\n",
"Comptes avec rupture_ratio > 10% = 2477 (19.81%)\n"
]
}
],
"source": [
"rs = rupture_summary.copy()\n",
"\n",
"# 1. Stats numériques classiques\n",
"print(\"\\n=== BASIC NUMERIC STATS ===\")\n",
"print(rs[\"rupture_ratio\"].describe(percentiles=[0.01, 0.05, 0.10, 0.25, 0.5, 0.75, 0.90, 0.95, 0.99]))\n",
"\n",
"\n",
"# 2. Distribution par classes (bins)\n",
"\n",
"rs[\"rupture_bucket\"] = pd.cut(\n",
" rs[\"rupture_ratio\"],\n",
" bins=[0, 0.001, 0.01, 0.05, 0.10, 0.25, 0.50, 1.01],\n",
" labels=[\n",
" \"00.1%\",\n",
" \"0.11%\",\n",
" \"15%\",\n",
" \"510%\",\n",
" \"1025%\",\n",
" \"2550%\",\n",
" \"50100%\"\n",
" ],\n",
" include_lowest=True\n",
")\n",
"\n",
"# Ajouter la catégorie \"0%\"\n",
"rs[\"rupture_bucket\"] = rs[\"rupture_bucket\"].cat.add_categories(\"0%\")\n",
"\n",
"# Remplacer les 0% exacts\n",
"rs.loc[rs[\"rupture_ratio\"] == 0, \"rupture_bucket\"] = \"0%\"\n",
"\n",
"bucket_counts = rs[\"rupture_bucket\"].value_counts().sort_index()\n",
"print(bucket_counts)\n",
"\n",
"\n",
"# 3. Pourcentages\n",
"bucket_percent = (bucket_counts / len(rs) * 100).round(2)\n",
"\n",
"print(\"\\n=== DISTRIBUTION (PERCENT) ===\")\n",
"print(bucket_percent)\n",
"\n",
"\n",
"# 4. Nombre de comptes totalement propres\n",
"no_rupture = (rs[\"n_ruptures\"] == 0).sum()\n",
"print(f\"\\nComptes avec 0 rupture = {no_rupture} ({no_rupture/len(rs)*100:.2f}%)\")\n",
"\n",
"# 5. Comptes extrêmement problématiques\n",
"severe = (rs[\"rupture_ratio\"] > 0.75).sum()\n",
"print(f\"Comptes avec rupture_ratio > 75% = {severe} ({severe/len(rs)*100:.2f}%)\")\n",
"\n",
"medium = (rs[\"rupture_ratio\"] > 0.10).sum()\n",
"print(f\"Comptes avec rupture_ratio > 10% = {medium} ({medium/len(rs)*100:.2f}%)\")\n"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 35,
2025-12-05 09:49:17 +01:00
"id": "425b36d0-c92a-4405-be28-35b1fc292fec",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre total de ruptures en 2021 : 71090\n",
"\n",
"=== RUPTURES 2021 — POSITIVES vs NEGATIVES ===\n",
"gap_type\n",
"negative 38092\n",
"positive 32998\n",
"Name: count, dtype: int64\n",
"\n",
"(%)\n",
"gap_type\n",
"negative 53.58%\n",
"positive 46.42%\n",
"Name: proportion, dtype: object\n",
"\n",
"=== STATISTIQUES DES GAPS ===\n",
" count mean std min 25% \\\n",
"gap_type \n",
"negative 38092.0 -7150.538497 99000.868201 -5.257530e+06 -438.04975 \n",
"positive 32998.0 9865.849003 139488.218647 1.000000e-05 22.76075 \n",
"\n",
" 50% 75% max \n",
"gap_type \n",
"negative -57.9725 -2.74375 -6.000000e-06 \n",
"positive 159.3720 1007.14050 1.814960e+07 \n"
]
},
{
"data": {
2025-12-05 11:03:19 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2QAAAHWCAYAAAAYdUqfAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAhOlJREFUeJzt3XlcVNX7B/DPzMCwySIim6IirriviJpaoqhkWpaaprj/LLCUXLJF0Urb3MqtvqmoabmUWmpuuKXhRpI7LqlYsrkAAsoyc35/wFwYZtiG0WH5vF+vecHce+69Zy6j8MxzznNkQggBIiIiIiIieubkpu4AERERERFRVcWAjIiIiIiIyEQYkBEREREREZkIAzIiIiIiIiITYUBGRERERERkIgzIiIiIiIiITIQBGRERERERkYkwICMiIiIiIjIRBmREREREREQmwoCMiCq10NBQyGSyZ3KtHj16oEePHtLzw4cPQyaTYevWrc/k+qNGjUK9evWeybVKQvP6Dx8+bOquVEhhYWGQyWS4deuWqbtCRERPEQMyIqowNH+gah6WlpZwd3eHv78/vv76azx69Mgo17l79y5CQ0MRFRVllPMZU3nuG1F+f/75J0JDQ5GUlGTqrpjM6dOnERwcjGbNmsHGxgZ16tTB4MGDcfXqVb3tL1++jD59+qBatWpwdHTEiBEjkJiYqNXmypUrmD59Olq3bg1bW1u4ubkhICAAZ86c0TlfdHQ0pkyZgs6dO8PS0pIBPlE5xYCMiCqcuXPnYv369VixYgUmTZoEAJg8eTJatGiBc+fOabX98MMP8fjx41Kd/+7du5gzZ06pg559+/Zh3759pTqmtIrq2//+9z9ER0c/1esTldSff/6JOXPmVOmA7PPPP8fPP/+Mnj17YsmSJZgwYQKOHj2Ktm3b4sKFC1pt//33X3Tr1g3Xr1/HvHnzMHXqVOzatQu9evVCZmam1O7777/H//73P7Rv3x4LFixASEgIoqOj0alTJxw4cEDrnBEREdKHVU2bNn0mr5mISs/M1B0gIiqtvn37on379tLzmTNn4uDBg3jxxRfx0ksv4fLly7CysgIAmJmZwczs6f5Xl56eDmtrayiVyqd6neKYm5ub9PpU8aSlpcHGxsbU3SiVitTnkJAQbNy4Uev/hiFDhqBFixb47LPP8MMPP0jb582bh7S0NERGRqJOnToAgI4dO6JXr14ICwvDhAkTAACvv/46QkNDUa1aNenYMWPGoGnTpggNDYWfn5+0/aWXXkJSUhJsbW3x1VdfMbNOVE4xQ0ZElcILL7yAjz76CLdv39b6I0ffHLL9+/eja9eucHBwQLVq1dC4cWO8//77AHLmPXXo0AEAMHr0aGl4ZFhYGICceWLNmzdHZGQkunXrBmtra+nYgnPINFQqFd5//324urrCxsYGL730Eu7cuaPVpl69ehg1apTOsfnPWVzf9M0hS0tLw7vvvgsPDw9YWFigcePG+OqrryCE0Gonk8kQHByM7du3o3nz5rCwsECzZs2wZ88e/Te8gH///RcDBw6EjY0NnJ2dMWXKFGRkZOhte/LkSfTp0wf29vawtrZG9+7dcfz4ca02jx49wuTJk1GvXj1YWFjA2dkZvXr1wl9//VVsXw4fPoz27dvD0tISXl5e+Pbbb/W+D9asWYMXXngBzs7OsLCwgLe3N1asWKFzvnr16uHFF1/Evn370Lp1a1haWsLb2xu//PKLVrusrCzMmTMHDRs2hKWlJWrUqIGuXbti//79xfb54sWLeOGFF2BlZYXatWvjk08+gVqt1tv2999/x3PPPQcbGxvY2toiICAAFy9eLPYamiG/R44cwVtvvQVnZ2fUrl0bQOHzD/XdN817ZcOGDWjcuDEsLS3Rrl07HD16VOu4adOmAQA8PT2l9+qtW7dw69YtrfdtwXOHhobqXP/SpUsYNmwYqlevjq5du0r7f/jhB7Rr1w5WVlZwdHTE0KFDdf
5tFea///7DmDFj4OLiIr3fV69erdVGMw9y8+bN+PTTT1G7dm1YWlqiZ8+euH79erHX6Ny5s84HNQ0bNkSzZs1w+fJlre0///wzXnzxRSkYAwA/Pz80atQImzdvlra1a9dOKxgDgBo1auC5557TOaejoyNsbW2L7ScRmRYzZERUaYwYMQLvv/8+9u3bh/Hjx+ttc/HiRbz44oto2bIl5s6dCwsLC1y/fl0KCJo2bYq5c+di1qxZmDBhAp577jkAOX9Yady/fx99+/bF0KFD8cYbb8DFxaXIfn366aeQyWSYMWMGEhISsHjxYvj5+SEqKkrK5JVESfqWnxACL730Eg4dOoSxY8eidevW2Lt3L6ZNm4b//vsPixYt0mp/7Ngx/PLLL3jrrbdga2uLr7/+GoMGDUJMTAxq1KhRaL8eP36Mnj17IiYmBm+//Tbc3d2xfv16HDx4UKftwYMH0bdvX7Rr1w6zZ8+GXC6XAqM//vgDHTt2BABMnDgRW7duRXBwMLy9vXH//n0cO3YMly9fRtu2bQvty9mzZ9GnTx+4ublhzpw5UKlUmDt3LmrWrKnTdsWKFWjWrBleeuklmJmZ4bfffsNbb70FtVqNoKAgrbbXrl3DkCFDMHHiRAQGBmLNmjV47bXXsGfPHvTq1QtATvAwf/58jBs3Dh07dkRKSgrOnDmDv/76S2qjT1xcHJ5//nlkZ2fjvffeg42NDb777ju9743169cjMDAQ/v7++Pzzz5Geno4VK1aga9euOHv2bImKurz11luoWbMmZs2ahbS0tGLb63PkyBFs2rQJb7/9NiwsLLB8+XL06dMHp06dQvPmzfHKK6/g6tWr+PHHH7Fo0SI4OTkBAGrWrKkzJ6okXnvtNTRs2BDz5s2TPkz49NNP8dFHH2Hw4MEYN24cEhMT8c0336Bbt244e/YsHBwcCj1ffHw8OnXqJAWXNWvWxO+//46xY8ciJSUFkydP1mr/2WefQS6XY+rUqUhOTsYXX3yB4cOH4+TJk6V+LUIIxMfHo1mzZtK2//77DwkJCVqZf42OHTti9+7dxZ43Li5Ous9EVMEIIqIKYs2aNQKAOH36dKFt7O3tRZs2baTns2fPFvn/q1u0aJEAIBITEws9x+nTpwUAsWbNGp193bt3FwDEypUr9e7r3r279PzQoUMCgKhVq5ZISUmRtm/evFkAEEuWLJG21a1bVwQGBhZ7zqL6FhgYKOrWrSs93759uwAgPvnkE612r776qpDJZOL69evSNgBCqVRqbfv7778FAPHNN9/oXCu/xYsXCwBi8+bN0ra0tDTRoEEDAUAcOnRICCGEWq0WDRs2FP7+/kKtVktt09PThaenp+jVq5e0zd7eXgQFBRV5XX369+8vrK2txX///Sdtu3btmjAzMxMFf+Wlp6frHO/v7y/q16+vta1u3boCgPj555+lbcnJycLNzU3rvdaqVSsREBBQ6j5PnjxZABAnT56UtiUkJAh7e3sBQNy8eVMIIcSjR4+Eg4ODGD9+vNbxcXFxwt7eXmd7QZp/P127dhXZ2dla+wq+dzQK/vsRIue9AkCcOXNG2nb79m1haWkpXn75ZWnbl19+qdV/jZs3bxb6HgYgZs+erXP9119/XavdrVu3hEKhEJ9++qnW9vPnzwszMzOd7QWNHTtWuLm5iXv37mltHzp0qLC3t5feG5p/w02bNhUZGRlSuyVLlggA4vz580VeR5/169cLAGLVqlXSNs2/63Xr1um0nzZtmgAgnjx5Uug5jx49KmQymfjoo48KbVPYz4OITI9DFomoUqlWrVqR1RY1n5rv2LGj0CFhxbGwsMDo0aNL3H7kyJFaw4ZeffVVuLm5lehT77LYvXs3FAoF3n77ba3t7777LoQQ+P3337W2+/n5wcvLS3resmVL2NnZ4Z9//in2Om5ubnj11VelbdbW1tKcF42oqChcu3YNw4YNw/3793Hv3j3cu3cPaWlp6NmzJ44ePSr9TBwcHHDy5E
ncvXu3xK9XpVLhwIEDGDhwINzd3aXtDRo0QN++fXXa589AJScn4969e+jevTv++ecfJCcna7V1d3fHyy+/LD23s7P
2025-12-05 09:49:17 +01:00
"text/plain": [
"<Figure size 1000x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# --- 1. Filtres de base ---\n",
"merged[\"year\"] = merged[\"Centralisation Date\"].dt.year\n",
"\n",
"# Filtrer uniquement l'année 2021\n",
"ruptures_2021 = merged[(merged[\"year\"] == 2021) & (merged[\"rupture_flag\"] == True)].copy()\n",
"\n",
"print(\"Nombre total de ruptures en 2021 :\", len(ruptures_2021))\n",
"\n",
"# --- 2. Classification du type de gap ---\n",
"ruptures_2021[\"gap_type\"] = np.where(ruptures_2021[\"gap\"] > 0, \"positive\", \"negative\")\n",
"\n",
"# --- 3. Statistiques globales ---\n",
"gap_counts = ruptures_2021[\"gap_type\"].value_counts()\n",
"gap_percent = ruptures_2021[\"gap_type\"].value_counts(normalize=True) * 100\n",
"\n",
"print(\"\\n=== RUPTURES 2021 — POSITIVES vs NEGATIVES ===\")\n",
"print(gap_counts)\n",
"print(\"\\n(%)\")\n",
"print(gap_percent.map(lambda x: f\"{x:.2f}%\"))\n",
"\n",
"# --- 4. Intensité des écarts ---\n",
"intensity_stats = ruptures_2021.groupby(\"gap_type\")[\"gap\"].describe()\n",
"print(\"\\n=== STATISTIQUES DES GAPS ===\")\n",
"print(intensity_stats)\n",
"\n",
"# --- 5. Visualisation rapide ---\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"plt.figure(figsize=(10,5))\n",
"sns.histplot(data=ruptures_2021, x=\"gap\", hue=\"gap_type\", bins=80, kde=True)\n",
2025-12-05 11:03:19 +01:00
"plt.xlim(-0.5e7, 0.5e7)\n",
2025-12-05 09:49:17 +01:00
"plt.title(\"Distribution des gaps de rupture en 2021\")\n",
"plt.xlabel(\"Gap (AUM_{t} Expected AUM_{t})\")\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
2025-12-05 11:03:19 +01:00
"execution_count": 36,
2025-12-05 09:49:17 +01:00
"id": "df9e0005-93f2-4885-baef-2e54921a42f4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"=== RUPTURE SUMMARY (in %) ===\n",
" total_obs rupture_count rupture_ratio\n",
"period \n",
"after_2021 935390 242411 25.92\n",
"before_2021 3414943 428083 12.54\n",
"during_2021 529964 71090 13.41\n",
"\n",
"=== GAP POSITIVE / NEGATIVE DISTRIBUTION (in %) ===\n",
"period period gap_type\n",
"after_2021 after_2021 negative 60.163524\n",
" positive 39.836476\n",
"before_2021 before_2021 negative 62.273905\n",
" positive 37.726095\n",
"during_2021 during_2021 negative 53.582782\n",
" positive 46.417218\n",
"dtype: float64\n"
]
}
],
"source": [
"# --- 1. ADD YEAR ---\n",
"merged[\"year\"] = merged[\"Centralisation Date\"].dt.year\n",
"\n",
"# --- 2. DEFINE PERIODS ---\n",
"conditions = [\n",
" merged[\"year\"] < 2021,\n",
" merged[\"year\"] == 2021,\n",
" merged[\"year\"] > 2021\n",
"]\n",
"\n",
"period_labels = [\"before_2021\", \"during_2021\", \"after_2021\"]\n",
"\n",
"merged[\"period\"] = np.select(\n",
" conditions,\n",
" period_labels,\n",
" default=\"unknown\"\n",
")\n",
"\n",
"# --- 3. CREATE GAP TYPE & FILTER ONLY RUPTURES ---\n",
"merged[\"gap_type\"] = np.where(\n",
" merged[\"gap\"] > 0, \"positive\",\n",
" np.where(merged[\"gap\"] < 0, \"negative\", \"zero\")\n",
")\n",
"\n",
"ruptures = merged[merged[\"rupture_flag\"] == True].copy()\n",
"\n",
"# --- 4. TOTAL OBS PER PERIOD ---\n",
"total_obs = merged.groupby(\"period\").size().rename(\"total_obs\")\n",
"\n",
"# --- 5. TOTAL RUPTURES PER PERIOD ---\n",
"rupture_counts = ruptures.groupby(\"period\").size().rename(\"rupture_count\")\n",
"\n",
"# --- 6. PROPORTION OF RUPTURES ---\n",
"rupture_ratio = (rupture_counts / total_obs).rename(\"rupture_ratio\")\n",
"\n",
"# --- 7. POSITIVE / NEGATIVE GAPS (% among ruptures) ---\n",
"gap_dist = (\n",
" ruptures.groupby([\"period\", \"gap_type\"])\n",
" .size()\n",
" .groupby(level=0)\n",
" .apply(lambda x: (x / x.sum()) * 100) # % par période\n",
")\n",
"\n",
"\n",
"# --- 8. MERGE AND DISPLAY ---\n",
"summary = pd.concat([total_obs, rupture_counts, rupture_ratio], axis=1)\n",
"summary[\"rupture_ratio\"] = (summary[\"rupture_ratio\"] * 100).round(2)\n",
"\n",
"print(\"\\n=== RUPTURE SUMMARY (in %) ===\")\n",
"print(summary)\n",
"\n",
"print(\"\\n=== GAP POSITIVE / NEGATIVE DISTRIBUTION (in %) ===\")\n",
"print(gap_dist)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}