Project_Carmignac/.ipynb_checkpoints/repair-checkpoint.ipynb

1671 lines
118 KiB
Plaintext
Raw Normal View History

2026-03-10 18:45:51 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"id": "29f7e620-7b04-45f6-ac87-f17505f140c3",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.graph_objects as go\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a48ad016-e4f2-40d9-a607-344a316f5f02",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_2803/242871890.py:1: DtypeWarning: Columns (1,2,3,4) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" stocks = pd.read_csv(\"stocks.csv\")\n",
"/tmp/ipykernel_2803/242871890.py:2: DtypeWarning: Columns (1,2,3,4) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" flows = pd.read_csv(\"flows.csv\")\n"
]
}
],
"source": [
"stocks = pd.read_csv(\"stocks.csv\")\n",
"flows = pd.read_csv(\"flows.csv\")\n",
"\n",
"stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"])\n",
"flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "3c6d9d05-b203-49ae-869f-7f85ead2c69e",
"metadata": {},
"outputs": [],
"source": [
"keys = [\n",
" \"Registrar Account - ID\",\n",
" \"Product - Isin\",\n",
" \"Centralisation Date\"\n",
"]\n",
"\n",
"stocks = stocks[keys + [\"Quantity - AUM\"]]\n",
"\n",
"flows = flows[keys + [\"Quantity - NetFlows\"]]\n",
"\n",
"flows = (\n",
" flows\n",
" .groupby(keys, as_index=False)\n",
" .sum()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d30c2235-281b-41a6-828b-abb6fcfc4183",
"metadata": {},
"outputs": [],
"source": [
"df = stocks.merge(flows, on=keys, how=\"left\")\n",
"\n",
"df[\"Quantity - NetFlows\"] = df[\"Quantity - NetFlows\"].fillna(0)\n",
"\n",
"df = df.sort_values(keys)\n",
"\n",
"\n",
"\n",
"# REBUILD ACCOUNTING IDENTITY\n",
"\n",
"\n",
"df[\"prev_aum\"] = df.groupby(\n",
" [\"Registrar Account - ID\",\"Product - Isin\"]\n",
")[\"Quantity - AUM\"].shift(1)\n",
"\n",
"df[\"prev_flow\"] = df.groupby(\n",
" [\"Registrar Account - ID\",\"Product - Isin\"]\n",
")[\"Quantity - NetFlows\"].shift(1).fillna(0)\n",
"\n",
"df[\"expected_aum\"] = df[\"prev_aum\"] + df[\"prev_flow\"]\n",
"\n",
"\n",
"\n",
"# GAP ANALYSIS\n",
"\n",
"\n",
"df[\"gap\"] = df[\"Quantity - AUM\"] - df[\"expected_aum\"]\n",
"df[\"gap_abs\"] = df[\"gap\"].abs()\n",
"\n",
"EPS = 10\n",
"\n",
"df[\"rupture_flag\"] = (\n",
" df[\"prev_aum\"].notna()\n",
" & (df[\"gap_abs\"] > EPS)\n",
")\n",
"\n",
"\n",
"\n",
"# PARAMETERS\n",
"\n",
"\n",
"GAP_TOL = 1e-6\n",
"REL_GAP_THR = 0.05\n",
"MIN_PERSISTENCE = 3\n",
"\n",
"\n",
"\n",
"# SORT DATA\n",
"\n",
"\n",
"df = df.sort_values(\n",
" [\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"]\n",
")\n",
"\n",
"df[\"corrected_aum\"] = df[\"Quantity - AUM\"]\n",
"df[\"repair_flag\"] = False\n",
"\n",
"\n",
"# REBUILD EXPECTED AUM BEFORE REPAIR\n",
"\n",
"\n",
"df = df.sort_values(\n",
" [\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"]\n",
")\n",
"\n",
"df[\"prev_aum\"] = (\n",
" df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n",
" [\"Quantity - AUM\"]\n",
" .shift(1)\n",
")\n",
"\n",
"df[\"prev_flow\"] = (\n",
" df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n",
" [\"Quantity - NetFlows\"]\n",
" .shift(1)\n",
" .fillna(0)\n",
")\n",
"\n",
"df[\"expected_stock\"] = df[\"prev_aum\"] + df[\"prev_flow\"]\n",
"\n",
"#delete negative AUM\n",
"df = df[df[\"Quantity - AUM\"] >= 0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "efd374d0-6393-45f2-926e-2c29249cd078",
"metadata": {},
"outputs": [],
"source": [
"def repair_group(g):\n",
"\n",
" g = g.copy()\n",
"\n",
" obs = g[\"Quantity - AUM\"].values\n",
" flows = g[\"Quantity - NetFlows\"].values\n",
"\n",
" corrected = obs.copy()\n",
"\n",
" \n",
" # Build expected AUM path\n",
" \n",
"\n",
" expected = np.empty_like(obs)\n",
" expected[0] = np.nan\n",
"\n",
" for t in range(1, len(obs)):\n",
" expected[t] = corrected[t-1] + flows[t-1]\n",
"\n",
" gap = obs - expected\n",
"\n",
" rel_gap = np.abs(gap) / np.maximum(np.abs(expected), 1.0)\n",
"\n",
" idx = None\n",
"\n",
" \n",
" # Detect persistent shift\n",
" \n",
"\n",
" for i in range(1, len(obs) - MIN_PERSISTENCE):\n",
"\n",
" if (\n",
" rel_gap[i] > REL_GAP_THR\n",
" and np.all(np.abs(gap[i:i+MIN_PERSISTENCE] - gap[i]) < GAP_TOL)\n",
" and np.all(np.abs(np.diff(flows[i:i+MIN_PERSISTENCE])) < GAP_TOL)\n",
" ):\n",
" idx = i\n",
" break\n",
"\n",
" if idx is None:\n",
" return g\n",
"\n",
" \n",
" # Compute shift\n",
" \n",
"\n",
" shift = gap[idx]\n",
"\n",
" candidate = obs[idx:] - shift\n",
"\n",
" \n",
" # SAFETY CHECKS\n",
" \n",
"\n",
" # 1. do not allow negative AUM\n",
" # refuse repair if it creates NEW negative AUM\n",
" if ((candidate < 0) & (obs[idx:] >= 0)).any():\n",
" return g\n",
"\n",
" # 2. avoid extreme corrections\n",
" if abs(shift) > 2 * np.nanmax(np.abs(obs)):\n",
" return g\n",
"\n",
" \n",
" # Apply correction\n",
" \n",
"\n",
" corrected[idx:] = candidate\n",
"\n",
" g.loc[g.index[idx]:, \"repair_flag\"] = True\n",
"\n",
" \n",
" # Rebuild expected path after repair\n",
" \n",
"\n",
" expected_corr = np.empty_like(obs)\n",
" expected_corr[0] = np.nan\n",
"\n",
" for t in range(1, len(obs)):\n",
" expected_corr[t] = corrected[t-1] + flows[t-1]\n",
"\n",
" g[\"corrected_aum\"] = corrected\n",
" g[\"expected_stock_corr\"] = expected_corr\n",
"\n",
" return g"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f94f07b4-e053-4828-bbb1-3697f9a11751",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_2803/2538339937.py:4: FutureWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
" .apply(repair_group)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Before repair After repair Repaired points\n",
"0 750995 7708 6386\n",
"\n",
"NUMBER OF MODIFIED OBSERVATIONS: 2407679\n",
"Share modified: 26.66 %\n",
"\n",
"TOTAL AUM\n",
"Raw total : 82015854859.97653\n",
"Repaired total : 82060603424.22249\n"
]
}
],
"source": [
"df = (\n",
" df\n",
" .groupby([\"Registrar Account - ID\", \"Product - Isin\"], group_keys=False)\n",
" .apply(repair_group)\n",
")\n",
"\n",
"# VALIDATION BEFORE / AFTER\n",
"\n",
"\n",
"df[\"gap_before\"] = df[\"Quantity - AUM\"] - df[\"expected_stock\"]\n",
"\n",
"df[\"gap_after\"] = df[\"corrected_aum\"] - df[\"expected_stock_corr\"]\n",
"\n",
"df[\"rupture_before\"] = df[\"gap_before\"].abs() > GAP_TOL\n",
"\n",
"df[\"rupture_after\"] = df[\"gap_after\"].abs() > GAP_TOL\n",
"\n",
"\n",
"\n",
"# SUMMARY\n",
"\n",
"\n",
"summary = pd.DataFrame({\n",
" \"Before repair\": [df[\"rupture_before\"].sum()],\n",
" \"After repair\": [df[\"rupture_after\"].sum()],\n",
" \"Repaired points\": [df[\"repair_flag\"].sum()]\n",
"})\n",
"\n",
"print(summary)\n",
"\n",
"\n",
"\n",
"# BUILD REPAIRED DATASET\n",
"\n",
"\n",
"stocks_repaired = stocks.copy()\n",
"\n",
"repair_map = df[[\n",
" \"Registrar Account - ID\",\n",
" \"Product - Isin\",\n",
" \"Centralisation Date\",\n",
" \"corrected_aum\",\n",
" \"repair_flag\"\n",
"]]\n",
"\n",
"stocks_repaired = stocks_repaired.merge(\n",
" repair_map,\n",
" on=keys,\n",
" how=\"left\"\n",
")\n",
"\n",
"stocks_repaired[\"Quantity - AUM\"] = np.where(\n",
" stocks_repaired[\"repair_flag\"] == True,\n",
" stocks_repaired[\"corrected_aum\"],\n",
" stocks_repaired[\"Quantity - AUM\"]\n",
")\n",
"\n",
"stocks_repaired.to_csv(\"AUM_repaired.csv\", index=False)\n",
"\n",
"\n",
"\n",
"# COMPARISON RAW VS REPAIRED\n",
"\n",
"\n",
"df_compare = stocks.merge(\n",
" stocks_repaired,\n",
" on=keys,\n",
" how=\"inner\",\n",
" suffixes=(\"_raw\",\"_repaired\")\n",
")\n",
"\n",
"df_compare[\"aum_diff\"] = (\n",
" df_compare[\"Quantity - AUM_repaired\"]\n",
" - df_compare[\"Quantity - AUM_raw\"]\n",
")\n",
"\n",
"print(\"\\nNUMBER OF MODIFIED OBSERVATIONS:\",\n",
" (df_compare[\"aum_diff\"] != 0).sum())\n",
"\n",
"print(\"Share modified:\",\n",
" round((df_compare[\"aum_diff\"] != 0).mean()*100,2), \"%\")\n",
"\n",
"print(\"\\nTOTAL AUM\")\n",
"\n",
"print(\"Raw total :\", df_compare[\"Quantity - AUM_raw\"].sum())\n",
"print(\"Repaired total :\", df_compare[\"Quantity - AUM_repaired\"].sum())\n",
"\n",
"\n",
"\n",
"# RUPTURE DISTRIBUTION BEFORE / AFTER\n",
"\n",
"\n",
"def rupture_distribution(df, flag):\n",
"\n",
" rupture_summary = (\n",
" df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n",
" .agg(\n",
" n_ruptures=(flag,\"sum\"),\n",
" total_obs=(flag,\"count\"),\n",
" rupture_ratio=(flag,\"mean\")\n",
" )\n",
" .reset_index()\n",
" )\n",
"\n",
" rs = rupture_summary.copy()\n",
"\n",
" bins = [0,0.01,0.10,0.30,1.01]\n",
"\n",
" labels = [\n",
" \"Clean / quasi-clean (≤1%)\",\n",
" \"Moderate (110%)\",\n",
" \"High (1030%)\",\n",
" \"Severe (>30%)\"\n",
" ]\n",
"\n",
" rs[\"rupture_class\"] = pd.cut(\n",
" rs[\"rupture_ratio\"],\n",
" bins=bins,\n",
" labels=labels,\n",
" include_lowest=True\n",
" )\n",
"\n",
" dist = (\n",
" rs[\"rupture_class\"]\n",
" .value_counts(normalize=True)\n",
" .sort_index()\n",
" * 100\n",
" ).round(1)\n",
"\n",
" return dist\n",
"\n",
"\n",
"dist_before = rupture_distribution(df,\"rupture_before\")\n",
"dist_after = rupture_distribution(df,\"rupture_after\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "54491736-58b3-4ef7-b6c4-5534ec796bce",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"domain": {
"x": [
0,
0.48
]
},
"hole": 0.45,
"labels": [
"Clean / quasi-clean (≤1%)",
"Moderate (110%)",
"High (1030%)",
"Severe (>30%)"
],
"name": "Before repair",
"textinfo": "percent",
"type": "pie",
"values": {
"bdata": "MzMzMzNzREAzMzMzM7M6QJqZmZmZGTBAZmZmZmZmMEA=",
"dtype": "f8"
}
},
{
"domain": {
"x": [
0.52,
1
]
},
"hole": 0.45,
"labels": [
"Clean / quasi-clean (≤1%)",
"Moderate (110%)",
"High (1030%)",
"Severe (>30%)"
],
"name": "After repair",
"textinfo": "percent",
"type": "pie",
"values": {
"bdata": "mpmZmZn5WEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
"dtype": "f8"
}
}
],
"layout": {
"annotations": [
{
"showarrow": false,
"text": "Before repair",
"x": 0.22,
"y": 0.5
},
{
"showarrow": false,
"text": "After repair",
"x": 0.78,
"y": 0.5
}
],
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermap": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermap"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Rupture intensity distribution (Before vs After repair)"
}
}
},
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAzkAAAFoCAYAAAB0XzViAAAQAElEQVR4AeydB5wURfbH34SFTeQMShBBEVBBMWACA6JnOhWzZxbOrCgIomJC0VOMnDmcJ5yinqhnFvCvoKCCiiBJyZklboJJ//71WkPv7OTY3fPjQ213V3j16lvdM/X6VdU4A/xHAiRAAiRAAiRAAiRAAiRAAjYi4BT+IwESCEOAUSRAAiRAAiRAAiRAAlYlQCPHqj1HvUmABEggFwRYJwmQAAmQAAlYgACNHAt0ElUkARIgARIgARIwNwFqRwIkYC4CNHLM1R/UhgRIgARIgARIgARIgATsQiBn7aCRkzP0rJgESIAESIAESIAESIAESCATBGjkZIIqZaaPACWRAAmQAAmQAAmQAAmQQIIEaOQkCIzZSYAESMAMBKgDCZAACZAACZBAZAI0ciKzYQoJkAAJkAAJkIC1CFBbEiABEtAJ0MjRMfAPCZAACZAACZAACZAACdiVQP61i0ZO/vU5W0wCJEACJEACJEACJEACtiZAI8fW3Zu+xlESCZAACZAACZAACZAACViFAI0cq/QU9SQBEjAjAepEAiRAAiRAAiRgQgI0ckzYKVSJBEiABEiABKxNgNqTAAmQQG4J0MjJLX/WTgIkQAIkQAIkQAIkkC8E2M6sEaCRkzXUrIgESIAESIAESIAESIAESCAbBGjkZINy+uqgJBIgARIgARIgARIgARIggRgEaOTEAMRkEiABKxCgjiRAAiRAAiRAAiSwmwCNnN0seEYCJEACJEAC9iLA1pAACZBAnhKgkZOnHc9mkwAJkAAJkAAJkEC+EmC77U+ARo79+5gtJAESIAESIAESIAESIIG8IkAjJ6nuZiESIAESIAESIAESIAESIAGzEqCRY9aeoV4kYEUC1JkESIAESIAESIAETECARo4JOoEqkAAJkAAJ2JsAW0cCJEACJJBdAjRyssubtZEACZAACZAACZAACdQQ4F8SyBgBGjkZQ0vBJEACJEACJEACJEACJEACuSBgbSMnF8RYJwmQAAmQAAmQAAmQAAmQgKkJ0MgxdfdQORJIjgBLkQAJkAAJkAAJkEA+E6CRk8+9z7aTAAmQQH4RYGtJgARIgATyhACNnDzpaDaTBEiABEiABEiABMITYCwJ2I8AjRz79SlbRAIkQAIkQAIkQAIkQAJ5TSAtRk5eE2TjSYAESIAESIAESIAESIAETEWARo6puoPK2IwAm0MCJEACJEACJEACJJADAjRycgCdVZIACZBAfhNg60mABEiABEggswRo5GSWL6WTAAmQAAmQAAmQQHwEmIsESCBtBGjkpA0lBZEACZAACZAACZAACZAACaSbQDLyaOQkQ41lSIAESIAESIAESIAESIAETEuARo5pu4aKpY9AapLmLVwmh59yjUydMSc1QWks/dLEj3SdoFsaxZpe1MayrTLwgmFZbfvIB1+Q7v0u1cPlN4+Vyqpq03PKpYK4NxUv9BX6LJf6WL1uPOP4/CFLq/ck9ScBEsg2AcsYOfiixIe8+vI0HvGlmg1w6ssmW/Ul0yboBjbJDMgVYwzqkqk7E2VC22MWHcEIAw/cE/G2G3lRBm2KtwzyqXKoE9eZCqqecPohLtn7Kp36jnt+ki7u/dfGSPd9OurnMDpgfEC/0ADeaJeeMYk/YD577mKZ9s7jMm/aq/LyuOFSXFSYhCSTFklALbAAX3wO4zkMVxT3yYtvfChvPTda5/XJhIf1bCiD8voF/whYxHtv4j7H/Q5sw+9/jkY2QDCQAAmQQBwELGPkqLacfuIR+pcnBhwIT4+5UR577i39S0Pl4ZEESMB+BGC4T/50ugy/7nxp0axxnQYe2qubfP/xs7U+H/of0UuuHPqwJGPoYCAPA2fQqf3C1ldHARtHKBbdunSQlWs2yK8Ll9ZpLYzN6bPmCphjYF4nAyOSJoD7Hff9zDm/CULSglgwbgLMSAIkYH0CljNyQpH379tLYPhMnT4nqYFMqDyrX19x/sn6IK+/xsXqbYH+Zm3PmBFXybcfjg96E6BrpgIGjKgLdWaqjlhyc90PGEC/PukzgSGDEEtflX7CMQfL9vJK+W72fBXFYxIEYNRs214u1152huzZtqV8/tUPSUhhEUUAzzKeaTzbKi7WEfc9Ap4DPA+x8jOdBEiABDJAwFIiLW/kRKKNaRMDLxgmeANpzIO3waHTBDB1AHnnL1qmz/fHlAwETIFRXyZ4E4w3whgwwXOEdASUhfxk6lu2cp2gDsgx6oQ6VTzSECAf9cQKyGeUhfzQMVb7wOni68fob2nxthx1IkAP6AM5CJCPeBVC08EJ9SMfgsqHI9hDhjFAN6SpAD2hi8oDGZAHuYiPpOOKVev0voM8VVYdUQ5yw6WpPMYj8il9cLxl9DPGZP0ceSATsvUI7Q90hK4oowL015J0AzzW/YOykAHZqjyYIQ5pShbkqVBRWR28h1AmVCfoh7jQsuhT9B3qgizUEa9+yK8CykE31I2Ac8SpdBxRN+JnzVkQVVfkjRSWrlgnvy1eLkcc0jOp6WJ7dWhTS7RqP3RWAXqqTGjDaZeM1J8H4/OO/lB5wE6VxRHXKg1HxR7xkAcGyAfuqN+YB/EqGOtAnnABeZAfx9B0xIWmQQfEqYB7AvqFlg13DV0xsIYXB4Ps3j27CDxcxvI4P/OKu3Qvg/Hz44kX35FIzyzkoj4cwUTphqOxL5BH8UO8ah/yoV1IDxegE9qJPKo8yqAu1IkyKg/iVYB8pKmA8pAT7fsBeSETspUcdUQc0pBHBSUT9SNO6RetfZgmefGgAfpzgOcB5RhIgARIgAQiE7C8kYMvj3UbNgu+gDu1bx25pTFSMAXjltHj5fWnRuqeEEx7QZHrRj6hz4HGG7cXHx0mDUuL5ZbB5+h5MF0Ob+SQL9GA+s7/+70ydMi5uiz1Vg9fdscNukVat2yqx6MOrAeY9MG0lKbkoT5j+yBzzfoyuf/x13XVMR0CbcdbWnjGUC+CWoMAzviyhh4oizQE6InBjfqy1oVpfzAw1A7BNoDZyDEv6IN9xCPgix6DJSUPzNu2aiaR5p1H07H9Hq0F04rCefTUW+gLzzwB1UYMqo1GndDG26+/QPcGRCyoJaDfYCRceeEpwTarqZQYNMVz/8CAPmfwaIH3AfUiRPPIIf+Ndz4ZvIeQHwNQDCpD+0NTMer/ePQLFYB2Qd8xI68KthntRxzSjPlDdY3V18ayOFeemMN674fLuAL6E4NzDMwRVCH01XExnjHwwDoIPA+4d8EWAf0BuXgW8LmDdiAe9zDuG8QjXdWFIwb9jz77pnw56TGdk3qmwKjfWTfp9y1kIGAty0jtOcFgF2UjBbQHAe0LrQ9eFujdY59OevFEnzO9kOEPBtRGAxP3Jz5P8FypbHg2333pXt3TZvz8uPHKs/TPVOhjjFcM4ukLVQeO+FxB+8AKIZ7P33TwR3uNn5/od+ijvh9wjlBaWhRcjwT9IuVD3nAhVvtaNmuiF1PPg37BPyRAAiRAAmEJOMPGWihy4ntT9LeHeMOFN13Jqo4vYQzy8WUNGZAFmZj/jIC4dAbUh0EUBlNGuW+8+7lusI266eJgNHSKNIAPZopxgvqM7VMyMTCLZ0AMBhjoPHr3NbXWJ8BwwDSW9z+bUUsDDAwxxUlFnjagrzRqWBqcNoQ6UTcG5dAF+cAcg58T+/XBZcIhtA4IwAAQA8FuXTpILCM4UhsxsIBxC3mRghp0GAfhGBBj0FpSVBSpWK141IH8KFcrIcIF8sPwNt5Dkfojgoiko9F/Y5+eqE8VNeqLPsdgFmnIoyoI1RV9jecL9xQG0SpfpOPvy1br90/L5o0jZdE/B/qcNETfBQ1v0XGOPoUhjvpUwVSfMXzmQG+8oFBycQ/j2UA80lVdOIIH7muVF3HqvkQamCEOAX2JNS14mWDkhzRjgCx4tVCfkR/K4LnC5wV0UtepPGeh9zaMKwQ8V2iHUa9EzxPtC3yuxGPYGPUA41T5h35+gj/uX9xfCKgPcU/ed0OtKayIQ77QfkL+cCFW+/AZhs8yPA/hyjOOBEiABEhgNwH
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# DONUT CHART BEFORE / AFTER\n",
"\n",
"fig = go.Figure()\n",
"\n",
"fig.add_trace(go.Pie(\n",
" labels=dist_before.index,\n",
" values=dist_before.values,\n",
" hole=0.45,\n",
" name=\"Before repair\",\n",
" domain=dict(x=[0,0.48]),\n",
" textinfo=\"percent\"\n",
"))\n",
"\n",
"fig.add_trace(go.Pie(\n",
" labels=dist_after.index,\n",
" values=dist_after.values,\n",
" hole=0.45,\n",
" name=\"After repair\",\n",
" domain=dict(x=[0.52,1]),\n",
" textinfo=\"percent\"\n",
"))\n",
"\n",
"fig.update_layout(\n",
" title=\"Rupture intensity distribution (Before vs After repair)\",\n",
" annotations=[\n",
" dict(text=\"Before repair\", x=0.22, y=0.5, showarrow=False),\n",
" dict(text=\"After repair\", x=0.78, y=0.5, showarrow=False)\n",
" ]\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d844f6f0-c0f4-4f71-8280-1fd39ced83b7",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_2803/1326609018.py:3: DtypeWarning:\n",
"\n",
"Columns (0,5) have mixed types. Specify dtype option on import or set low_memory=False.\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset size: (5510007, 6)\n",
"\n",
"ACCOUNTING GAP DISTRIBUTION\n",
"count 5.430291e+06\n",
"mean 1.194750e+03\n",
"std 4.328850e+04\n",
"min 0.000000e+00\n",
"25% 0.000000e+00\n",
"50% 0.000000e+00\n",
"75% 0.000000e+00\n",
"max 4.254300e+07\n",
"Name: gap_abs, dtype: float64\n",
"\n",
"Relative gap quantiles\n",
"0.90 0.022107\n",
"0.95 0.523385\n",
"0.99 33.742925\n",
"Name: gap_rel, dtype: float64\n",
"\n",
"NEGATIVE AUM: 15989\n",
"\n",
"REPAIR RATE\n",
"0.006871198899152992\n",
"\n",
"AUM JUMP QUANTILES\n",
"0.90 1.005955\n",
"0.95 1.125092\n",
"0.99 11.418952\n",
"Name: aum_jump, dtype: float64\n"
]
}
],
"source": [
"# LOAD DATA\n",
"\n",
"aum = pd.read_csv(\"AUM_repaired.csv\")\n",
"\n",
"aum[\"Centralisation Date\"] = pd.to_datetime(aum[\"Centralisation Date\"])\n",
"\n",
"\n",
"# KEEP USEFUL COLUMNS\n",
"\n",
"aum = aum[[\n",
" \"Registrar Account - ID\",\n",
" \"Product - Isin\",\n",
" \"Centralisation Date\",\n",
" \"Quantity - AUM\",\n",
" \"repair_flag\"\n",
"]]\n",
"\n",
"flows = flows[[\n",
" \"Registrar Account - ID\",\n",
" \"Product - Isin\",\n",
" \"Centralisation Date\",\n",
" \"Quantity - NetFlows\"\n",
"]]\n",
"\n",
"\n",
"\n",
"# AGGREGATE FLOWS\n",
"\n",
"flows = (\n",
" flows\n",
" .groupby(\n",
" [\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"],\n",
" as_index=False\n",
" )[\"Quantity - NetFlows\"]\n",
" .sum()\n",
")\n",
"\n",
"\n",
"\n",
"# MERGE DATASETS\n",
"\n",
"df = aum.merge(\n",
" flows,\n",
" on=[\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"],\n",
" how=\"left\"\n",
")\n",
"\n",
"df[\"Quantity - NetFlows\"] = df[\"Quantity - NetFlows\"].fillna(0)\n",
"\n",
"print(\"Dataset size:\", df.shape)\n",
"\n",
"\n",
"\n",
"# SORT DATA\n",
"\n",
"df = df.sort_values(\n",
" [\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"]\n",
")\n",
"\n",
"\n",
"\n",
"# REBUILD ACCOUNTING IDENTITY\n",
"\n",
"df[\"prev_aum\"] = (\n",
" df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n",
" [\"Quantity - AUM\"]\n",
" .shift(1)\n",
")\n",
"\n",
"df[\"prev_flow\"] = (\n",
" df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n",
" [\"Quantity - NetFlows\"]\n",
" .shift(1)\n",
" .fillna(0)\n",
")\n",
"\n",
"df[\"expected_aum\"] = df[\"prev_aum\"] + df[\"prev_flow\"]\n",
"\n",
"\n",
"\n",
"# GAPS\n",
"\n",
"df[\"gap\"] = df[\"Quantity - AUM\"] - df[\"expected_aum\"]\n",
"\n",
"df[\"gap_abs\"] = df[\"gap\"].abs()\n",
"\n",
"df[\"gap_rel\"] = (\n",
" df[\"gap_abs\"] /\n",
" df[\"expected_aum\"].abs().clip(lower=1)\n",
")\n",
"\n",
"\n",
"\n",
"# ACCOUNTING CONSISTENCY\n",
"\n",
"print(\"\\nACCOUNTING GAP DISTRIBUTION\")\n",
"\n",
"print(df[\"gap_abs\"].describe())\n",
"\n",
"print(\"\\nRelative gap quantiles\")\n",
"\n",
"print(df[\"gap_rel\"].quantile([0.90,0.95,0.99]))\n",
"\n",
"\n",
"\n",
"# NEGATIVE AUM\n",
"\n",
"neg = (df[\"Quantity - AUM\"] < 0).sum()\n",
"\n",
"print(\"\\nNEGATIVE AUM:\", neg)\n",
"\n",
"\n",
"\n",
"# REPAIR RATE\n",
"\n",
"print(\"\\nREPAIR RATE\")\n",
"\n",
"print(df[\"repair_flag\"].mean())\n",
"\n",
"\n",
"\n",
"# AUM JUMPS\n",
"\n",
"\n",
"df[\"prev_obs\"] = (\n",
" df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n",
" [\"Quantity - AUM\"]\n",
" .shift(1)\n",
")\n",
"\n",
"df[\"aum_jump\"] = (\n",
" df[\"Quantity - AUM\"] /\n",
" df[\"prev_obs\"].replace(0,np.nan)\n",
")\n",
"\n",
"print(\"\\nAUM JUMP QUANTILES\")\n",
"\n",
"print(df[\"aum_jump\"].quantile([0.90,0.95,0.99]))\n",
"\n",
"\n",
"\n",
"# VISUAL CHECK\n",
"\n",
"\n",
"def plot_series(account, isin):\n",
"\n",
" sub = df[\n",
" (df[\"Registrar Account - ID\"] == account) &\n",
" (df[\"Product - Isin\"] == isin)\n",
" ]\n",
"\n",
" plt.figure(figsize=(8,3))\n",
"\n",
" plt.plot(\n",
" sub[\"Centralisation Date\"],\n",
" sub[\"Quantity - AUM\"],\n",
" label=\"AUM\"\n",
" )\n",
"\n",
" plt.plot(\n",
" sub[\"Centralisation Date\"],\n",
" sub[\"expected_aum\"],\n",
" linestyle=\"--\",\n",
" label=\"Expected AUM\"\n",
" )\n",
"\n",
" plt.legend()\n",
" plt.title(f\"Account {account} — ISIN {isin}\")\n",
"\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f7d759f7-64be-4d82-a79c-98cda407cfec",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmQAAAJwCAYAAAAnVn0xAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAU95JREFUeJzt3Xd8VGW+x/HvpMykkUYSkkCA0EFapKkrTZEii2JXUIGr7OoFXVbdq1z3iririG2xoa66YMEuKHZZNYCIBVZEiggYQJQSIKSTNs/9g80sQxJImeSZJJ/36zUvM2eec+Z3nsyYL+c85zwOY4wRAAAArAmwXQAAAEBzRyADAACwjEAGAABgGYEMAADAMgIZAACAZQQyAAAAywhkAAAAlhHIAAAALCOQAQAAWEYgA5qpYcOGadiwYbbLwL/t2LFDDodDCxcutF1KtSxcuFAOh0M7duywXUqDSU9Pl8PhUHp6uu1S0AQRyABJ8+fPl8Ph0KBBgyp9vfyP5QMPPFDp6w888ECFP07Dhg2Tw+FQ586dK11n2bJlcjgccjgceuONN6pd60UXXaRzzz23ytfL/2hU9rj88sur/T4NbfPmzXI4HAoJCdHhw4dtl1NvXnrpJc2bN892GV6O/8y4XC61atVKw4YN0z333KPMzEzbJTao+fPnN5pgjKYjyHYBgD9YtGiR2rdvr6+//lrbtm1Tp06dfLLdkJAQbdu2TV9//bUGDhxY4T1DQkJ05MiRam+vpKREy5Yt05w5c07a9sYbb9SAAQO8lrVv377a79XQXnzxRSUmJiorK0tvvPGGrr32Wtsl1YuXXnpJGzZs0IwZM7yWt2vXToWFhQoODrZTmP7zmSkrK1NmZqa++OILzZo1Sw899JBee+01nXXWWZ62V111lS6//HK5XC5r9daX+fPnKy4uTpMnT/ZaPmTIEBUWFsrpdNopDE0agQzNXkZGhr744gstXrxYv//977Vo0SLNmjXLJ9vu2LGjSktL9fLLL3sFsiNHjmjJkiUaO3as3nzzzWpvb+XKlcrNzdXYsWNP2nbw4MG6+OKLa1V3QzPG6KWXXtKECROUkZGhRYsWNdlAVpXyo4M2VfaZ+e677zRy5EhddNFF2rRpk5KSkiRJgYGBCgwMtFGmNQEBAdZ/R2i6OGWJZm/RokWKiYnR2LFjdfHFF2vRokU+3f4VV1yhV199VW6327PsnXfeUUFBgS699NIabeu9995Tjx496u1I1/79+3XNNdeoVatWCgkJUZ8+ffTcc895tTn11FN14YUXei3r1auXHA6H1q9f71n26quvyuFwaPPmzSd931WrVmnHjh26/PLLdfnll2vFihXavXt3hXZut1sPP/ywevXqpZCQEMXHx2v06NFas2aNV7sXX3xRAwcOVFhYmGJiYjRkyBB9/PHHXm3mz5+vU045RS6XS8nJyZo2bVqFU6Xt27evcJREqjj+rvyU32uvvaa7775bbdq0UUhIiM4++2xt27bNa7333ntPO3fu9JweLP9dVjaGbPLkyYqIiNAvv/yi8ePHKyIiQvHx8brllltUVlbmVdPBgwd11VVXKTIyUtHR0Zo0aZK+++67Oo9L69Onj+bNm6fDhw/rscce8yyvbAzZ22+/rbFjxyo5OVkul0sdO3bUX/7ylwq1StLjjz+uDh06KDQ0VAMHDtTKlStr3a/lXn/9dfXr10+hoaGKi4vTlVdeqV9++cWrzd69ezVlyhS1adNGLpdLSUlJOv/88z370b59e23cuFHLly/3/I7Ka6pqDNlXX32lc889VzExMQoPD1fv3r318MMP16yj0ewRyNDsLVq0SBdeeKGcTqeuuOIKbd26Vd98843Ptj9hwgTt2bPH63/iL730ks4++2wlJCTUaFvvv//+CcePHSs3N1cHDhzwehwbCo9XWFioYcOG6YUXXtDEiRN1//33KyoqSpMnT/b64zJ48GB9/vnnnueHDh3Sxo0bFRAQoJUrV3qWr1y5UvHx8erevftJa120aJE6duyoAQMGaNy4cQoLC9PLL79cod0111yjGTNmKCUlRXPnztVtt92mkJAQffnll542s2fP1lVXXaXg4GDdddddmj17tlJSUvTpp5962tx5552aNm2akpOT9eCDD+qiiy7SU089pZEjR6qkpOSk9Vbl3nvv1ZIlS3TLLbdo5syZ+vLLLzVx4kTP67fffrv69u2ruLg4vfDCC3rhhRdOOp6srKxMo0aNUsuWLfXAAw9o6NChevDBB/X3v//d08btdmvcuHF6+eWXNWnSJN19993as2ePJk2aVOt9OdbFF1+s0NDQCqH2eAsXLlRERIRuuukmPfzww+rXr5/uuOMO3XbbbV7tnnjiCU2fPl1t2rTRfffdp8GDB2v8+PGVhnDp5P1a/t6XXnqpAgMDNWfOHE2dOlWLFy/WmWee6RW0L7roIi1ZskRTpkzR/PnzdeONNyo3N1e7du2SJM2bN09t2rRRt27dPL+j22+/vcp9XrZsmYYMGaJNmzbpD3/4gx588EENHz5c77777gn7CqjAAM3YmjVrjCSzbNkyY4wxbrfbtGnTxvzhD3/wapeRkWEkmfvvv7/S7dx///1GksnIyPAsGzp0qDnllFOMMcb079/fXHPNNcYYY7KysozT6TTPPfec+eyzz4wk8/rrr5+01p9++slIMp999tkJ25Vvs7LH8fUNHTrU83zevHlGknnxxRc9y4qLi83pp59uIiIiTE5OjjHGmNdff91IMps2bTLGGLN06VLjcrnMeeedZy677DLPur179zYXXHDBSferuLjYtGzZ0tx+++2eZRMmTDB9+vTxavfpp58aSebGG2+ssA23222MMWbr1q0mICDAXHDBBaasrKzSNvv37zdOp9OMHDnSq81jjz1mJJl//OMfnmXt2rUzkyZNqvB+x/ddeZ93797dFBUVeZY//PDDRpL5/vvvPcvGjh1r2rVrV2Gb5Z+xBQsWeJZNmjTJSDJ33XWXV9u0tDTTr18/z/M333zTSDLz5s3zLCsrKzNnnXVWhW1Wpjqfwz59+piYmBjP8wULFlT4TBUUFFRY7/e//70JCwszR44cMcYYU1RUZFq2bGkGDBhgSkpKPO0WLlxoJNWqX4uLi01CQoLp2bOnKSws9LR79913jSRzxx13GGOOfvdO9D0ud8opp3jVcXw95d/B0tJSk5qaatq1a2eysrK82pZ/3oDq4ggZmrVFixapVatWGj58uKSj43guu+wyvfLKK5WeZqmtCRMmaPHixSouLtYbb7yhwMBAXXDBBTXaxnvvvaeoqCideeaZ1Wp/xx13aNmyZV6PxMTEKtu///77SkxM1BVXXOFZFhwcrBtvvFF5eXlavny5pKNHyCRpxYoVko4eCRswYIDOOecczxGyw4cPa8OGDZ62J/LBBx/o4MGDXu97xRVX6LvvvtPGjRs9y9588005HI5Kx/c5HA5J0ltvvSW326077rhDAQEBlbb55z//qeLiYs2YMcOrzdSpUxUZGan33nvvpDVXZcqUKV4Dvsv3/6effqr1NiXpuuuu83o+ePBgr21++OGHCg4O1tSpUz3LAgICNG3atDq977EiIiKUm5t7wjahoaGen8uP0A4ePFgFBQX64YcfJElr1qzRwYMHNXXqVAUF/WcY88SJExUTE1Ppdk/Wr2vWrNH+/fv13//9315jvMaOHatu3bp5fqehoaFyOp1KT09XVlZWTXa/Ut9++60yMjI0Y8YMRUdHe71W/nkDqqtZBrIVK1Zo3LhxSk5OlsPh0FtvvVXjbRhj9MADD6hLly5yuVxq3bq17r77bt8Xi3pTVlamV155RcOHD1dGRoa2bdumbdu2adCgQdq3b58++eSTGm+zqv8JX3755crOztYHH3ygRYsW6be//a1atGhRo22/9957GjlypNcfsRPp1auXRowY4fU40YDknTt3qnPnzhWCTPkpx507d0qSWrVqpc6dO3vC18qVKzV48GANGTJEv/76q3766SetWrVKbre7WoHsxRdfVGpqqlwul+d30LFjR4WFhXmN59u+fbuSk5MVGxtb5ba2b9+
"text/plain": [
"<Figure size 700x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# COMPUTE AUM CHANGE\n",
"\n",
"df[\"prev_aum\"] = (\n",
" df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n",
" [\"Quantity - AUM\"]\n",
" .shift(1)\n",
")\n",
"\n",
"df[\"delta_aum\"] = df[\"Quantity - AUM\"] - df[\"prev_aum\"]\n",
"\n",
"df[\"flow_lag\"] = (\n",
" df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n",
" [\"Quantity - NetFlows\"]\n",
" .shift(1)\n",
")\n",
"\n",
"\n",
"\n",
"# FILTER VALID OBSERVATIONS\n",
"\n",
"\n",
"diag = df[\n",
" df[\"prev_aum\"].notna() &\n",
" df[\"flow_lag\"].notna()\n",
"]\n",
"\n",
"\n",
"\n",
"# SAMPLE FOR PLOTTING (dataset is large)\n",
"\n",
"\n",
"sample = diag.sample(20000, random_state=1)\n",
"\n",
"\n",
"\n",
"# SCATTER PLOT\n",
"\n",
"\n",
"plt.figure(figsize=(7,7))\n",
"\n",
"plt.scatter(\n",
" sample[\"flow_lag\"],\n",
" sample[\"delta_aum\"],\n",
" alpha=0.3,\n",
" s=5\n",
")\n",
"\n",
"# perfect accounting identity\n",
"x = np.linspace(\n",
" sample[\"flow_lag\"].min(),\n",
" sample[\"flow_lag\"].max(),\n",
" 100\n",
")\n",
"\n",
"plt.plot(x, x, color=\"red\", label=\"Perfect identity\")\n",
"\n",
"plt.xlabel(\"Flow (t-1)\")\n",
"plt.ylabel(\"Δ AUM\")\n",
"\n",
"plt.title(\"AUM / Flow Accounting Diagnostic\")\n",
"\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "d0a959c9-cfff-44cb-a1df-6c7275ec5b43",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.50 0.000000\n",
"0.90 0.010919\n",
"0.95 0.162503\n",
"0.99 11.550941\n",
"Name: implied_return, dtype: float64\n"
]
}
],
"source": [
"df[\"implied_return\"] = (\n",
" df[\"Quantity - AUM\"] - df[\"prev_aum\"] - df[\"flow_lag\"]\n",
") / df[\"prev_aum\"].replace(0, np.nan)\n",
"\n",
"print(df[\"implied_return\"].quantile([0.5,0.9,0.95,0.99]))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}