{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "29f7e620-7b04-45f6-ac87-f17505f140c3", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import plotly.graph_objects as go\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 6, "id": "a48ad016-e4f2-40d9-a607-344a316f5f02", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "KeyboardInterrupt\n", "\n" ] } ], "source": [ "stocks = pd.read_csv(\"stocks.csv\")\n", "flows = pd.read_csv(\"flows.csv\")\n", "\n", "stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"])\n", "flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"])" ] }, { "cell_type": "code", "execution_count": 11, "id": "221a4c7b-0f50-431a-875b-ad40bed7f0ac", "metadata": {}, "outputs": [], "source": [ "import os\n", "import s3fs\n", "os.environ[\"AWS_ACCESS_KEY_ID\"] = 'N0C5PK75FDX2WXI8OVP1'\n", "os.environ[\"AWS_SECRET_ACCESS_KEY\"] = 'nZvC2urUkG7EvhDsFDyaOslqr160aoWMs+5MP3Ft'\n", "os.environ[\"AWS_SESSION_TOKEN\"] = 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJOMEM1UEs3NUZEWDJXWEk4T1ZQMSIsImFjciI6IjAiLCJhbGxvd2VkLW9yaWdpbnMiOlsiKiJdLCJhdWQiOlsibWluaW8iLCJhY2NvdW50Il0sImF1dGhfdGltZSI6MTc3MzIyNzI3OCwiYXpwIjoib255eGlhLW1pbmlvIiwiZW1haWwiOiJzYXJhaC50aG91bXlyZUBlbnNhZS5mciIsImVtYWlsX3ZlcmlmaWVkIjp0cnVlLCJleHAiOjE3NzQ0MzY4OTksImZhbWlseV9uYW1lIjoiVEhPVU1ZUkUiLCJnaXZlbl9uYW1lIjoiU2FyYWgiLCJncm91cHMiOlsiYmRjLWRhdGEiLCJiZGMtY2FybWlnbmFjLWczIl0sImlhdCI6MTc3MzIyNzI5OSwiaXNzIjoiaHR0cHM6Ly9hdXRoLmdyb3VwZS1nZW5lcy5mci9yZWFsbXMvZ2VuZXMiLCJqdGkiOiI5Mjc0ODgyMy04OTgzLTQzYjktYTZhNy0xYjhlNDdiOTRjNTUiLCJuYW1lIjoiU2FyYWggVEhPVU1ZUkUiLCJwb2xpY3kiOiJzdHNvbmx5IiwicHJlZmVycmVkX3VzZXJuYW1lIjoic3Rob3VteXJlLWVuc2FlIiwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbIm9mZmxpbmVfYWNjZXNzIiwiZGVmYXVsdC1yb2xlcy1nZW5lcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJvcGVuaWQgcHJvZmlsZSBlbWFpbCIsInNpZCI6IjRkODM3NWVmLTQwY2QtNDYyMi05NzIyLTI4YjhjZTQ2MWQ5YyIsInN1YiI6ImVhYWVkN2QyLWM4MjYtNGIxNC05MzczLTYwYjNhODhlMWFiNiIsInR5cCI6IkJlYXJlciJ9.hl_SekvaH9A22PMb3W0VQBSNO67LnaneIuLC-X5XBnzOO5GLV61aocDRfYC6hvVVhdzyewSTtD2kvdyJdeu6qA'\n", "os.environ[\"AWS_DEFAULT_REGION\"] = 'us-east-1'\n", "fs = s3fs.S3FileSystem(\n", " client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'},\n", " key = os.environ[\"AWS_ACCESS_KEY_ID\"], \n", " secret = os.environ[\"AWS_SECRET_ACCESS_KEY\"], \n", " token = os.environ[\"AWS_SESSION_TOKEN\"])\n" ] }, { "cell_type": "code", "execution_count": 9, "id": "87505949-ecd8-4fad-a19b-d29130be587e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['Registrar Account - ID', 'Product - Isin', 'Centralisation Date',\n", " 'Quantity - AUM', 'corrected_aum', 'repair_flag'],\n", " dtype='object')\n" ] } ], "source": [ "print(stocks.columns)" ] }, { "cell_type": "code", "execution_count": null, "id": "3c6d9d05-b203-49ae-869f-7f85ead2c69e", "metadata": {}, "outputs": [], "source": [ "keys = [\n", " \"Registrar Account - ID\",\n", " \"Product - Isin\",\n", " \"Centralisation Date\"\n", "]\n", "\n", "stocks = stocks[keys + [\"Quantity - AUM\"]]\n", "\n", "flows = flows[keys + [\"Quantity - NetFlows\"]]\n", "\n", "flows = (\n", " flows\n", " .groupby(keys, as_index=False)\n", " .sum()\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "d30c2235-281b-41a6-828b-abb6fcfc4183", "metadata": {}, "outputs": [], "source": [ "df = stocks.merge(flows, on=keys, how=\"left\")\n", "\n", "df[\"Quantity - NetFlows\"] = df[\"Quantity - NetFlows\"].fillna(0)\n", "\n", "df = df.sort_values(keys)\n", "\n", "\n", "\n", "# REBUILD ACCOUNTING IDENTITY\n", "\n", "\n", "df[\"prev_aum\"] = df.groupby(\n", " [\"Registrar Account - ID\",\"Product - Isin\"]\n", ")[\"Quantity - AUM\"].shift(1)\n", "\n", "df[\"prev_flow\"] = df.groupby(\n", " [\"Registrar Account - ID\",\"Product - Isin\"]\n", ")[\"Quantity - NetFlows\"].shift(1).fillna(0)\n", "\n", "df[\"expected_aum\"] = df[\"prev_aum\"] + df[\"prev_flow\"]\n", "\n", "\n", "\n", "# GAP ANALYSIS\n", "\n", "\n", "df[\"gap\"] = df[\"Quantity - AUM\"] - df[\"expected_aum\"]\n", "df[\"gap_abs\"] = df[\"gap\"].abs()\n", "\n", "EPS = 10\n", "\n", "df[\"rupture_flag\"] = (\n", " df[\"prev_aum\"].notna()\n", " & (df[\"gap_abs\"] > EPS)\n", ")\n", "\n", "\n", "\n", "# PARAMETERS\n", "\n", "\n", "GAP_TOL = 1e-6\n", "REL_GAP_THR = 0.05\n", "MIN_PERSISTENCE = 3\n", "\n", "\n", "\n", "# SORT DATA\n", "\n", "\n", "df = df.sort_values(\n", " [\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"]\n", ")\n", "\n", "df[\"corrected_aum\"] = df[\"Quantity - AUM\"]\n", "df[\"repair_flag\"] = False\n", "\n", "\n", "# REBUILD EXPECTED AUM BEFORE REPAIR\n", "\n", "\n", "df = df.sort_values(\n", " [\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"]\n", ")\n", "\n", "df[\"prev_aum\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - AUM\"]\n", " .shift(1)\n", ")\n", "\n", "df[\"prev_flow\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - NetFlows\"]\n", " .shift(1)\n", " .fillna(0)\n", ")\n", "\n", "df[\"expected_stock\"] = df[\"prev_aum\"] + df[\"prev_flow\"]\n", "\n", "#delete negative AUM\n", "df = df[df[\"Quantity - AUM\"] >= 0]" ] }, { "cell_type": "code", "execution_count": null, "id": "efd374d0-6393-45f2-926e-2c29249cd078", "metadata": {}, "outputs": [], "source": [ "def repair_group(g):\n", "\n", " g = g.copy()\n", "\n", " obs = g[\"Quantity - AUM\"].values\n", " flows = g[\"Quantity - NetFlows\"].values\n", "\n", " corrected = obs.copy()\n", "\n", " \n", " # Build expected AUM path\n", " \n", "\n", " expected = np.empty_like(obs)\n", " expected[0] = np.nan\n", "\n", " for t in range(1, len(obs)):\n", " expected[t] = corrected[t-1] + flows[t-1]\n", "\n", " gap = obs - expected\n", "\n", " rel_gap = np.abs(gap) / np.maximum(np.abs(expected), 1.0)\n", "\n", " idx = None\n", "\n", " \n", " # Detect persistent shift\n", " \n", "\n", " for i in range(1, len(obs) - MIN_PERSISTENCE):\n", "\n", " if (\n", " rel_gap[i] > REL_GAP_THR\n", " and np.all(np.abs(gap[i:i+MIN_PERSISTENCE] - gap[i]) < GAP_TOL)\n", " and np.all(np.abs(np.diff(flows[i:i+MIN_PERSISTENCE])) < GAP_TOL)\n", " ):\n", " idx = i\n", " break\n", "\n", " if idx is None:\n", " return g\n", "\n", " \n", " # Compute shift\n", " \n", "\n", " shift = gap[idx]\n", "\n", " candidate = obs[idx:] - shift\n", "\n", " \n", " # SAFETY CHECKS\n", " \n", "\n", " # 1. do not allow negative AUM\n", " # refuse repair if it creates NEW negative AUM\n", " if ((candidate < 0) & (obs[idx:] >= 0)).any():\n", " return g\n", "\n", " # 2. avoid extreme corrections\n", " if abs(shift) > 2 * np.nanmax(np.abs(obs)):\n", " return g\n", "\n", " \n", " # Apply correction\n", " \n", "\n", " corrected[idx:] = candidate\n", "\n", " g.loc[g.index[idx]:, \"repair_flag\"] = True\n", "\n", " \n", " # Rebuild expected path after repair\n", " \n", "\n", " expected_corr = np.empty_like(obs)\n", " expected_corr[0] = np.nan\n", "\n", " for t in range(1, len(obs)):\n", " expected_corr[t] = corrected[t-1] + flows[t-1]\n", "\n", " g[\"corrected_aum\"] = corrected\n", " g[\"expected_stock_corr\"] = expected_corr\n", "\n", " return g" ] }, { "cell_type": "code", "execution_count": null, "id": "fe1f869c-0a00-47e0-9355-3705b23561c7", "metadata": {}, "outputs": [], "source": [ "def repair_group(g):\n", "\n", " g = g.copy()\n", "\n", " obs = g[\"Quantity - AUM\"].values\n", " flows = g[\"Quantity - NetFlows\"].values\n", "\n", " corrected = obs.copy()\n", "\n", " # Build expected AUM path\n", " expected = np.empty_like(obs)\n", " expected[0] = np.nan\n", "\n", " for t in range(1, len(obs)):\n", " expected[t] = corrected[t-1] + flows[t-1]\n", "\n", " gap = obs - expected\n", " rel_gap = np.abs(gap) / np.maximum(np.abs(expected), 1.0)\n", "\n", " idx = None\n", " shift = None\n", "\n", " for i in range(1, len(obs) - MIN_PERSISTENCE - 1):\n", "\n", " # ------------------------------------------------\n", " # CASE 1 — standard persistent shift (original algo)\n", " # ------------------------------------------------\n", " if (\n", " rel_gap[i] > REL_GAP_THR\n", " and np.all(np.abs(gap[i:i+MIN_PERSISTENCE] - gap[i]) < GAP_TOL)\n", " and np.all(np.abs(np.diff(flows[i:i+MIN_PERSISTENCE])) < GAP_TOL)\n", " ):\n", " idx = i\n", " shift = gap[i]\n", " break\n", "\n", " # ------------------------------------------------\n", " # CASE 2 — double shift\n", " # ------------------------------------------------\n", " if (\n", " rel_gap[i] > REL_GAP_THR\n", " and rel_gap[i+1] > REL_GAP_THR\n", " and np.all(np.abs(gap[i+1:i+1+MIN_PERSISTENCE] - gap[i+1]) < GAP_TOL)\n", " and np.all(np.abs(np.diff(flows[i+1:i+1+MIN_PERSISTENCE])) < GAP_TOL)\n", " ):\n", " idx = i\n", " shift = gap[i+1]\n", " break\n", "\n", " if idx is None:\n", " return g\n", "\n", " # Apply shift\n", " candidate = obs[idx:] - shift\n", "\n", " # Safety checks\n", "\n", " # avoid creating new negative AUM\n", " if ((candidate < 0) & (obs[idx:] >= 0)).any():\n", " return g\n", "\n", " # avoid extreme corrections\n", " if abs(shift) > 2 * np.nanmax(np.abs(obs)):\n", " return g\n", "\n", " corrected[idx:] = candidate\n", "\n", " g.loc[g.index[idx]:, \"repair_flag\"] = True\n", "\n", " # rebuild expected path after repair\n", " expected_corr = np.empty_like(obs)\n", " expected_corr[0] = np.nan\n", "\n", " for t in range(1, len(obs)):\n", " expected_corr[t] = corrected[t-1] + flows[t-1]\n", "\n", " g[\"corrected_aum\"] = corrected\n", " g[\"expected_stock_corr\"] = expected_corr\n", "\n", " return g" ] }, { "cell_type": "code", "execution_count": 12, "id": "1ca2a5ab-354f-49af-b1aa-75c93d48de06", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_5465/2911292439.py:10: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n", " stocks = pd.read_csv(f, sep=\";\")\n", "/tmp/ipykernel_5465/2911292439.py:16: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n", " flows = pd.read_csv(f, sep=\";\")\n", "/tmp/ipykernel_5465/2911292439.py:127: FutureWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", " .apply(repair_group)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "stock_repaired.csv successfully created\n" ] } ], "source": [ "#FULL STOCK REPAIR\n", "\n", "# ============================================================\n", "# LOAD DATA\n", "# ============================================================\n", "\n", "with fs.open('projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv', 'rb') as f:\n", " stocks = pd.read_csv(f, sep=\";\")\n", "\n", "with fs.open(\n", " \"projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv\",\n", " \"rb\"\n", ") as f:\n", " flows = pd.read_csv(f, sep=\";\")\n", "\n", "stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"])\n", "flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"])\n", "\n", "# ============================================================\n", "# MERGE FLOWS\n", "# ============================================================\n", "\n", "df = stocks.merge(\n", " flows,\n", " on=[\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"],\n", " how=\"left\"\n", ")\n", "\n", "df[\"Quantity - NetFlows\"] = df[\"Quantity - NetFlows\"].fillna(0)\n", "\n", "df = df.sort_values(\n", " [\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"]\n", ")\n", "\n", "# ============================================================\n", "# PARAMETERS\n", "# ============================================================\n", "\n", "REL_GAP_THR = 0.3\n", "MIN_PERSISTENCE = 3\n", "GAP_TOL = 1e-6\n", "\n", "# ============================================================\n", "# REPAIR FUNCTION\n", "# ============================================================\n", "\n", "def repair_group(g):\n", "\n", " g = g.copy()\n", "\n", " obs = g[\"Quantity - AUM\"].values\n", " flows = g[\"Quantity - NetFlows\"].values\n", "\n", " corrected = obs.copy()\n", "\n", " expected = np.empty_like(obs)\n", " expected[0] = np.nan\n", "\n", " for t in range(1, len(obs)):\n", " expected[t] = corrected[t-1] + flows[t-1]\n", "\n", " gap = obs - expected\n", " rel_gap = np.abs(gap) / np.maximum(np.abs(expected), 1.0)\n", "\n", " idx = None\n", " shift = None\n", "\n", " for i in range(1, len(obs) - MIN_PERSISTENCE - 1):\n", "\n", " # CASE 1 — persistent shift\n", " if (\n", " rel_gap[i] > REL_GAP_THR\n", " and np.all(np.abs(gap[i:i+MIN_PERSISTENCE] - gap[i]) < GAP_TOL)\n", " and np.all(np.abs(np.diff(flows[i:i+MIN_PERSISTENCE])) < GAP_TOL)\n", " ):\n", " idx = i\n", " shift = gap[i]\n", " break\n", "\n", " # CASE 2 — double shift\n", " if (\n", " rel_gap[i] > REL_GAP_THR\n", " and rel_gap[i+1] > REL_GAP_THR\n", " and np.all(np.abs(gap[i+1:i+1+MIN_PERSISTENCE] - gap[i+1]) < GAP_TOL)\n", " and np.all(np.abs(np.diff(flows[i+1:i+1+MIN_PERSISTENCE])) < GAP_TOL)\n", " ):\n", " idx = i\n", " shift = gap[i+1]\n", " break\n", "\n", " if idx is None:\n", " return g\n", "\n", " candidate = obs[idx:] - shift\n", "\n", " if ((candidate < 0) & (obs[idx:] >= 0)).any():\n", " return g\n", "\n", " if abs(shift) > 2 * np.nanmax(np.abs(obs)):\n", " return g\n", "\n", " corrected[idx:] = candidate\n", "\n", " g.loc[g.index[idx]:, \"repair_flag\"] = True\n", "\n", " expected_corr = np.empty_like(obs)\n", " expected_corr[0] = np.nan\n", "\n", " for t in range(1, len(obs)):\n", " expected_corr[t] = corrected[t-1] + flows[t-1]\n", "\n", " g[\"corrected_aum\"] = corrected\n", "\n", " return g\n", "\n", "# ============================================================\n", "# APPLY REPAIR\n", "# ============================================================\n", "\n", "df_repaired = (\n", " df.groupby(\n", " [\"Registrar Account - ID\",\"Product - Isin\"],\n", " group_keys=False\n", " )\n", " .apply(repair_group)\n", ")\n", "\n", "# ============================================================\n", "# REBUILD STOCK FILE\n", "# ============================================================\n", "\n", "stocks_repaired = stocks.copy()\n", "\n", "stocks_repaired[\"Quantity - AUM\"] = df_repaired[\"corrected_aum\"].fillna(\n", " stocks[\"Quantity - AUM\"]\n", ")\n", "\n", "# ============================================================\n", "# SAVE WITH ORIGINAL FORMAT\n", "# ============================================================\n", "\n", "stocks_repaired.to_csv(\n", " \"stock_repaired.csv\",\n", " sep=\";\",\n", " index=False\n", ")\n", "\n", "print(\"stock_repaired.csv successfully created\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "f94f07b4-e053-4828-bbb1-3697f9a11751", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_5465/3656779442.py:4: FutureWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", " .apply(repair_group)\n" ] }, { "ename": "KeyError", "evalue": "'expected_stock_corr'", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mKeyError\u001b[39m Traceback (most recent call last)", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/indexes/base.py:3812\u001b[39m, in \u001b[36mIndex.get_loc\u001b[39m\u001b[34m(self, key)\u001b[39m\n\u001b[32m 3811\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m3812\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_engine\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 3813\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", "\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/index.pyx:167\u001b[39m, in \u001b[36mpandas._libs.index.IndexEngine.get_loc\u001b[39m\u001b[34m()\u001b[39m\n", "\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/index.pyx:196\u001b[39m, in \u001b[36mpandas._libs.index.IndexEngine.get_loc\u001b[39m\u001b[34m()\u001b[39m\n", "\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/hashtable_class_helper.pxi:7088\u001b[39m, in \u001b[36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[39m\u001b[34m()\u001b[39m\n", "\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/hashtable_class_helper.pxi:7096\u001b[39m, in \u001b[36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[39m\u001b[34m()\u001b[39m\n", "\u001b[31mKeyError\u001b[39m: 'expected_stock_corr'", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[31mKeyError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[15]\u001b[39m\u001b[32m, line 25\u001b[39m\n\u001b[32m 22\u001b[39m df[\u001b[33m\"\u001b[39m\u001b[33mexpected_stock\u001b[39m\u001b[33m\"\u001b[39m] = df[\u001b[33m\"\u001b[39m\u001b[33mprev_aum\u001b[39m\u001b[33m\"\u001b[39m] + df[\u001b[33m\"\u001b[39m\u001b[33mprev_flow\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 23\u001b[39m df[\u001b[33m\"\u001b[39m\u001b[33mgap_before\u001b[39m\u001b[33m\"\u001b[39m] = df[\u001b[33m\"\u001b[39m\u001b[33mQuantity - AUM\u001b[39m\u001b[33m\"\u001b[39m] - df[\u001b[33m\"\u001b[39m\u001b[33mexpected_stock\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m---> \u001b[39m\u001b[32m25\u001b[39m df[\u001b[33m\"\u001b[39m\u001b[33mgap_after\u001b[39m\u001b[33m\"\u001b[39m] = df[\u001b[33m\"\u001b[39m\u001b[33mcorrected_aum\u001b[39m\u001b[33m\"\u001b[39m] - \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mexpected_stock_corr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[32m 27\u001b[39m df[\u001b[33m\"\u001b[39m\u001b[33mrupture_before\u001b[39m\u001b[33m\"\u001b[39m] = df[\u001b[33m\"\u001b[39m\u001b[33mgap_before\u001b[39m\u001b[33m\"\u001b[39m].abs() > GAP_TOL\n\u001b[32m 29\u001b[39m df[\u001b[33m\"\u001b[39m\u001b[33mrupture_after\u001b[39m\u001b[33m\"\u001b[39m] = df[\u001b[33m\"\u001b[39m\u001b[33mgap_after\u001b[39m\u001b[33m\"\u001b[39m].abs() > GAP_TOL\n", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/frame.py:4113\u001b[39m, in \u001b[36mDataFrame.__getitem__\u001b[39m\u001b[34m(self, key)\u001b[39m\n\u001b[32m 4111\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.columns.nlevels > \u001b[32m1\u001b[39m:\n\u001b[32m 4112\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._getitem_multilevel(key)\n\u001b[32m-> \u001b[39m\u001b[32m4113\u001b[39m indexer = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4114\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[32m 4115\u001b[39m indexer = [indexer]\n", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/indexes/base.py:3819\u001b[39m, in \u001b[36mIndex.get_loc\u001b[39m\u001b[34m(self, key)\u001b[39m\n\u001b[32m 3814\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[32m 3815\u001b[39m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc.Iterable)\n\u001b[32m 3816\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[32m 3817\u001b[39m ):\n\u001b[32m 3818\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[32m-> \u001b[39m\u001b[32m3819\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01merr\u001b[39;00m\n\u001b[32m 3820\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[32m 3821\u001b[39m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[32m 3822\u001b[39m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[32m 3823\u001b[39m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[32m 3824\u001b[39m \u001b[38;5;28mself\u001b[39m._check_indexing_error(key)\n", "\u001b[31mKeyError\u001b[39m: 'expected_stock_corr'" ] } ], "source": [ "df = (\n", " df\n", " .groupby([\"Registrar Account - ID\", \"Product - Isin\"], group_keys=False)\n", " .apply(repair_group)\n", ")\n", "\n", "# VALIDATION BEFORE / AFTER\n", "\n", "df[\"prev_aum\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - AUM\"]\n", " .shift(1)\n", ")\n", "\n", "df[\"prev_flow\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - NetFlows\"]\n", " .shift(1)\n", " .fillna(0)\n", ")\n", "\n", "df[\"expected_stock\"] = df[\"prev_aum\"] + df[\"prev_flow\"]\n", "df[\"gap_before\"] = df[\"Quantity - AUM\"] - df[\"expected_stock\"]\n", "\n", "df[\"gap_after\"] = df[\"corrected_aum\"] - df[\"expected_stock_corr\"]\n", "\n", "df[\"rupture_before\"] = df[\"gap_before\"].abs() > GAP_TOL\n", "\n", "df[\"rupture_after\"] = df[\"gap_after\"].abs() > GAP_TOL\n", "\n", "\n", "\n", "# SUMMARY\n", "\n", "\n", "summary = pd.DataFrame({\n", " \"Before repair\": [df[\"rupture_before\"].sum()],\n", " \"After repair\": [df[\"rupture_after\"].sum()],\n", " \"Repaired points\": [df[\"repair_flag\"].sum()]\n", "})\n", "\n", "print(summary)\n", "\n", "\n", "\n", "# BUILD REPAIRED DATASET\n", "\n", "\n", "stocks_repaired = stocks.copy()\n", "\n", "repair_map = df[[\n", " \"Registrar Account - ID\",\n", " \"Product - Isin\",\n", " \"Centralisation Date\",\n", " \"corrected_aum\",\n", " \"repair_flag\"\n", "]]\n", "\n", "stocks_repaired = stocks_repaired.merge(\n", " repair_map,\n", " on=keys,\n", " how=\"left\"\n", ")\n", "\n", "stocks_repaired[\"Quantity - AUM\"] = np.where(\n", " stocks_repaired[\"repair_flag\"] == True,\n", " stocks_repaired[\"corrected_aum\"],\n", " stocks_repaired[\"Quantity - AUM\"]\n", ")\n", "\n", "stocks_repaired.to_csv(\"stock_repaired.csv\", index=False)\n", "\n", "\n", "\n", "# COMPARISON RAW VS REPAIRED\n", "\n", "\n", "df_compare = stocks.merge(\n", " stocks_repaired,\n", " on=keys,\n", " how=\"inner\",\n", " suffixes=(\"_raw\",\"_repaired\")\n", ")\n", "\n", "df_compare[\"aum_diff\"] = (\n", " df_compare[\"Quantity - AUM_repaired\"]\n", " - df_compare[\"Quantity - AUM_raw\"]\n", ")\n", "\n", "print(\"\\nNUMBER OF MODIFIED OBSERVATIONS:\",\n", " (df_compare[\"aum_diff\"] != 0).sum())\n", "\n", "print(\"Share modified:\",\n", " round((df_compare[\"aum_diff\"] != 0).mean()*100,2), \"%\")\n", "\n", "print(\"\\nTOTAL AUM\")\n", "\n", "print(\"Raw total :\", df_compare[\"Quantity - AUM_raw\"].sum())\n", "print(\"Repaired total :\", df_compare[\"Quantity - AUM_repaired\"].sum())\n", "\n", "\n", "\n", "# RUPTURE DISTRIBUTION BEFORE / AFTER\n", "\n", "\n", "def rupture_distribution(df, flag):\n", "\n", " rupture_summary = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " .agg(\n", " n_ruptures=(flag,\"sum\"),\n", " total_obs=(flag,\"count\"),\n", " rupture_ratio=(flag,\"mean\")\n", " )\n", " .reset_index()\n", " )\n", "\n", " rs = rupture_summary.copy()\n", "\n", " bins = [0,0.01,0.10,0.30,1.01]\n", "\n", " labels = [\n", " \"Clean / quasi-clean (≤1%)\",\n", " \"Moderate (1–10%)\",\n", " \"High (10–30%)\",\n", " \"Severe (>30%)\"\n", " ]\n", "\n", " rs[\"rupture_class\"] = pd.cut(\n", " rs[\"rupture_ratio\"],\n", " bins=bins,\n", " labels=labels,\n", " include_lowest=True\n", " )\n", "\n", " dist = (\n", " rs[\"rupture_class\"]\n", " .value_counts(normalize=True)\n", " .sort_index()\n", " * 100\n", " ).round(1)\n", "\n", " return dist\n", "\n", "\n", "dist_before = rupture_distribution(df,\"rupture_before\")\n", "dist_after = rupture_distribution(df,\"rupture_after\")" ] }, { "cell_type": "code", "execution_count": null, "id": "54491736-58b3-4ef7-b6c4-5534ec796bce", "metadata": {}, "outputs": [], "source": [ "# DONUT CHART BEFORE / AFTER\n", "\n", "fig = go.Figure()\n", "\n", "fig.add_trace(go.Pie(\n", " labels=dist_before.index,\n", " values=dist_before.values,\n", " hole=0.45,\n", " name=\"Before repair\",\n", " domain=dict(x=[0,0.48]),\n", " textinfo=\"percent\"\n", "))\n", "\n", "fig.add_trace(go.Pie(\n", " labels=dist_after.index,\n", " values=dist_after.values,\n", " hole=0.45,\n", " name=\"After repair\",\n", " domain=dict(x=[0.52,1]),\n", " textinfo=\"percent\"\n", "))\n", "\n", "fig.update_layout(\n", " title=\"Rupture intensity distribution (Before vs After repair)\",\n", " annotations=[\n", " dict(text=\"Before repair\", x=0.22, y=0.5, showarrow=False),\n", " dict(text=\"After repair\", x=0.78, y=0.5, showarrow=False)\n", " ]\n", ")\n", "\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "d844f6f0-c0f4-4f71-8280-1fd39ced83b7", "metadata": {}, "outputs": [], "source": [ "# LOAD DATA\n", "\n", "aum = pd.read_csv(\"stock_repaired.csv\")\n", "\n", "aum[\"Centralisation Date\"] = pd.to_datetime(aum[\"Centralisation Date\"])\n", "\n", "\n", "# KEEP USEFUL COLUMNS\n", "\n", "aum = aum[[\n", " \"Registrar Account - ID\",\n", " \"Product - Isin\",\n", " \"Centralisation Date\",\n", " \"Quantity - AUM\",\n", " \"repair_flag\"\n", "]]\n", "\n", "flows = flows[[\n", " \"Registrar Account - ID\",\n", " \"Product - Isin\",\n", " \"Centralisation Date\",\n", " \"Quantity - NetFlows\"\n", "]]\n", "\n", "\n", "\n", "# AGGREGATE FLOWS\n", "\n", "flows = (\n", " flows\n", " .groupby(\n", " [\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"],\n", " as_index=False\n", " )[\"Quantity - NetFlows\"]\n", " .sum()\n", ")\n", "\n", "\n", "\n", "# MERGE DATASETS\n", "\n", "df = aum.merge(\n", " flows,\n", " on=[\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"],\n", " how=\"left\"\n", ")\n", "\n", "df[\"Quantity - NetFlows\"] = df[\"Quantity - NetFlows\"].fillna(0)\n", "\n", "print(\"Dataset size:\", df.shape)\n", "\n", "\n", "\n", "# SORT DATA\n", "\n", "df = df.sort_values(\n", " [\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"]\n", ")\n", "\n", "\n", "\n", "# REBUILD ACCOUNTING IDENTITY\n", "\n", "df[\"prev_aum\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - AUM\"]\n", " .shift(1)\n", ")\n", "\n", "df[\"prev_flow\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - NetFlows\"]\n", " .shift(1)\n", " .fillna(0)\n", ")\n", "\n", "df[\"expected_aum\"] = df[\"prev_aum\"] + df[\"prev_flow\"]\n", "\n", "\n", "\n", "# GAPS\n", "\n", "df[\"gap\"] = df[\"Quantity - AUM\"] - df[\"expected_aum\"]\n", "\n", "df[\"gap_abs\"] = df[\"gap\"].abs()\n", "\n", "df[\"gap_rel\"] = (\n", " df[\"gap_abs\"] /\n", " df[\"expected_aum\"].abs().clip(lower=1)\n", ")\n", "\n", "\n", "\n", "# ACCOUNTING CONSISTENCY\n", "\n", "print(\"\\nACCOUNTING GAP DISTRIBUTION\")\n", "\n", "print(df[\"gap_abs\"].describe())\n", "\n", "print(\"\\nRelative gap quantiles\")\n", "\n", "print(df[\"gap_rel\"].quantile([0.90,0.95,0.99]))\n", "\n", "\n", "\n", "# NEGATIVE AUM\n", "\n", "neg = (df[\"Quantity - AUM\"] < 0).sum()\n", "\n", "print(\"\\nNEGATIVE AUM:\", neg)\n", "\n", "\n", "\n", "# REPAIR RATE\n", "\n", "print(\"\\nREPAIR RATE\")\n", "\n", "print(df[\"repair_flag\"].mean())\n", "\n", "\n", "\n", "# AUM JUMPS\n", "\n", "\n", "df[\"prev_obs\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - AUM\"]\n", " .shift(1)\n", ")\n", "\n", "df[\"aum_jump\"] = (\n", " df[\"Quantity - AUM\"] /\n", " df[\"prev_obs\"].replace(0,np.nan)\n", ")\n", "\n", "print(\"\\nAUM JUMP QUANTILES\")\n", "\n", "print(df[\"aum_jump\"].quantile([0.90,0.95,0.99]))\n", "\n", "\n", "\n", "# VISUAL CHECK\n", "\n", "\n", "def plot_series(account, isin):\n", "\n", " sub = df[\n", " (df[\"Registrar Account - ID\"] == account) &\n", " (df[\"Product - Isin\"] == isin)\n", " ]\n", "\n", " plt.figure(figsize=(8,3))\n", "\n", " plt.plot(\n", " sub[\"Centralisation Date\"],\n", " sub[\"Quantity - AUM\"],\n", " label=\"AUM\"\n", " )\n", "\n", " plt.plot(\n", " sub[\"Centralisation Date\"],\n", " sub[\"expected_aum\"],\n", " linestyle=\"--\",\n", " label=\"Expected AUM\"\n", " )\n", "\n", " plt.legend()\n", " plt.title(f\"Account {account} — ISIN {isin}\")\n", "\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "f7d759f7-64be-4d82-a79c-98cda407cfec", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# COMPUTE AUM CHANGE\n", "\n", "df[\"prev_aum\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - AUM\"]\n", " .shift(1)\n", ")\n", "\n", "df[\"delta_aum\"] = df[\"Quantity - AUM\"] - df[\"prev_aum\"]\n", "\n", "df[\"flow_lag\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - NetFlows\"]\n", " .shift(1)\n", ")\n", "\n", "\n", "\n", "# FILTER VALID OBSERVATIONS\n", "\n", "\n", "diag = df[\n", " df[\"prev_aum\"].notna() &\n", " df[\"flow_lag\"].notna()\n", "]\n", "\n", "\n", "\n", "# SAMPLE FOR PLOTTING (dataset is large)\n", "\n", "\n", "sample = diag.sample(20000, random_state=1)\n", "\n", "\n", "\n", "# SCATTER PLOT\n", "\n", "\n", "plt.figure(figsize=(7,7))\n", "\n", "plt.scatter(\n", " sample[\"flow_lag\"],\n", " sample[\"delta_aum\"],\n", " alpha=0.3,\n", " s=5\n", ")\n", "\n", "# perfect accounting identity\n", "x = np.linspace(\n", " sample[\"flow_lag\"].min(),\n", " sample[\"flow_lag\"].max(),\n", " 100\n", ")\n", "\n", "plt.plot(x, x, color=\"red\", label=\"Perfect identity\")\n", "\n", "plt.xlabel(\"Flow (t-1)\")\n", "plt.ylabel(\"Δ AUM\")\n", "\n", "plt.title(\"AUM / Flow Accounting Diagnostic\")\n", "\n", "plt.legend()\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "d0a959c9-cfff-44cb-a1df-6c7275ec5b43", "metadata": {}, "outputs": [], "source": [ "df[\"implied_return\"] = (\n", " df[\"Quantity - AUM\"] - df[\"prev_aum\"] - df[\"flow_lag\"]\n", ") / df[\"prev_aum\"].replace(0, np.nan)\n", "\n", "print(df[\"implied_return\"].quantile([0.5,0.9,0.95,0.99]))" ] }, { "cell_type": "code", "execution_count": null, "id": "15111e8a-7d87-4c37-8122-daafe90a1ad5", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# ============================================================\n", "# ADDITIONAL DATASET VALIDATION CHECKS (ROBUST VERSION)\n", "# ============================================================\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "print(\"\\n==============================\")\n", "print(\"ADDITIONAL DATA QUALITY CHECKS\")\n", "print(\"==============================\")\n", "\n", "# ------------------------------------------------------------\n", "# RECOMPUTE KEY VARIABLES IF NEEDED\n", "# ------------------------------------------------------------\n", "\n", "df = df.sort_values([\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"])\n", "\n", "df[\"prev_aum\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - AUM\"]\n", " .shift(1)\n", ")\n", "\n", "df[\"flow_lag\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - NetFlows\"]\n", " .shift(1)\n", ")\n", "\n", "df[\"flow_lag\"] = df[\"flow_lag\"].fillna(0)\n", "\n", "df[\"expected_aum\"] = df[\"prev_aum\"] + df[\"flow_lag\"]\n", "\n", "df[\"gap\"] = df[\"Quantity - AUM\"] - df[\"expected_aum\"]\n", "df[\"gap_abs\"] = df[\"gap\"].abs()\n", "\n", "df[\"delta_aum\"] = df[\"Quantity - AUM\"] - df[\"prev_aum\"]\n", "\n", "df[\"implied_return\"] = (\n", " df[\"Quantity - AUM\"] - df[\"prev_aum\"] - df[\"flow_lag\"]\n", ") / df[\"prev_aum\"].replace(0,np.nan)\n", "\n", "df[\"aum_jump\"] = (\n", " df[\"Quantity - AUM\"] /\n", " df[\"prev_aum\"].replace(0,np.nan)\n", ")\n", "\n", "\n", "# ------------------------------------------------------------\n", "# 1. CHECK SERIES WHERE GAP IS STILL LARGE\n", "# ------------------------------------------------------------\n", "\n", "remaining_gaps = df[df[\"gap_abs\"] > 10]\n", "\n", "series_remaining = (\n", " remaining_gaps\n", " .groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " .size()\n", ")\n", "\n", "print(\"\\nSERIES STILL WITH LARGE ACCOUNTING GAPS:\", len(series_remaining))\n", "\n", "\n", "# ------------------------------------------------------------\n", "# 2. GAP DISTRIBUTION\n", "# ------------------------------------------------------------\n", "\n", "print(\"\\nACCOUNTING GAP DISTRIBUTION\")\n", "print(df[\"gap_abs\"].quantile([0.5,0.9,0.95,0.99]))\n", "\n", "\n", "# ------------------------------------------------------------\n", "# 3. IMPLIED RETURNS DISTRIBUTION\n", "# ------------------------------------------------------------\n", "\n", "print(\"\\nIMPLIED RETURN DISTRIBUTION\")\n", "print(df[\"implied_return\"].quantile([0.5,0.9,0.95,0.99]))\n", "\n", "\n", "# ------------------------------------------------------------\n", "# 4. EXTREME RETURNS\n", "# ------------------------------------------------------------\n", "\n", "extreme_returns = df[df[\"implied_return\"].abs() > 2]\n", "\n", "print(\"\\nOBSERVATIONS WITH EXTREME IMPLIED RETURNS (>200%):\",\n", " len(extreme_returns))\n", "\n", "\n", "# ------------------------------------------------------------\n", "# 5. FLOW / AUM ACCOUNTING CORRELATION\n", "# ------------------------------------------------------------\n", "\n", "valid = df[\n", " df[\"prev_aum\"].notna() &\n", " df[\"flow_lag\"].notna()\n", "]\n", "\n", "corr = valid[\"delta_aum\"].corr(valid[\"flow_lag\"])\n", "\n", "print(\"\\nCORRELATION ΔAUM vs FLOW:\", corr)\n", "\n", "\n", "# ------------------------------------------------------------\n", "# 6. LARGE AUM JUMPS\n", "# ------------------------------------------------------------\n", "\n", "large_jumps = df[df[\"aum_jump\"].abs() > 5]\n", "\n", "print(\"\\nLARGE AUM JUMPS (>5x):\", len(large_jumps))\n", "\n", "\n", "# ------------------------------------------------------------\n", "# 7. SERIES WITH HIGH GAP RATE\n", "# ------------------------------------------------------------\n", "\n", "series_gap_rate = (\n", " (df[\"gap_abs\"] > 10)\n", " .groupby([df[\"Registrar Account - ID\"], df[\"Product - Isin\"]])\n", " .mean()\n", ")\n", "\n", "problem_series = series_gap_rate[series_gap_rate > 0.2]\n", "\n", "print(\"\\nSERIES WITH >20% ACCOUNTING GAPS:\", len(problem_series))\n", "\n", "\n", "# ------------------------------------------------------------\n", "# 8. TOTAL AUM STABILITY\n", "# ------------------------------------------------------------\n", "\n", "if \"df_compare\" in globals():\n", "\n", " raw_total = df_compare[\"Quantity - AUM_raw\"].sum()\n", " repaired_total = df_compare[\"Quantity - AUM_repaired\"].sum()\n", "\n", " print(\"\\nTOTAL AUM RAW:\", raw_total)\n", " print(\"TOTAL AUM REPAIRED:\", repaired_total)\n", "\n", " print(\"RELATIVE CHANGE:\",\n", " (repaired_total - raw_total) / raw_total)\n", "\n", "\n", "# ------------------------------------------------------------\n", "# 9. PROPORTION OF SERIES REPAIRED\n", "# ------------------------------------------------------------\n", "\n", "if \"repair_flag\" in df.columns:\n", "\n", " series_repaired = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"repair_flag\"]\n", " .max()\n", " )\n", "\n", " print(\"\\nSERIES WITH AT LEAST ONE REPAIR:\",\n", " series_repaired.mean())\n", "\n", "\n", "# ------------------------------------------------------------\n", "# 10. WORST SERIES (MANUAL CHECK)\n", "# ------------------------------------------------------------\n", "\n", "worst_series = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"gap_abs\"]\n", " .max()\n", " .sort_values(ascending=False)\n", " .head(10)\n", ")\n", "\n", "print(\"\\nWORST SERIES AFTER REPAIR\")\n", "print(worst_series)" ] }, { "cell_type": "code", "execution_count": null, "id": "f9a4fd91-bb8b-4172-a267-cbe7f2e4fae7", "metadata": {}, "outputs": [], "source": [ "print(\"RUPTURES BEFORE:\", summary[\"Before repair\"].iloc[0])\n", "print(\"RUPTURES AFTER :\", summary[\"After repair\"].iloc[0])\n", "print(\"REDUCTION RATE :\", 1 - summary[\"After repair\"].iloc[0] /\n", " summary[\"Before repair\"].iloc[0])" ] }, { "cell_type": "code", "execution_count": null, "id": "68596521-a10a-479a-a6cd-36b6be3c55b9", "metadata": {}, "outputs": [], "source": [ "# CHECK IF REPAIR CREATED NEW NEGATIVE AUM\n", "\n", "df_compare = stocks.merge(\n", " stocks_repaired,\n", " on=[\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"],\n", " how=\"inner\",\n", " suffixes=(\"_raw\",\"_repaired\")\n", ")\n", "\n", "neg_raw = (\n", " df_compare[df_compare[\"Quantity - AUM_raw\"] < 0]\n", " .groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " .size()\n", ")\n", "\n", "neg_rep = (\n", " df_compare[df_compare[\"Quantity - AUM_repaired\"] < 0]\n", " .groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " .size()\n", ")\n", "\n", "print(\"Negative series BEFORE repair:\", len(neg_raw))\n", "print(\"Negative series AFTER repair:\", len(neg_rep))\n", "\n", "created_neg = set(neg_rep.index) - set(neg_raw.index)\n", "\n", "print(\"\\nNumber of series where repair created negatives:\", len(created_neg))\n", "\n", "if len(created_neg) > 0:\n", " print(\"\\nSeries concerned:\")\n", " print(created_neg)\n", "\n", "\n", "\n", "for acc, isin in created_neg:\n", "\n", " sub = df_compare[\n", " (df_compare[\"Registrar Account - ID\"] == acc) &\n", " (df_compare[\"Product - Isin\"] == isin)\n", " ].sort_values(\"Centralisation Date\").reset_index(drop=True)\n", "\n", " # indices where repaired AUM becomes negative\n", " neg_idx = sub.index[sub[\"Quantity - AUM_repaired\"] < 0]\n", "\n", " print(\"\\n======================================\")\n", " print(\"Account:\", acc, \"ISIN:\", isin)\n", "\n", " for i in neg_idx:\n", "\n", " start = max(0, i-3)\n", " end = min(len(sub), i+3)\n", "\n", " print(\"\\nContext around created negative:\")\n", " print(\n", " sub.loc[start:end, [\n", " \"Centralisation Date\",\n", " \"Quantity - AUM_raw\",\n", " \"Quantity - AUM_repaired\"\n", " ]]\n", " )" ] }, { "cell_type": "code", "execution_count": null, "id": "203797d1-c380-406d-ac6e-78c4e1228966", "metadata": {}, "outputs": [], "source": [ "# top N worst series\n", "N = 20\n", "\n", "worst_series = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])[\"gap_abs\"]\n", " .max()\n", " .sort_values(ascending=False)\n", " .head(N)\n", ")\n", "\n", "print(worst_series)" ] }, { "cell_type": "code", "execution_count": null, "id": "17c6be03-3b76-41e7-906f-d86472bda274", "metadata": {}, "outputs": [], "source": [ "def plot_account_series(account, isin):\n", "\n", " sub = df[\n", " (df[\"Registrar Account - ID\"] == account) &\n", " (df[\"Product - Isin\"] == isin)\n", " ].sort_values(\"Centralisation Date\")\n", "\n", " plt.figure(figsize=(10,4))\n", "\n", " plt.plot(\n", " sub[\"Centralisation Date\"],\n", " sub[\"Quantity - AUM\"],\n", " label=\"AUM\",\n", " linewidth=2\n", " )\n", "\n", " plt.plot(\n", " sub[\"Centralisation Date\"],\n", " sub[\"expected_aum\"],\n", " linestyle=\"--\",\n", " label=\"Expected AUM\"\n", " )\n", "\n", " # highlight large gaps\n", " ruptures = sub[sub[\"gap_abs\"] > 10]\n", "\n", " plt.scatter(\n", " ruptures[\"Centralisation Date\"],\n", " ruptures[\"Quantity - AUM\"],\n", " color=\"red\",\n", " label=\"Rupture\",\n", " s=40\n", " )\n", "\n", " plt.title(f\"Account {account} | ISIN {isin}\")\n", " plt.legend()\n", " plt.grid(alpha=0.3)\n", "\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "02a22e9a-a71c-4212-8f0a-f33c18e4b530", "metadata": {}, "outputs": [], "source": [ "for acc, isin in worst_series.index:\n", " plot_account_series(acc, isin)" ] }, { "cell_type": "code", "execution_count": null, "id": "c72e2dc2-c35d-4608-a3da-57a37acf64c7", "metadata": {}, "outputs": [], "source": [ "def run_data_challenge(stocks, flows):\n", "\n", " # conversion dates\n", " stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"])\n", " flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"])\n", "\n", " # merge datasets\n", " df = stocks.merge(\n", " flows,\n", " on=[\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"],\n", " how=\"left\"\n", " )\n", "\n", " df[\"Quantity - NetFlows\"] = df[\"Quantity - NetFlows\"].fillna(0)\n", "\n", " # sort\n", " df = df.sort_values(\n", " [\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"]\n", " )\n", "\n", " # previous values\n", " df[\"prev_aum\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - AUM\"]\n", " .shift(1)\n", " )\n", "\n", " df[\"flow_lag\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - NetFlows\"]\n", " .shift(1)\n", " ).fillna(0)\n", "\n", " # expected AUM\n", " df[\"expected_aum\"] = df[\"prev_aum\"] + df[\"flow_lag\"]\n", "\n", " # gap\n", " df[\"gap\"] = df[\"Quantity - AUM\"] - df[\"expected_aum\"]\n", "\n", " # build score\n", " df[\"score\"] = np.exp(-np.abs(df[\"gap\"]) / (df[\"expected_aum\"].abs()+1))\n", "\n", " # score timeline\n", " score_timeline = (\n", " df.groupby(\"Centralisation Date\")[\"score\"]\n", " .sum()\n", " .reset_index()\n", " .rename(columns={\"Centralisation Date\":\"date\",\n", " \"score\":\"sum_scores\"})\n", " .sort_values(\"date\")\n", " )\n", "\n", " code_changes = pd.DataFrame() # placeholder\n", "\n", " return score_timeline, code_changes" ] }, { "cell_type": "code", "execution_count": null, "id": "67787ce5-39ae-4a3a-ba22-e9d38bcaf8d3", "metadata": {}, "outputs": [], "source": [ "# ============================================================\n", "# RUN DATA CHALLENGE ON RAW AND CLEAN DATASETS\n", "# ============================================================\n", "\n", "DATASETS = {\n", " \"raw\": \"stocks.csv\",\n", " \"clean\": \"stock_repaired.csv\"\n", "}\n", "\n", "results = {}\n", "\n", "for name, file in DATASETS.items():\n", "\n", " print(\"\\n====================================\")\n", " print(\"RUNNING DATA CHALLENGE ON:\", name)\n", " print(\"====================================\")\n", "\n", " # load datasets\n", " stocks = pd.read_csv(file, low_memory=False)\n", " flows = pd.read_csv(\"flows.csv\", low_memory=False)\n", "\n", " # run the full algorithm\n", " score_timeline, code_changes = run_data_challenge(stocks, flows)\n", "\n", " # store results\n", " results[name] = score_timeline.copy()\n", "\n", "\n", "# ============================================================\n", "# BUILD COMPARISON TABLE\n", "# ============================================================\n", "\n", "comparison = (\n", " results[\"raw\"][[\"date\", \"sum_scores\"]]\n", " .rename(columns={\"sum_scores\": \"raw_scores\"})\n", " .merge(\n", " results[\"clean\"][[\"date\", \"sum_scores\"]]\n", " .rename(columns={\"sum_scores\": \"clean_scores\"}),\n", " on=\"date\",\n", " how=\"outer\"\n", " )\n", " .sort_values(\"date\")\n", ")\n", "\n", "# improvement from cleaning\n", "comparison[\"improvement\"] = (\n", " comparison[\"clean_scores\"] - comparison[\"raw_scores\"]\n", ")\n", "\n", "# relative improvement\n", "comparison[\"relative_improvement\"] = (\n", " comparison[\"improvement\"] /\n", " comparison[\"raw_scores\"].replace(0, np.nan)\n", ")\n", "\n", "print(\"\\n==============================\")\n", "print(\"RAW VS CLEAN SCORE COMPARISON\")\n", "print(\"==============================\")\n", "\n", "print(comparison.head())\n", "print(comparison.tail())" ] }, { "cell_type": "code", "execution_count": null, "id": "5bdb367f-3764-400a-a20d-793f8d004d82", "metadata": {}, "outputs": [], "source": [ "# ============================================================\n", "# PARAMETERS\n", "# ============================================================\n", "\n", "TARGET_DATE = pd.Timestamp(\"2025-10-31\")\n", "AUM_THRESHOLD = 5_000_000\n", "EXCLUDED = [\"OFF DISTRIBUTION\", \"PRIVATE CLIENTS\"]\n", "\n", "ALPHA = 5 # penalty strength for accounting error\n", "\n", "stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"])\n", "flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"])\n", "\n", "stocks[\"Registrar Account - ID\"] = stocks[\"Registrar Account - ID\"].astype(str).str.strip()\n", "flows[\"Registrar Account - ID\"] = flows[\"Registrar Account - ID\"].astype(str).str.strip()\n", "\n", "stocks[\"Product - Isin\"] = stocks[\"Product - Isin\"].astype(str)\n", "flows[\"Product - Isin\"] = flows[\"Product - Isin\"].astype(str)\n", "\n", "# ============================================================\n", "# REMOVE EXCLUDED ACCOUNTS\n", "# ============================================================\n", "\n", "stocks = stocks[~stocks[\"Registrar Account - ID\"].str.upper().isin(EXCLUDED)]\n", "flows = flows[~flows[\"Registrar Account - ID\"].str.upper().isin(EXCLUDED)]" ] }, { "cell_type": "code", "execution_count": null, "id": "b10832d6-988a-42cd-a6b6-4dfb690315b7", "metadata": {}, "outputs": [], "source": [ "# ============================================================\n", "# SELECT UNIVERSE AT TARGET DATE\n", "# ============================================================\n", "\n", "latest = stocks[stocks[\"Centralisation Date\"] == TARGET_DATE]\n", "\n", "account_aum = (\n", " latest.groupby(\"Registrar Account - ID\")[\"Quantity - AUM\"]\n", " .sum()\n", " .sort_values(ascending=False)\n", ")\n", "\n", "# remove negative values if any\n", "account_aum = account_aum[account_aum > 0]\n", "\n", "# cumulative coverage\n", "cum_aum = account_aum.cumsum()\n", "total_aum = account_aum.sum()\n", "\n", "coverage = cum_aum / total_aum\n", "\n", "# select accounts covering 97% of AUM\n", "selected_accounts = account_aum[coverage <= 0.97]\n", "\n", "# ensure at least one more account\n", "selected_accounts = account_aum.iloc[:len(selected_accounts)+1]\n", "\n", "# weights at t0\n", "weights = selected_accounts / selected_accounts.sum()\n", "\n", "print(\"\\nUNIVERSE SELECTION\")\n", "print(\"------------------\")\n", "print(\"Number of selected accounts:\", len(selected_accounts))\n", "print(\"Coverage:\", selected_accounts.sum() / total_aum)\n", "\n", "print(\"\\nTop accounts:\")\n", "print(selected_accounts.head(10))\n", "\n", "# ============================================================\n", "# BUILD DATASET\n", "# ============================================================\n", "\n", "df = stocks.merge(\n", " flows,\n", " on=[\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"],\n", " how=\"left\"\n", ")\n", "\n", "df[\"Quantity - NetFlows\"] = df[\"Quantity - NetFlows\"].fillna(0)\n", "\n", "df = df.sort_values(\n", " [\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"]\n", ")\n", "\n", "# ============================================================\n", "# COMPUTE PREVIOUS VALUES\n", "# ============================================================\n", "\n", "df[\"prev_aum\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - AUM\"]\n", " .shift(1)\n", ")\n", "\n", "df[\"flow_lag\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - NetFlows\"]\n", " .shift(1)\n", ").fillna(0)\n", "\n", "df[\"expected_aum\"] = df[\"prev_aum\"] + df[\"flow_lag\"]\n", "\n", "df[\"gap\"] = df[\"Quantity - AUM\"] - df[\"expected_aum\"]\n", "\n", "df[\"rel_error\"] = (\n", " df[\"gap\"].abs() /\n", " df[\"expected_aum\"].abs().clip(lower=1)\n", ")\n", "\n", "# ============================================================\n", "# ACCOUNT LEVEL ERROR\n", "# ============================================================\n", "\n", "account_error = (\n", " df.groupby([\"Centralisation Date\",\"Registrar Account - ID\"])\n", " [\"rel_error\"]\n", " .mean()\n", " .reset_index()\n", ")\n", "\n", "# ============================================================\n", "# FAST SCORE BACKWARD PROPAGATION\n", "# ============================================================\n", "\n", "# pivot errors into matrix\n", "error_matrix = (\n", " account_error\n", " .pivot(\n", " index=\"Centralisation Date\",\n", " columns=\"Registrar Account - ID\",\n", " values=\"rel_error\"\n", " )\n", ")\n", "\n", "# keep only selected accounts\n", "error_matrix = error_matrix[selected_accounts.index]\n", "\n", "# fill missing errors\n", "error_matrix = error_matrix.fillna(0)\n", "\n", "# keep only dates <= target\n", "error_matrix = error_matrix.loc[error_matrix.index <= TARGET_DATE]\n", "\n", "# sort dates\n", "error_matrix = error_matrix.sort_index()\n", "\n", "dates = error_matrix.index.values\n", "\n", "# convert to numpy for speed\n", "errors = error_matrix.values\n", "\n", "# initial scores\n", "scores = weights.loc[selected_accounts.index].values\n", "\n", "score_history = []\n", "\n", "# backward propagation\n", "for i in range(len(dates)-1, -1, -1):\n", "\n", " err = errors[i]\n", "\n", " quality = np.exp(-ALPHA * err)\n", "\n", " scores = scores * quality\n", "\n", " score_history.append({\n", " \"date\": dates[i],\n", " \"sum_scores\": scores.sum()\n", " })\n", "\n", "score_timeline = (\n", " pd.DataFrame(score_history)\n", " .sort_values(\"date\")\n", ")\n", "\n", "# ============================================================\n", "# NORMALISE\n", "# ============================================================\n", "\n", "initial_score = score_timeline[\"sum_scores\"].iloc[-1]\n", "\n", "score_timeline[\"score_retention\"] = (\n", " score_timeline[\"sum_scores\"] / initial_score\n", ")\n", "\n", "# ============================================================\n", "# RESULTS\n", "# ============================================================\n", "\n", "print(\"\\nSCORE TIMELINE\")\n", "print(score_timeline.head())\n", "print(score_timeline.tail())\n", "\n", "# ============================================================\n", "# PLOT\n", "# ============================================================\n", "\n", "plt.figure(figsize=(8,4))\n", "\n", "plt.plot(\n", " score_timeline[\"date\"],\n", " score_timeline[\"score_retention\"]\n", ")\n", "\n", "plt.title(\"Score retention when moving backward in time\")\n", "plt.xlabel(\"Date\")\n", "plt.ylabel(\"Σ Scores / Σ Scores (t0)\")\n", "plt.grid(alpha=0.3)\n", "\n", "plt.show()\n", "\n", "# ============================================================\n", "# SAVE\n", "# ============================================================\n", "\n", "score_timeline.to_csv(\n", " \"data_challenge_score_timeline.csv\",\n", " index=False\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "86d82213-d155-4feb-817e-ef1df50578e8", "metadata": {}, "outputs": [], "source": [ "# ============================================================\n", "# PRECOMPUTE PORTFOLIOS BY DATE\n", "# ============================================================\n", "\n", "portfolio_by_date = {\n", " d: g.droplevel(0)\n", " for d, g in portfolio.groupby(level=0)\n", "}\n", "\n", "flows_by_date = {\n", " d: g.droplevel(0)\n", " for d, g in flows_matrix.groupby(level=0)\n", "}\n", "\n", "# ============================================================\n", "# CODE SURGERY SEARCH (FAST + ROBUST)\n", "# ============================================================\n", "\n", "code_changes = []\n", "\n", "for row in ruptures.itertuples():\n", "\n", " date = row._1\n", " acc = row._2\n", "\n", " # find previous date\n", " prev_date = score_timeline.loc[\n", " score_timeline[\"date\"] < date,\n", " \"date\"\n", " ].max()\n", "\n", " if pd.isna(prev_date):\n", " continue\n", "\n", " if date not in portfolio_by_date:\n", " continue\n", "\n", " if prev_date not in portfolio_by_date:\n", " continue\n", "\n", " # portfolio at t\n", " port_today = portfolio_by_date[date]\n", "\n", " if acc not in port_today.index:\n", " continue\n", "\n", " port_t = port_today.loc[acc]\n", "\n", " # portfolio at t-1\n", " prev_port = portfolio_by_date[prev_date]\n", "\n", " # flows at t-1\n", " prev_flow = flows_by_date.get(prev_date)\n", "\n", " # align flows to portfolio\n", " if prev_flow is not None:\n", "\n", " prev_flow = (\n", " prev_flow\n", " .reindex(index=prev_port.index, columns=prev_port.columns)\n", " .fillna(0)\n", " )\n", "\n", " else:\n", "\n", " prev_flow = pd.DataFrame(\n", " 0,\n", " index=prev_port.index,\n", " columns=prev_port.columns\n", " )\n", "\n", " # convert to numpy\n", " prev_port_mat = prev_port.values\n", " prev_flow_mat = prev_flow.values\n", "\n", " # predicted portfolio\n", " predicted = prev_port_mat + prev_flow_mat\n", "\n", " port_t_vec = port_t.reindex(prev_port.columns).fillna(0).values\n", "\n", " # compute error vectorised\n", " diff = np.abs(predicted - port_t_vec)\n", "\n", " errors = diff.sum(axis=1) / (np.abs(port_t_vec).sum() + 1)\n", "\n", " if len(errors) == 0:\n", " continue\n", "\n", " best_idx = errors.argmin()\n", "\n", " if best_idx >= len(prev_port.index):\n", " continue\n", "\n", " best_code = prev_port.index[best_idx]\n", " best_error = errors[best_idx]\n", "\n", " if best_code != acc and best_error < 0.3:\n", "\n", " code_changes.append({\n", " \"date\": date,\n", " \"old_code\": acc,\n", " \"new_code\": best_code,\n", " \"portfolio_error\": best_error\n", " })\n", "\n", "\n", "# ============================================================\n", "# RESULTS\n", "# ============================================================\n", "\n", "code_changes = pd.DataFrame(code_changes)\n", "\n", "print(\"\\nDetected distributor code changes:\")\n", "print(code_changes.head())\n", "\n", "code_changes.to_csv(\"detected_code_changes.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "452b8321-26c5-4229-9992-43c38eb5253f", "metadata": {}, "outputs": [], "source": [ "def detect_code_changes(portfolio, flows_matrix, ruptures, score_timeline):\n", "\n", " # ============================================================\n", " # PRECOMPUTE PORTFOLIOS BY DATE\n", " # ============================================================\n", "\n", " portfolio_by_date = {\n", " d: g.droplevel(0)\n", " for d, g in portfolio.groupby(level=0)\n", " }\n", "\n", " flows_by_date = {\n", " d: g.droplevel(0)\n", " for d, g in flows_matrix.groupby(level=0)\n", " }\n", "\n", " # ============================================================\n", " # CODE SURGERY SEARCH\n", " # ============================================================\n", "\n", " code_changes = []\n", "\n", " for row in ruptures.itertuples():\n", "\n", " date = row._1\n", " acc = row._2\n", "\n", " # find previous date\n", " prev_date = score_timeline.loc[\n", " score_timeline[\"date\"] < date,\n", " \"date\"\n", " ].max()\n", "\n", " if pd.isna(prev_date):\n", " continue\n", "\n", " if date not in portfolio_by_date:\n", " continue\n", "\n", " if prev_date not in portfolio_by_date:\n", " continue\n", "\n", " # portfolio at t\n", " port_today = portfolio_by_date[date]\n", "\n", " if acc not in port_today.index:\n", " continue\n", "\n", " port_t = port_today.loc[acc]\n", "\n", " # portfolio at t-1\n", " prev_port = portfolio_by_date[prev_date]\n", "\n", " # flows at t-1\n", " prev_flow = flows_by_date.get(prev_date)\n", "\n", " # align flows to portfolio\n", " if prev_flow is not None:\n", "\n", " prev_flow = (\n", " prev_flow\n", " .reindex(index=prev_port.index, columns=prev_port.columns)\n", " .fillna(0)\n", " )\n", "\n", " else:\n", "\n", " prev_flow = pd.DataFrame(\n", " 0,\n", " index=prev_port.index,\n", " columns=prev_port.columns\n", " )\n", "\n", " # convert to numpy\n", " prev_port_mat = prev_port.values\n", " prev_flow_mat = prev_flow.values\n", "\n", " # predicted portfolio\n", " predicted = prev_port_mat + prev_flow_mat\n", "\n", " port_t_vec = port_t.reindex(prev_port.columns).fillna(0).values\n", "\n", " # compute error vectorised\n", " diff = np.abs(predicted - port_t_vec)\n", "\n", " errors = diff.sum(axis=1) / (np.abs(port_t_vec).sum() + 1)\n", "\n", " if len(errors) == 0:\n", " continue\n", "\n", " best_idx = errors.argmin()\n", "\n", " if best_idx >= len(prev_port.index):\n", " continue\n", "\n", " best_code = prev_port.index[best_idx]\n", " best_error = errors[best_idx]\n", "\n", " if best_code != acc and best_error < 0.3:\n", "\n", " code_changes.append({\n", " \"date\": date,\n", " \"old_code\": acc,\n", " \"new_code\": best_code,\n", " \"portfolio_error\": best_error\n", " })\n", "\n", " # ============================================================\n", " # RESULTS\n", " # ============================================================\n", "\n", " code_changes = pd.DataFrame(code_changes)\n", "\n", " print(\"\\nDetected distributor code changes:\")\n", " print(code_changes.head())\n", "\n", " # nouveau nom de fichier\n", " code_changes.to_csv(\"detected_code_changes_filtered.csv\", index=False)\n", "\n", " return code_changes\n", "\n", "code_changes = detect_code_changes(\n", " portfolio,\n", " flows_matrix,\n", " ruptures,\n", " score_timeline\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "id": "068da1e3-9de7-49d1-bda4-663b02f6d76a", "metadata": {}, "outputs": [], "source": [ "def detect_code_changes_fast(portfolio, flows_matrix, ruptures, score_timeline):\n", "\n", " # ============================================================\n", " # PRECOMPUTE PORTFOLIOS BY DATE\n", " # ============================================================\n", "\n", " portfolio_by_date = {\n", " d: g.droplevel(0)\n", " for d, g in portfolio.groupby(level=0)\n", " }\n", "\n", " flows_by_date = {\n", " d: g.droplevel(0)\n", " for d, g in flows_matrix.groupby(level=0)\n", " }\n", "\n", " # ============================================================\n", " # PRECOMPUTE PREVIOUS DATES\n", " # ============================================================\n", "\n", " dates = sorted(score_timeline[\"date\"].unique())\n", "\n", " prev_date_map = {\n", " dates[i]: dates[i - 1] if i > 0 else None\n", " for i in range(len(dates))\n", " }\n", "\n", " # ============================================================\n", " # CODE SURGERY SEARCH\n", " # ============================================================\n", "\n", " code_changes = []\n", "\n", " from tqdm import tqdm\n", " \n", " for row in tqdm(ruptures.itertuples(), total=len(ruptures)):\n", " date = row._1\n", " acc = row._2\n", "\n", " prev_date = prev_date_map.get(date)\n", "\n", " if prev_date is None:\n", " continue\n", "\n", " if date not in portfolio_by_date:\n", " continue\n", "\n", " if prev_date not in portfolio_by_date:\n", " continue\n", "\n", " port_today = portfolio_by_date[date]\n", "\n", " if acc not in port_today.index:\n", " continue\n", "\n", " port_t = port_today.loc[acc]\n", "\n", " prev_port = portfolio_by_date[prev_date]\n", "\n", " # ========================================================\n", " # LIMIT CANDIDATES (shared ISINs)\n", " # ========================================================\n", "\n", " held_isins = port_t[port_t > 0].index\n", "\n", " if len(held_isins) == 0:\n", " continue\n", "\n", " candidate_mask = prev_port[held_isins].sum(axis=1) > 0\n", "\n", " candidates = prev_port.index[candidate_mask]\n", "\n", " if len(candidates) == 0:\n", " continue\n", "\n", " prev_port = prev_port.loc[candidates]\n", "\n", " prev_flow = flows_by_date.get(prev_date)\n", "\n", " if prev_flow is not None:\n", "\n", " prev_flow = prev_flow.reindex(\n", " index=candidates,\n", " columns=prev_port.columns\n", " ).fillna(0)\n", "\n", " else:\n", "\n", " prev_flow = pd.DataFrame(\n", " 0,\n", " index=candidates,\n", " columns=prev_port.columns\n", " )\n", "\n", " # ========================================================\n", " # VECTORISED ERROR COMPUTATION\n", " # ========================================================\n", "\n", " prev_port_mat = prev_port.values\n", " prev_flow_mat = prev_flow.values\n", "\n", " predicted = prev_port_mat + prev_flow_mat\n", "\n", " port_t_vec = port_t.reindex(prev_port.columns).fillna(0).values\n", "\n", " diff = np.abs(predicted - port_t_vec)\n", "\n", " errors = diff.sum(axis=1) / (np.abs(port_t_vec).sum() + 1)\n", "\n", " if len(errors) == 0:\n", " continue\n", "\n", " best_idx = errors.argmin()\n", "\n", " best_code = prev_port.index[best_idx]\n", " best_error = errors[best_idx]\n", "\n", " if best_code != acc and best_error < 0.3:\n", "\n", " code_changes.append({\n", " \"date\": date,\n", " \"old_code\": acc,\n", " \"new_code\": best_code,\n", " \"portfolio_error\": best_error\n", " })\n", "\n", " # ============================================================\n", " # RESULTS\n", " # ============================================================\n", "\n", " code_changes = pd.DataFrame(code_changes)\n", "\n", " print(\"\\nDetected distributor code changes:\")\n", " print(code_changes.head())\n", "\n", " code_changes.to_csv(\"detected_code_changes_fast.csv\", index=False)\n", "\n", " return code_changes" ] }, { "cell_type": "code", "execution_count": 3, "id": "2b332049-db18-470a-9249-248f01e8ca36", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 11%|█ | 2068/18582 [00:24<03:14, 85.09it/s] \n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 145\u001b[39m\n\u001b[32m 138\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m code_changes\n\u001b[32m 141\u001b[39m \u001b[38;5;66;03m# ============================================================\u001b[39;00m\n\u001b[32m 142\u001b[39m \u001b[38;5;66;03m# RUN\u001b[39;00m\n\u001b[32m 143\u001b[39m \u001b[38;5;66;03m# ============================================================\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m145\u001b[39m code_changes = \u001b[43mdetect_code_changes_fast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 146\u001b[39m \u001b[43m \u001b[49m\u001b[43mportfolio\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 147\u001b[39m \u001b[43m \u001b[49m\u001b[43mflows_matrix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 148\u001b[39m \u001b[43m \u001b[49m\u001b[43mruptures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 149\u001b[39m \u001b[43m \u001b[49m\u001b[43mscore_timeline\u001b[49m\n\u001b[32m 150\u001b[39m \u001b[43m)\u001b[49m\n", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 69\u001b[39m, in \u001b[36mdetect_code_changes_fast\u001b[39m\u001b[34m(portfolio, flows_matrix, ruptures, score_timeline)\u001b[39m\n\u001b[32m 66\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(held_isins) == \u001b[32m0\u001b[39m:\n\u001b[32m 67\u001b[39m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m69\u001b[39m candidate_mask = \u001b[43mprev_port\u001b[49m\u001b[43m[\u001b[49m\u001b[43mheld_isins\u001b[49m\u001b[43m]\u001b[49m\u001b[43m.\u001b[49m\u001b[43msum\u001b[49m\u001b[43m(\u001b[49m\u001b[43maxis\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m)\u001b[49m > \u001b[32m0\u001b[39m\n\u001b[32m 71\u001b[39m candidates = prev_port.index[candidate_mask]\n\u001b[32m 73\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(candidates) == \u001b[32m0\u001b[39m:\n", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/frame.py:11697\u001b[39m, in \u001b[36mDataFrame.sum\u001b[39m\u001b[34m(self, axis, skipna, numeric_only, min_count, **kwargs)\u001b[39m\n\u001b[32m 11688\u001b[39m \u001b[38;5;129m@doc\u001b[39m(make_doc(\u001b[33m\"\u001b[39m\u001b[33msum\u001b[39m\u001b[33m\"\u001b[39m, ndim=\u001b[32m2\u001b[39m))\n\u001b[32m 11689\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34msum\u001b[39m(\n\u001b[32m 11690\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 11695\u001b[39m **kwargs,\n\u001b[32m 11696\u001b[39m ):\n\u001b[32m> \u001b[39m\u001b[32m11697\u001b[39m result = \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43msum\u001b[49m\u001b[43m(\u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskipna\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnumeric_only\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmin_count\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 11698\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result.__finalize__(\u001b[38;5;28mself\u001b[39m, method=\u001b[33m\"\u001b[39m\u001b[33msum\u001b[39m\u001b[33m\"\u001b[39m)\n", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/generic.py:12571\u001b[39m, in \u001b[36mNDFrame.sum\u001b[39m\u001b[34m(self, axis, skipna, numeric_only, min_count, **kwargs)\u001b[39m\n\u001b[32m 12563\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34msum\u001b[39m(\n\u001b[32m 12564\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 12565\u001b[39m axis: Axis | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[32m0\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 12569\u001b[39m **kwargs,\n\u001b[32m 12570\u001b[39m ):\n\u001b[32m> \u001b[39m\u001b[32m12571\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_min_count_stat_function\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 12572\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43msum\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnanops\u001b[49m\u001b[43m.\u001b[49m\u001b[43mnansum\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskipna\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnumeric_only\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmin_count\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[32m 12573\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/generic.py:12554\u001b[39m, in \u001b[36mNDFrame._min_count_stat_function\u001b[39m\u001b[34m(self, name, func, axis, skipna, numeric_only, min_count, **kwargs)\u001b[39m\n\u001b[32m 12551\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m axis \u001b[38;5;129;01mis\u001b[39;00m lib.no_default:\n\u001b[32m 12552\u001b[39m axis = \u001b[32m0\u001b[39m\n\u001b[32m> \u001b[39m\u001b[32m12554\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_reduce\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 12555\u001b[39m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 12556\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 12557\u001b[39m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m=\u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 12558\u001b[39m \u001b[43m \u001b[49m\u001b[43mskipna\u001b[49m\u001b[43m=\u001b[49m\u001b[43mskipna\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 12559\u001b[39m \u001b[43m \u001b[49m\u001b[43mnumeric_only\u001b[49m\u001b[43m=\u001b[49m\u001b[43mnumeric_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 12560\u001b[39m \u001b[43m \u001b[49m\u001b[43mmin_count\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmin_count\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 12561\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/frame.py:11593\u001b[39m, in \u001b[36mDataFrame._reduce\u001b[39m\u001b[34m(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)\u001b[39m\n\u001b[32m 11591\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m out_dtype \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m out.dtype != \u001b[33m\"\u001b[39m\u001b[33mboolean\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m 11592\u001b[39m out = out.astype(out_dtype)\n\u001b[32m> \u001b[39m\u001b[32m11593\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m (\u001b[43mdf\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_mgr\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_dtypes\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m == \u001b[38;5;28mobject\u001b[39m).any() \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m [\u001b[33m\"\u001b[39m\u001b[33many\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mall\u001b[39m\u001b[33m\"\u001b[39m]:\n\u001b[32m 11594\u001b[39m out = out.astype(\u001b[38;5;28mobject\u001b[39m)\n\u001b[32m 11595\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m) == \u001b[32m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m out.dtype == \u001b[38;5;28mobject\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m (\u001b[33m\"\u001b[39m\u001b[33msum\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mprod\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 11596\u001b[39m \u001b[38;5;66;03m# Even if we are object dtype, follow numpy and return\u001b[39;00m\n\u001b[32m 11597\u001b[39m \u001b[38;5;66;03m# float64, see test_apply_funcs_over_empty\u001b[39;00m\n", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/internals/managers.py:289\u001b[39m, in \u001b[36mBaseBlockManager.get_dtypes\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 287\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mget_dtypes\u001b[39m(\u001b[38;5;28mself\u001b[39m) -> npt.NDArray[np.object_]:\n\u001b[32m 288\u001b[39m dtypes = np.array([blk.dtype \u001b[38;5;28;01mfor\u001b[39;00m blk \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.blocks], dtype=\u001b[38;5;28mobject\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m289\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m dtypes.take(\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mblknos\u001b[49m)\n", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/internals/managers.py:192\u001b[39m, in \u001b[36mBaseBlockManager.blknos\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 182\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 183\u001b[39m \u001b[33;03mSuppose we want to find the array corresponding to our i'th column.\u001b[39;00m\n\u001b[32m 184\u001b[39m \n\u001b[32m (...)\u001b[39m\u001b[32m 188\u001b[39m \u001b[33;03mself.blocks[self.blknos[i]]\u001b[39;00m\n\u001b[32m 189\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 190\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._blknos \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 191\u001b[39m \u001b[38;5;66;03m# Note: these can be altered by other BlockManager methods.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m192\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_rebuild_blknos_and_blklocs\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 194\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._blknos\n", "\u001b[31mKeyboardInterrupt\u001b[39m: " ] } ], "source": [ "# ============================================================\n", "# RUN\n", "# ============================================================\n", "\n", "code_changes = detect_code_changes_fast(\n", " portfolio,\n", " flows_matrix,\n", " ruptures,\n", " score_timeline\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "id": "3054206e-13b8-4931-a1d8-e3cbb97eab7a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "====================================\n", "RUNNING DATA CHALLENGE ON: raw\n", "====================================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 18582/18582 [02:27<00:00, 126.15it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Detected distributor code changes:\n", " date old_code new_code portfolio_error\n", "0 2015-02-28 200001285 200001992 0.090028\n", "1 2015-02-28 200001771 420304 0.049979\n", "2 2015-02-28 200001894 366541 0.215146\n", "3 2015-02-28 200002064 412736 0.210827\n", "4 2015-02-28 200002109 406337 0.173276\n", "\n", "====================================\n", "RUNNING DATA CHALLENGE ON: clean\n", "====================================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 18521/18521 [02:34<00:00, 119.82it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Detected distributor code changes:\n", " date old_code new_code portfolio_error\n", "0 2015-02-28 200001285 200001992 0.090028\n", "1 2015-02-28 200001771 420304 0.049979\n", "2 2015-02-28 200001894 366541 0.215146\n", "3 2015-02-28 200002064 412736 0.210827\n", "4 2015-02-28 200002109 406337 0.173276\n", "\n", "==============================\n", "RAW VS CLEAN SCORE COMPARISON\n", "==============================\n", " date raw_scores clean_scores improvement relative_improvement\n", "0 2015-01-31 5639.521248 5639.524696 0.003448 6.113170e-07\n", "1 2015-02-28 5639.521248 5639.524696 0.003448 6.113170e-07\n", "2 2015-03-31 5656.707028 5656.722266 0.015238 2.693820e-06\n", "3 2015-04-30 5701.063923 5701.084181 0.020258 3.553334e-06\n", "4 2015-05-31 5728.963044 5728.994764 0.031720 5.536759e-06\n", " date raw_scores clean_scores improvement relative_improvement\n", "125 2025-06-30 11703.280129 11703.402856 0.122727 0.000010\n", "126 2025-07-31 11855.613232 11855.796565 0.183333 0.000015\n", "127 2025-08-31 11962.226141 11962.553334 0.327193 0.000027\n", "128 2025-09-30 12046.634770 12046.966956 0.332185 0.000028\n", "129 2025-10-31 12189.843400 12190.351082 0.507683 0.000042\n" ] } ], "source": [ "# ============================================================\n", "# RUN DATA CHALLENGE ON RAW AND CLEAN DATASETS\n", "# ============================================================\n", "\n", "DATASETS = {\n", " \"raw\": \"stocks.csv\",\n", " \"clean\": \"stock_repaired.csv\"\n", "}\n", "\n", "results = {}\n", "code_changes_results = {}\n", "\n", "for name, file in DATASETS.items():\n", "\n", " print(\"\\n====================================\")\n", " print(\"RUNNING DATA CHALLENGE ON:\", name)\n", " print(\"====================================\")\n", "\n", " # ============================================================\n", " # LOAD DATA\n", " # ============================================================\n", "\n", " stocks = pd.read_csv(file, low_memory=False)\n", " flows = pd.read_csv(\"flows.csv\", low_memory=False)\n", "\n", " stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"])\n", " flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"])\n", "\n", " stocks[\"Registrar Account - ID\"] = stocks[\"Registrar Account - ID\"].astype(str).str.strip()\n", " flows[\"Registrar Account - ID\"] = flows[\"Registrar Account - ID\"].astype(str).str.strip()\n", "\n", " stocks[\"Product - Isin\"] = stocks[\"Product - Isin\"].astype(str)\n", " flows[\"Product - Isin\"] = flows[\"Product - Isin\"].astype(str)\n", "\n", " # ============================================================\n", " # BUILD DATASET\n", " # ============================================================\n", "\n", " df = stocks.merge(\n", " flows,\n", " on=[\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"],\n", " how=\"left\"\n", " )\n", "\n", " df[\"Quantity - NetFlows\"] = df[\"Quantity - NetFlows\"].fillna(0)\n", "\n", " df = df.sort_values(\n", " [\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"]\n", " )\n", "\n", " # ============================================================\n", " # ACCOUNTING IDENTITY\n", " # ============================================================\n", "\n", " df[\"prev_aum\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - AUM\"]\n", " .shift(1)\n", " )\n", "\n", " df[\"flow_lag\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - NetFlows\"]\n", " .shift(1)\n", " ).fillna(0)\n", "\n", " df[\"expected_aum\"] = df[\"prev_aum\"] + df[\"flow_lag\"]\n", "\n", " df[\"gap\"] = df[\"Quantity - AUM\"] - df[\"expected_aum\"]\n", "\n", " df[\"rel_error\"] = (\n", " df[\"gap\"].abs() /\n", " df[\"expected_aum\"].abs().clip(lower=1)\n", " )\n", "\n", " # ============================================================\n", " # ACCOUNT LEVEL ERROR\n", " # ============================================================\n", "\n", " account_error = (\n", " df.groupby([\"Centralisation Date\",\"Registrar Account - ID\"])\n", " [\"rel_error\"]\n", " .mean()\n", " .reset_index()\n", " )\n", "\n", " RUPTURE_THRESHOLD = 0.5\n", "\n", " ruptures = account_error[\n", " account_error[\"rel_error\"] > RUPTURE_THRESHOLD\n", " ]\n", "\n", " # ============================================================\n", " # BUILD PORTFOLIO MATRICES\n", " # ============================================================\n", "\n", " portfolio = (\n", " stocks\n", " .pivot_table(\n", " index=[\"Centralisation Date\",\"Registrar Account - ID\"],\n", " columns=\"Product - Isin\",\n", " values=\"Quantity - AUM\",\n", " aggfunc=\"sum\"\n", " )\n", " .fillna(0)\n", " )\n", "\n", " flows_matrix = (\n", " flows\n", " .pivot_table(\n", " index=[\"Centralisation Date\",\"Registrar Account - ID\"],\n", " columns=\"Product - Isin\",\n", " values=\"Quantity - NetFlows\",\n", " aggfunc=\"sum\"\n", " )\n", " .fillna(0)\n", " )\n", "\n", " # ============================================================\n", " # SCORE COMPUTATION\n", " # ============================================================\n", "\n", " error_matrix = (\n", " account_error\n", " .pivot(\n", " index=\"Centralisation Date\",\n", " columns=\"Registrar Account - ID\",\n", " values=\"rel_error\"\n", " )\n", " .fillna(0)\n", " )\n", "\n", " dates = error_matrix.index.values\n", " errors = error_matrix.values\n", "\n", " scores = np.ones(errors.shape[1])\n", "\n", " score_history = []\n", "\n", " for i in range(len(dates)-1, -1, -1):\n", "\n", " quality = np.exp(-5 * errors[i])\n", "\n", " scores = scores * quality\n", "\n", " score_history.append({\n", " \"date\": dates[i],\n", " \"sum_scores\": scores.sum()\n", " })\n", "\n", " score_timeline = (\n", " pd.DataFrame(score_history)\n", " .sort_values(\"date\")\n", " )\n", "\n", " initial_score = score_timeline[\"sum_scores\"].iloc[-1]\n", "\n", " score_timeline[\"score_retention\"] = (\n", " score_timeline[\"sum_scores\"] / initial_score\n", " )\n", "\n", " results[name] = score_timeline.copy()\n", "\n", " # ============================================================\n", " # CODE SURGERY\n", " # ============================================================\n", "\n", " code_changes = detect_code_changes_fast(\n", " portfolio,\n", " flows_matrix,\n", " ruptures,\n", " score_timeline\n", " )\n", "\n", " code_changes_results[name] = code_changes\n", "\n", "\n", "# ============================================================\n", "# BUILD COMPARISON TABLE\n", "# ============================================================\n", "\n", "comparison = (\n", " results[\"raw\"][[\"date\", \"sum_scores\"]]\n", " .rename(columns={\"sum_scores\": \"raw_scores\"})\n", " .merge(\n", " results[\"clean\"][[\"date\", \"sum_scores\"]]\n", " .rename(columns={\"sum_scores\": \"clean_scores\"}),\n", " on=\"date\",\n", " how=\"outer\"\n", " )\n", " .sort_values(\"date\")\n", ")\n", "\n", "comparison[\"improvement\"] = (\n", " comparison[\"clean_scores\"] - comparison[\"raw_scores\"]\n", ")\n", "\n", "comparison[\"relative_improvement\"] = (\n", " comparison[\"improvement\"] /\n", " comparison[\"raw_scores\"].replace(0, np.nan)\n", ")\n", "\n", "print(\"\\n==============================\")\n", "print(\"RAW VS CLEAN SCORE COMPARISON\")\n", "print(\"==============================\")\n", "\n", "print(comparison.head())\n", "print(comparison.tail())" ] }, { "cell_type": "code", "execution_count": 7, "id": "15c60063-b8e5-43f0-a662-67a2f5601408", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TOTAL RAW SCORE : 1102753.964620689\n", "TOTAL CLEAN SCORE : 1102778.1794964853\n", "ABSOLUTE IMPROVEMENT : 24.21487579634413\n", "RELATIVE IMPROVEMENT : 2.1958547938363796e-05\n" ] } ], "source": [ "total_raw = comparison[\"raw_scores\"].sum()\n", "total_clean = comparison[\"clean_scores\"].sum()\n", "\n", "absolute_gain = total_clean - total_raw\n", "relative_gain = absolute_gain / total_raw\n", "\n", "print(\"TOTAL RAW SCORE :\", total_raw)\n", "print(\"TOTAL CLEAN SCORE :\", total_clean)\n", "\n", "print(\"ABSOLUTE IMPROVEMENT :\", absolute_gain)\n", "print(\"RELATIVE IMPROVEMENT :\", relative_gain)" ] }, { "cell_type": "code", "execution_count": 8, "id": "28f02823-483c-4ed8-bf24-912c5ca7f7d3", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1AAAAHDCAYAAAAqdvv1AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAf1BJREFUeJzs3Xd4FXXaxvHvnHQCSQikQgihJ7RAKAm9BCJNUCwoqwgoroKKWLGgWBfWio3XXQVUUMFVVFQggvRI772EThIgpELqmfePmCMxgEETTsr9uS6va8/M78x5ZvKcbG5m5jeGaZomIiIiIiIi8qcs9i5ARERERESkolCAEhERERERKSEFKBERERERkRJSgBIRERERESkhBSgREREREZESUoASEREREREpIQUoERERERGRElKAEhERERERKSEFKBERERERkRJSgBIRkVJx1113Ub9+fXuXIX9Tjx496NGjh73LKBWHDx/GMAxmzpz5l94/c+ZMDMPg8OHDpVqXiFRsClAiYnfbt2/npptuIjg4GFdXV+rUqUOfPn1455137F2aiMhfMmfOHN566y17lyEiZUABSkTsas2aNbRr146tW7dyzz338O6773L33XdjsVh4++237V2eXIX//Oc/7N27195liJQLClAilZejvQsQkart5ZdfxtPTk/Xr1+Pl5VVkXVJS0jWt5fz581SrVu2afmZlkJmZibu7O05OTvYupVSoD0RE5Ep0BkpE7OrgwYM0b968WHgC8PX1Lbbss88+o0OHDlSrVo2aNWvSrVs3Fi9eXGTM+++/T/PmzXFxcSEwMJCxY8eSkpJSZEyPHj1o0aIFGzdupFu3blSrVo2nnnoKgOzsbJ577jkaNWqEi4sLQUFBPP7442RnZ5don9auXUv//v2pWbMm7u7utGrVqtjZtKVLl9K1a1fc3d3x8vJi8ODB7N69u8iY559/HsMw2LdvH//4xz/w9PTEx8eHZ599FtM0OXbsGIMHD8bDwwN/f39ef/31Iu9ftmwZhmHw5Zdf8tRTT+Hv74+7uzvXX389x44dKzJ25cqV3HzzzdSrV8+2zw8//DAXLlwoMu6uu+6ievXqHDx4kP79+1OjRg2GDx9uW/fHe6C++OILIiIiqFGjBh4eHrRs2bLYsTh06BA333wz3t7eVKtWjcjISH744YdL7svcuXN5+eWXqVu3Lq6urvTu3ZsDBw4UGXv+/Hn27NnDmTNnrvBTKnClPvj2228ZMGAAgYGBuLi40LBhQ1588UXy8/Nt7582bRoODg5F+uv111/HMAwmTJhgW5afn0+NGjV44oknLlvLwIEDadCgwSXXRUVF0a5dO9vr2NhYunTpgpeXF9WrV6dp06a2uv9MSb5Df1TS78SMGTPo1asXvr6+uLi4EBYWxgcffFBse/Xr12fgwIGsWrWKDh064OrqSoMGDfjkk0+KjU1JSWH8+PEEBQXh4uJCo0aNmDJlClartdi4u+66C09PT7y8vBgxYkSx7/2V7Ny5k169euHm5kbdunV56aWXin0GlKwvevTowQ8//MCRI0cwDAPDMGzfjZycHCZNmkRERASenp64u7vTtWtXfvnllxLXKiL2pTNQImJXwcHBxMXFsWPHDlq0aHHFsZMnT+b555+nU6dOvPDCCzg7O7N27VqWLl1K3759gYLQMXnyZKKjo7nvvvvYu3cvH3zwAevXr2f16tVFzpKcPXuWfv36MWzYMP7xj3/g5+eH1Wrl+uuvZ9WqVYwZM4bQ0FC2b9/Om2++yb59+5g/f/4Va4yNjWXgwIEEBATw0EMP4e/vz+7du1mwYAEPPfQQAD///DP9+vWjQYMGPP/881y4cIF33nmHzp07s2nTpmIh5NZbbyU0NJR//etf/PDDD7z00kt4e3vzf//3f/Tq1YspU6Ywe/ZsHn30Udq3b0+3bt2KvP/ll1/GMAyeeOIJkpKSeOutt4iOjmbLli24ubkBMG/ePM6fP899991HrVq1WLduHe+88w7Hjx9n3rx5RbaXl5dHTEwMXbp04bXXXrvs2ZrY2Fhuu+02evfuzZQpUwDYvXs3q1evth2LxMREOnXqxPnz53nwwQepVasWs2bN4vrrr+err77ihhtuKLLNf/3rX1gsFh599FFSU1OZOnUqw4cPZ+3atbYx69ato2fPnjz33HM8//zzV/x5waX7AAomEKhevToTJkygevXqLF26lEmTJpGWlsa///1vALp27YrVamXVqlUMHDgQKAijFouFlStX2j5j8+bNZGRkFPvZXOzWW2/lzjvvZP369bRv3962/MiRI/z666+2z9y5cycDBw6kVatWvPDCC7i4uHDgwAFWr179p/taku/QH13Nd+KDDz6gefPmXH/99Tg6OvL9999z//33Y7VaGTt2bJHtHjhwgJtuuonRo0czYsQIPv74Y+666y4iIiJo3rw5UBCGu3fvzokTJ7j33nupV68ea9asYeLEiZw6dcp2iZxpmgwePJhVq1bxz3/+k9DQUL755htGjBjxp8cEICEhgZ49e5KXl8eTTz6Ju7s7H374oe37cbGS9MXTTz9Namoqx48f58033wSgevXqAKSlpfHf//6X2267jXvuuYf09HQ++ugjYmJiWLduHeHh4SWqWUTsyBQRsaPFixebDg4OpoODgxkVFWU+/vjj5qJFi8ycnJwi4/bv329aLBbzhhtuMPPz84uss1qtpmmaZlJSkuns7Gz27du3yJh3333XBMyPP/7Ytqx79+4mYE6fPr3Itj799FPTYrGYK1euLLJ8+vTpJmCuXr36svuSl5dnhoSEmMHBwea5c+cuWaNpmmZ4eLjp6+trnj171rZs69atpsViMe+8807bsueee84EzDFjxhT5jLp165qGYZj/+te/bMvPnTtnurm5mSNGjLAt++WXX0zArFOnjpmWlmZbPnfuXBMw3377bduy8+fPF9ufV1991TQMwzxy5Iht2YgRI0zAfPLJJ4uNHzFihBkcHGx7/dBDD5keHh5mXl5esbGFxo8fbwJFjnd6eroZEhJi1q9f3/ZzLNyX0NBQMzs72zb27bffNgFz+/btxfb7ueeeu+znFrpcH5jmpY/Jvffea1arVs3MysoyTdM08/PzTQ8PD/Pxxx83TbPg51yrVi3z5ptvNh0cHMz09HTTNE3zjTfeMC0WS7G+uFhqaqrp4uJiPvLII0WWT506tcjP4c033zQB8/Tp03+6fxcryXfINAuOSffu3W2vr+Y7caljFhMTYzZo0KDIsuDgYBMwV6xYYVuWlJRUbP9ffPFF093d3dy3b1+R9z/55JOmg4ODefToUdM0TXP+/PkmYE6dOtU2Ji8vz+zatasJmDNmzLjcYTFN8/c+XLt2bZF6PD09TcCMj4+/4j7+sS9M0zQHDBhQ5PtwcV0X97BpFnx//fz8zFGjRl2xThEpH3QJn4jYVZ8+fYiLi+P6669n69atTJ06lZiYGOrUqcN3331nGzd//nysViuTJk3CYin6q8swDKDgzE5OTg7jx48vMuaee+7Bw8Oj2GVhLi4ujBw5ssiyefPmERoaSrNmzThz5oztv169egFc8TKbzZs3Ex8fz/jx44tdklhY46lTp9iyZQt33XUX3t7etvWtWrWiT58+/Pjjj8W2e/fdd9v+t4ODA+3atcM0TUaPHm1b7uXlRdOmTTl06FCx9995553UqFHD9vqmm24iICCgyGdd/C/tmZmZnDlzhk6dOmGaJps3by62zfvuu++yx+HimjIzM4mNjb3smB9//JEOHTrQpUsX27Lq1aszZswYDh8+zK5du4qMHzlyJM7OzrbXXbt2BSiy3z169MA0zRKdfYJL9wEUPSbp6emcOXOGrl272i4RBLBYLHTq1IkVK1YABWfYzp49y5NPPolpmsTFxQEFZ6VatGhxyUtVC3l4eNCvXz/mzp2LaZq25V9++SWRkZHUq1cPwLaNb7/99pKXmF1OSb5Dl3I134mLj1lqaipnzpyhe/fuHDp0iNTU1CLbDQsLs/38AHx8fIr18Lx58+jatSs1a9Ys8tnR0dHk5+fbjvuPP/6Io6Njkb50cHDggQceKNGx+fHHH4mMjKRDhw5F6im8PPViJemLK3FwcLD1sNVqJTk5mby8PNq1a8emTZtKVK+I2JcClIjYXfv27fn66685d+4c69atY+LEiaSnp3PTTTfZ/oA+ePAgFouFsLCwy27nyJEjADRt2rTIcmdnZxo0aGBbX6hOnTpF/hgH2L9/Pzt37sTHx6fIf02aNAGuPLHFwYMHAa54KeLlagQIDQ3lzJkzZGZmFlle+IdzIU9PT1xdXaldu3ax5efOnSu23caNGxd5bRgGjRo1KvJsm6NHj9pCXfXq1fHx8aF79+4Axf7wdXR0pG7dupfdx0L3338/TZo0oV+/ftStW5dRo0axcOHCImOOHDly2WNRuP5ifzwWNWvWBLjkfpfUpfoACi6Vu+GGG/D09MTDwwMfHx/+8Y9/AEWPSdeuXdm4cSMXLlxg5cqVBAQE0LZtW1q3bm27jG/VqlVFwsLl3HrrrRw7dswWvA4ePMjGjRu59dZbi4zp3Lkzd999N35+fgwbNoy5c+f+aZgqyXfoUq7mO7F69Wqio6Nt9/b5+PjY7s36Yx/98WcJBT/Pi3+W+/fvZ+HChcU+Ozo6ushnHzlyhICAANtlcoUu1VuXcuTIkWLfk8u9v6R9cSWzZs2iVatWuLq6UqtWLXx8fPjhhx9K/H4RsS/dAyUi5YazszPt27enffv2NGnShJEjRzJv3jyee+65Mvm8S93fYLVaadmyJW+88cYl3xMUFFQmtVyJg4NDiZYBRc5clFR+fj59+vQhOTmZJ554gmbNmuHu7s6JEye46667iv1h7uLiUuwMxqX4+vqyZcsWFi1axE8//cRPP/3EjBkzuPPOO5k1a9ZV1wmlu9+FLtUHKSkpdO/eHQ8PD1544QUaNmyIq6srmzZt4oknnihyTLp06UJubi5xcXGsXLnSFpS6du3KypUr2bNnD6dPny5RgBo0aBDVqlVj7ty5dOrUiblz52KxWLj55puL1LtixQp++eUXfvjhBxYuXMiXX35Jr169WLx48WWP0V9V0u/EwYMH6d27N82aNeONN94gKCgIZ2dnfvzxR958881ifVSSn6XVaqVPnz48/vjjlxxbGOKulavpi8v57LPPuOuuuxgyZAiPPfYYvr6+ODg48Oqrr9r+EUZEyjcFKBEplwpnHDt16hQADRs2xGq1smvXrsveZB0cHAzA3r17i8xmlpOTQ3x8vO1fra+kYcOGbN26ld69e1/xsqbLvRdgx44dl/2si2v8oz179lC7dm3c3d2v6nP/zP79+4u8Nk2TAwcO0KpVK6DgQcb79u1j1qxZ3HnnnbZxV7r0rqScnZ0ZNGgQgwYNwmq1cv/99/N///d/PPvsszRq1Ijg4ODLHgv4/Xhda8uWLePs2bN8/fXXRSZ+iI+PLza2Q4cOODs7s3LlSlauXMljjz0GQLdu3fjPf/7DkiVLbK//jLu7OwMHDmTevHm88cYbfPnll3Tt2pXAwMAi4ywWC71796Z379688cYbvPLKKzz99NP88ssvl+29knyHLve+knwnvv/+e7Kzs/nuu++KnF36O7PLNWzYkIyMjD/97gYHB7NkyRIyMjKKnIUq6XPJgoODi31PLvX+q+mLyx2rr776igYNGvD1118XGVNW/1AkIqVPl/CJiF398ssvlzx7UHh/TuElNEOGDMFisfDCCy8U+1fewvdHR0fj7OzMtGnTimzzo48+IjU1lQEDBvxpPbfccgsnTpzgP//5T7F1Fy5cKHZ53cXatm1LSEgIb731VrHpkwvrCQgIIDw8nFmzZhUZs2PHDhYvXkz//v3/tMar9cknn5Cenm57/dVXX3Hq1Cn69esH/H4m4OJjZprm336Q8dmzZ4u8tlgsttBWOP11//79Wbdune2SNSi4B+vDDz+kfv36V325GVzdNOaXc6ljkpOTw/vvv19srKurK+3bt+fzzz/n6NGjRc5AXbhwgWnTptGwYUMCAgJK9Nm33norJ0+e5L///S9bt24tcvkeQHJycrH3FAaiK021X5Lv0KWU9DtxqWOWmprKjBkzLrvtP3PLLbcQFxfHokWLiq1LSUkhLy8PKOijvLy8IlOm5+fn884775Toc/r378+vv/7KunXrbMtOnz7N7Nmzi4y7mr5wd3e/5CV5l9rG2rVri3wHRKR80xkoEbGrBx54gPPnz3PDDTfQrFkzcnJyWLNmDV9++SX169e33dzfqFEjnn76aV588UW6du3KjTfeiIuLC+vXrycwMJBXX30VHx8fJk6cyOTJk7nuuuu4/vrr2bt3L++//z7t27e33adwJXfccQdz587ln//8J7/88gudO3cmPz+fPXv2MHfuXBYtWlTkeTwXs1gsfPDBBwwaNIjw8HBGjhxJQEAAe/bsYefOnbY/Av/973/Tr18/oqKiGD16tG0ac09PzxJPfHA1vL296dKlCyNHjiQxMZG33nqLRo0acc899wDQrFkzGjZsyKOPPsqJEyfw8PDgf//739+6rwgKJr9ITk6mV69e1K1blyNHjvDOO+8QHh5uu8fpySef5PPPP6dfv348+OCDeHt7M2vWLOLj4/nf//5XoksF/+hqpzG/lE6dOlGzZk1GjBjBgw8+iGEYfPrpp5cNGl27duVf//oXnp6etGzZEii4hLFp06bs3buXu+66q8SfXfh8rUcffRQHBweGDh1aZP0LL7zAihUrGDBgAMHBwSQlJfH+++9Tt27dIpNx/FFJvkOXUtLvRN++fW1nHO+9914yMjL4z3/+g6+vr+1M8tV67LHH+O677xg4cKBtivPMzEy2b9/OV199xeHDh6lduzaDBg2ic+fOPPnkkxw+fJiwsDC+/vrrEt9T9Pjjj/Ppp59y3XXX8dBDD9mmMQ8ODmbbtm22cVfTFxEREXz55ZdMmDCB9u3bU716dQYNGsTAgQP5+uuvueGGGxgwYADx8fFMnz6dsLAwMjIy/tJxEpFr7FpO+Sci8kc//fSTOWrUKLNZs2Zm9erVTWdnZ7NRo0bmAw88YCYmJhYb//HHH5tt2rQxXVxczJo1a5rdu3c3Y2Nji4x59913zWbNmplOTk6mn5+fed999xWbPrp79+5m8+bNL1lTTk6OOWXKFLN58+a2z4mIiDAnT55spqam/uk+rVq1yuzTp49Zo0YN093d3WzVqpX5zjvvFBnz888/m507dzbd3NxMDw8Pc9CgQeauXbuKjCmcxvyP01WPGDHCdHd3L/a5f9ynwum8P//8c3PixImmr6+v6ebmZg4YMKDI1OSmaZq7du0yo6OjzerVq5u1a9c277nnHnPr1q3FpoC+3GcXrrt42uavvvrK7Nu3r+nr62s6Ozub9erVM++9917z1KlTRd538OBB86abbjK9vLxMV1dXs0OHDuaCBQuKjCncl3nz5hVZHh8fX6zGq53G/HJ9sHr1ajMyMtJ0c3MzAwMDbVPsA+Yvv/xSZOwPP/xgAma/fv2KLL/77rtNwPzoo4/+tJaLDR8+3ATM6OjoYuuWLFliDh482AwMDDSdnZ3NwMBA87bbbis21ffl/Nl36I/TmJtmyb8T3333ndmqVSvT1dXVrF+/vjllyhTz448/LjYVeHBwsDlgwIBitV3qs9PT082JEyeajRo1Mp2dnc3atWubnTp1Ml977bUijzs4e/aseccdd5geHh6mp6eneccdd5ibN28u0TTmpmma27ZtM7t37266urqaderUMV988UXzo48+KlZ7SfsiIyPDvP32200vLy8TsH03rFar+corr5jBwcGmi4uL2aZNG3PBggXFvj8iUn4Zpvk37rwVEZFya9myZfTs2ZN58+Zx00032bscERGRSkH3QImIiIiIiJSQApSIiIiIiEgJKUCJiIiIiIiUkO6BEhERERERKSGdgRIRERERESkhBSgREREREZESqtIP0rVarZw8eZIaNWpgGIa9yxERERERETsxTZP09HQCAwOv+CD3Kh2gTp48SVBQkL3LEBERERGRcuLYsWPUrVv3suurdICqUaMGUHCQPDw87FxN1WK1Wjl9+jQ+Pj5XTPhS+akXpJB6QQqpF6SQekEuVtb9kJaWRlBQkC0jXE6VDlCFl+15eHgoQF1jVquVrKwsPDw89AuxilMvSCH1ghRSL0gh9YJc7Fr1w5/d2qNOFBERERERKSEFKBERERERkRJSgBIRERERESmhKn0PVElYrVZycnLsXUal4+DgYO8SRERERESumgLUFeTk5BAfH4/VarV3KZWOaZo4OTnh4+Nj71JEREREREpMAeoyTNPk1KlTODg4EBQUpJlfSpFpmmRmZpKQkEBiYiKBgYH2LklEREREpEQUoC4jLy+P8+fPExgYSLVq1exdTqXj6uqK1WrlzJkz+Pn56ZI+EREREakQdFrlMvLz8wFwdna2cyWVl6urKwC5ubl2rkREREREpGQUoP7Enz1IS/46HVsRERERqWgUoEREREREREpIAUpERERERKSEFKAqmbvuugvDMDAMAycnJ0JCQnj88cfJysqyd2kiIiIiIhWeZuGrhK677jpmzJhBbm4uGzduZMSIERiGwZQpU+xdmoiIiIhIhaYzUJWQi4sL/v7+BAUFMWTIEKKjo4mNjQXg7Nmz3HbbbdSpU4dq1arRsmVLPv/8c9t7FyxYgJeXl20Wwi1btmAYBk8++aRtzN13380//vGPa7tTIiIiIlKpZJ1PZ/0XL5OfV7FmZFaAKiHTNDmfk2eX/0zT/Mt179ixgzVr1timY8/KyiIiIoIffviBHTt2MGbMGO644w7WrVsHQNeuXUlPT2fz5s0ALF++nNq1a7Ns2TLbNpcvX06PHj3+ck0iIiIiIts/Hkf7PVPZ+tZQe5dyVXQJXwldyM0nbNIiu3z2rhdiqOZc8h/VggULqF69Onl5eWRnZ2OxWHj33XcBqFOnDo8++qht7AMPPMCiRYuYO3cuHTp0wNPTk/DwcJYtW0a7du1YtmwZDz/8MJMnTyYjI4PU1FQOHDhA9+7dS30/RURERKRq2Lr4E9qfmY/VNLC0H2Xvcq6KzkBVQj179mTLli2sXbuWESNGMHLkSIYOLUj2+fn5vPjii7Rs2RJvb2+qV6/OokWLOHr0qO393bt3Z9myZZimycqVK7nxxhsJDQ1l1apVLF++nMDAQBo3bmyv3RMRERGRCizp2AFC1hTcHhIX8A/Cuw+xb0FXSWegSsjNyYFdL8TY7bOvhru7O40aNQLg448/pnXr1nz00UeMHj2af//737z99tu89dZbtGzZEnd3d8aPH09OTo7t/T169ODjjz9m69atODk50axZM3r06MGyZcs4d+6czj6JiIiIyF+Sn5fL2U9HEEomex2b0H7k6/Yu6aopQJWQYRhXdRldeWGxWHjqqaeYMGECt99+O6tXr2bw4MG2SSCsViv79u0jLCzM9p7C+6DefPNNW1jq0aMH//rXvzh37hyPPPKIXfZFRERERCq2DZ88RcecHWSYblS7bSbOLi72Lumq6RK+KuDmm2/GwcGB9957j8aNGxMbG8uaNWvYvXs39957L4mJiUXG16xZk1atWjF79mzbZBHdunVj06ZN7Nu3T2egREREROSq7Vm7kHZH/gPArrbPE9SwuX0L+osUoKoAR0dHxo0bx9SpU3nkkUdo27YtMTEx9OjRA39/f4YMGVLsPd27dyc/P98WoLy9vQkLC8Pf35+mTZte2x0QERERkQotLTkJr5/G4mCYrPOIof3199q7pL/sqgPUihUrGDRoEIGBgRiGwfz5823rcnNzeeKJJ2z31gQGBnLnnXdy8uTJIttITk5m+PDheHh44OXlxejRo8nIyCgyZtu2bXTt2hVXV1eCgoKYOnVqsVrmzZtHs2bNcHV1pWXLlvz4449XuzuVzsyZM4v8TAo9+eSTJCUlUadOHebPn096ejqJiYm8+OKLzJo1q9h73nrrLUzTpFmzZrZlW7Zs4dSpU2W8ByIiIiJSmeTn5XLoo7vw5wzHjEBCR0/HMAx7l/WXXXWAyszMpHXr1rz33nvF1p0/f55Nmzbx7LPPsmnTJr7++mv27t3L9ddfX2Tc8OHD2blzJ7GxsSxYsIAVK1YwZswY2/q0tDT69u1LcHAwGzdu5N///jfPP/88H374oW3MmjVruO222xg9ejSbN29myJAhDBkyhB07dlztLomIiIiISBnISE1m5+v9Cc9cTY7pwPnrP6SGp7e9y/pbDPNvPKXVMAy++eabS14CVmj9+vV06NCBI0eOUK9ePXbv3k1YWBjr16+nXbt2ACxcuJD+/ftz/PhxAgMD+eCDD3j66adJSEiwPQD2ySefZP78+ezZsweAW2+9lczMTBYsWGD7rMjISMLDw5k+fXqJ6k9LS8PT05PU1FQ8PDyKrMvKyiI+Pp6QkBBcXV2v5rBICZimSUZGBseOHaNBgwY6xlWY1WolKSkJX19fLBZdVVyVqRekkHpBCqkXKrZTh3eT88nNBFuPccF0Zmfkv2nX766/vL2y7ocrZYOLlfm0cqmpqRiGgZeXFwBxcXF4eXnZwhNAdHQ0FouFtWvXcsMNNxAXF0e3bt1s4QkgJiaGKVOmcO7cOWrWrElcXBwTJkwo8lkxMTGXvHytUHZ2NtnZ2bbXaWlpQMEPw2q1FhlrtVoxTdP2n5S+wuN6qeMvVUfhd009IOoFKaRekELqhYpr77pFBCy8hwDSScKb5Otn0LZNt7/1syzrfijpdss0QGVlZfHEE09w22232VJcQkICvr6+RYtwdMTb25uEhATbmJCQkCJj/Pz8bOtq1qxJQkKCbdnFYwq3cSmvvvoqkydPLrb89OnTZGVlFVmWm5uL1WolLy+PvLy8Eu6xlFRh81utVs6ePYuTk5O9SxI7sVqtpKamYpqm/nWxilMvSCH1ghRSL1RM+3/5hI57/oWzkc8+S0NyB/8ftfyCSEpK+lvbLet+SE9PL9G4MgtQubm53HLLLZimyQcffFBWH3NVJk6cWOSsVVpaGkFBQfj4+FzyEr709HQcHR1xdKx4z3+qCCwWCxaLhVq1aukSvirMarViGAY+Pj76P8cqTr0ghdQLUki9UPGs/+wZuh56DwzY6N6Npv/8lGrul78c7mqUdT+U9O/RMkkGheHpyJEjLF26tEg48ff3L5Y+8/LySE5Oxt/f3zbmj88mKnz9Z2MK11+Ki4sLLpd4WFfhH/J/XGYYhu0/KV2madqO66WOv1QthmGoDwRQL8jv1AtSSL1QcaybPZmOhwommlsdOIKo0W9icXAo1c8oy34o6TZL/ZMLw9P+/fv5+eefqVWrVpH1UVFRpKSksHHjRtuypUuXYrVa6dixo23MihUryM3NtY2JjY2ladOm1KxZ0zZmyZIlRbYdGxtLVFRUae+SiIiIiIhcwbovXqHD/jcAWBM0hs5jppV6eCovrjpAZWRksGXLFrZs2QJAfHw8W7Zs4ejRo+Tm5nLTTTexYcMGZs+eTX5+PgkJCSQkJJCTkwNAaGgo1113Hffccw/r1q1j9erVjBs3jmHDhhEYGAjA7bffjrOzM6NHj2bnzp18+eWXvP3220Uuv3vooYdYuHAhr7/+Onv27OH5559nw4YNjBs3rhQOi4iIiIiIlMT6ea/RYc8UANbUGUnUyCl2rqhsXXWA2rBhA23atKFNmzYATJgwgTZt2jBp0iROnDjBd999x/HjxwkPDycgIMD235o1a2zbmD17Ns2aNaN3797079+fLl26FHnGk6enJ4sXLyY+Pp6IiAgeeeQRJk2aVORZUZ06dWLOnDl8+OGHtG7dmq+++or58+fTokWLv3M8RERERESkhNZ//Tbtd74IwBq/4USNfgOjkl9uedX3QPXo0eOK03qXZMpvb29v5syZc8UxrVq1YuXKlVccc/PNN3PzzTf/6edJcSV5hpeIiIiIyOVs/P4DIrY+BwbE+dxM1L3vVvrwBGVwD5SUDwkJCTzwwAM0aNAAFxcXgoKCGDRoULH7xkRERERErtaBratoteFpLIbJr7WGEHnfh1UiPME1eJCuXHuHDx+mc+fOeHl58e9//5uWLVuSm5vLokWLGDt2LHv27LF3iSIiIiJSQWVnZeL07T9xMvLZVK0LHe7/uMqEJ9AZqErp/vvvxzAM1q1bx9ChQ2nSpAnNmzdnwoQJ/Prrr5d8z7Fjx7jlllvw8vLC29ubwYMHc/jwYdv69evX06dPH2rXro2npyfdu3dn06ZNRbZhGAb//e9/ueGGG6hWrRqNGzfmu+++K8tdFREREZFrbPOsxwi2HuMMXtS/6z+Vdra9y1GAKinThJxM+/xXgvvKCiUnJ7Nw4ULGjh2Lu7t7sfVeXl7FluXm5hITE0ONGjVYuXIlq1evpnr16lx33XW22RPT09MZMWIEq1at4tdff6Vx48b079+/2BObJ0+ezC233MK2bdvo378/w4cPJzk5+eqOtYiIiIiUS3vWLqTDyYK5DI51+RfevoF2ruja0yV8JZV7Hl6xU4M8dRKci4ehSzlw4ACmadKsWbMSb/7LL7/EarXy3//+1/Zw2xkzZuDl5cWyZcvo27cvvXr1KvKeDz/8EC8vL5YvX87AgQNty++66y5uu+02AF555RWmTZvGunXruO6660pcj4iIiIiUP5npKdRY+CAWw2SdV386RN9m75LsQmegKpmSzIL4R1u3buXAgQPUqFGD6tWrU716dby9vcnKyuLgwYMAJCYmcs8999C4cWM8PT3x8PAgIyODo0ePFtlWq1atbP/b3d0dDw8PkpKS/t5OiYiIiIjd7Zz5IHXMRBKoTbOR79m7HLvRGaiScqpWcCbIXp9dQo0bN8YwjKuaKCIjI4OIiAhmz55dbJ2Pjw8AI0aM4OzZs7z99tsEBwfj4uJCVFSU7RI/W6lOTkVeG4aB1WotcS0iIiIiUv5sX/YVHc5+C8CZ6Ldo4elt54rsRwGqpAyjxJfR2ZO3tzcxMTG89957PPjgg8Xug0pJSSl2H1Tbtm358ssv8fX1xcPD45LbXb16Ne+//z79+/cHCiadOHPmTJnsg4iIiIiUH6nJp/Fb9hgAv/rcTGSXQXauyL50CV8l9N5775Gfn0+HDh343//+x/79+9m9ezfTpk0jKiqq2Pjhw4dTu3ZtBg8ezMqVK4mPj2fZsmU8+OCDHD9+HCg4s/Xpp5+ye/du1q5dy/Dhw3Fzc7vWuyYiIiIi19D2ZV+R9k43fEnmqBFI67vetHdJdqcAVQk1aNCATZs20bNnTx555BFatGhBnz59WLJkCR988EGx8dWqVWPFihXUq1ePG2+8kdDQUEaPHk1WVpbtjNRHH33EuXPnaNu2LXfccQcPPvggvr6+13rXREREROQaOBm/hy1T+9Fy2WiCzJOcwYvswf/Bzb2GvUuzO8P8K7MOVBJpaWl4enqSmppa7NK1rKws4uPjCQkJwdXV1U4VVl6maZKRkcGxY8do0KCBjnEVZrVaSUpKwtfXF0sVegifFKdekELqBSmkXrj2ss6ns/XzyYQfnYmLkUuu6cAG/1tofvsreNj5vqey7ocrZYOL6R4oERERERHh4LZVuH4zmo5mAhiw3TmcGje8QVRohL1LK1cUoEREREREqjDTamX9V/8mfOdUnI08EqnF8Y7P0jZmBIbO/BWjACUiIiIiUkVlpCWz9z+j6JD+Cxiw2S2KBvd8QoS37nW/HAUoEREREZEq6NCOX3H630gizJPkmg6sb/QQkbc/i8VBZ52uRAFKRERERKSK2bRwJmFxj+Jq5JJAbc4N+D86dYi2d1kVggLUn6jCkxSWOR1bERERkWtvS+xsWsZNwMnIZ4trB4Lv/pTQ2v72LqvCUIC6DCcnJwzD4PTp0/j4+GAYhr1LqjRM0yQnJ4eEhAQsFgvOzs72LklERESkSti27CvCVj2Ik5HPBo9o2jz4JQ6OigRXQ0frMhwcHKhbty7Hjx/n8OHD9i6n0jFNE8MwCAkJ0XMdRERERK6Bnau+p8kv/8TZyGOjezfCH/hc4ekv0BG7gurVq9O4cWNyc3PtXUqlYxgGycnJOvskIiIicg3sWbuIkNjRuBq5bHGLouWDX+HopL/D/goFqD/h4OCAg4ODvcuodKxWqy6LFBEREbkG9m9aRt0fR1DNyGabawTNHvwfzi4u9i6rwtK1UyIiIiIildS2pV8S+O2tVDcusNO5FY0f+BZXN3d7l1Wh6QyUiIiIiEglY1qtrPv8JdrvewOLYbLDuTX1H/gON/ca9i6twlOAEhERERGpRHKys9jyf6PpmLwADFhbcxBt/vlfnF1c7V1apaAAJSIiIiJSSaSeTeD4/91Mh5xt5JsG65o8QuRtT2No1uNSowAlIiIiIlIJHDuwHcvsm2huJpBhunGg29tE9b7V3mVVOgpQIiIiIiIV3JHdG3H/8kZqk8IJw4/sW2YTHtbe3mVVSgpQIiIiIiIV2KEdv1Lzq5upSRqHLPXxuPcH6vjVtXdZlZYClIiIiIhIBXVgy0p85t+KJ5nsd2iEz30/4lXbz95lVWq6m0xEREREpALau2EJvvNvwZNM9jg2w3fcIoWna0BnoEREREREKpg9634m6IfhuBtZ7HJqQdC476nh6W3vsqoEBSgRERERkQokfuc6An+8E3cjix3O4TR48DuqVfe0d1lVhgKUiIiIiEgFcerIXtzn3YoHmexxCqPhQwtwc69h77KqFN0DJSIiIiJSASQnHidv5hB8SeawpR4B/5yv8GQHClAiIiIiIuVcRto5zv5nMEHmSRLwwW30t3jW0oQR9qAAJSIiIiJSjuVkXeDwezfQOO8A56hB9m1f4Vengb3LqrIUoEREREREyqn0lLPsfOcmWmRv5rzpQtL1swluGm7vsqo0TSIhIiIiIlLOmFYrm378iOANL9GGFHJMBw72/j9atu1u79KqPAUoEREREZFy5Pj+raR89RAR2ZsBOGoEktb3NVp2GmDnygQUoEREREREyoWs8xlsmTOJtsdmUdfII8t0YlPwaNre9hz13KrZuzz5jQKUiIiIiIgdFV6uV2fDFCI5DQZsdW1PrZun0alhmL3Lkz9QgBIRERERsZO9G36GhU8RkbcXgERqcbzjJNrG3Ilh0Xxv5dFV/1RWrFjBoEGDCAwMxDAM5s+fX2T9119/Td++falVqxaGYbBly5Zi28jKymLs2LHUqlWL6tWrM3ToUBITE4uMOXr0KAMGDKBatWr4+vry2GOPkZeXV2TMsmXLaNu2LS4uLjRq1IiZM2de7e6IiIiIiFxzJ+P3sPH1ITRdMJSmeXvJNF2IC/4nHo9tJaLfXQpP5dhV/2QyMzNp3bo177333mXXd+nShSlTplx2Gw8//DDff/898+bNY/ny5Zw8eZIbb7zRtj4/P58BAwaQk5PDmjVrmDVrFjNnzmTSpEm2MfHx8QwYMICePXuyZcsWxo8fz913382iRYuudpdERERERMpcTtYFNv00g23/isZvZiQR6b9gNQ3Weg3g/L3riRo5BTf3GvYuU/7EVV/C169fP/r163fZ9XfccQcAhw8fvuT61NRUPvroI+bMmUOvXr0AmDFjBqGhofz6669ERkayePFidu3axc8//4yfnx/h4eG8+OKLPPHEEzz//PM4Ozszffp0QkJCeP311wEIDQ1l1apVvPnmm8TExFztbomIiIiIlIn4nWtJXP5fmib9RFvSCxYasM01gmr9X6Jjq072LVCuyjW/B2rjxo3k5uYSHR1tW9asWTPq1atHXFwckZGRxMXF0bJlS/z8/GxjYmJiuO+++9i5cydt2rQhLi6uyDYKx4wfP/6yn52dnU12drbtdVpaGgBWqxWr1VpKeyglYbVaMU1Tx13UC2KjXpBC6gUpVFF74UJmOgfWLSRr9yLqnF1DiHmKkN/WJeHNgcBB1Ol5Ny0atgCocPtnL2XdDyXd7jUPUAkJCTg7O+Pl5VVkuZ+fHwkJCbYxF4enwvWF6640Ji0tjQsXLuDm5lbss1999VUmT55cbPnp06fJysr6y/skV89qtZKamoppmlh0jW+Vpl6QQuoFKaRekEIVqRfOZ6ZxbPWXeB1fQrPsHbQ2cm3rckwHtlaLIif0JoLb9qGBY8Gf4ElJSfYqt0Iq635IT08v0bgqNQvfxIkTmTBhgu11WloaQUFB+Pj44OHhYcfKqh6r1YphGPj4+JT7X4hSttQLUki9IIXUC1KoIvTCkd0bSPrlA1qc/pEGxm//IG/AKWpz1Lszjk2iadRxABGeNe1baCVQ1v3g6upaonHXPED5+/uTk5NDSkpKkbNQiYmJ+Pv728asW7euyPsKZ+m7eMwfZ+5LTEzEw8PjkmefAFxcXHBxcSm23GKxlNsvZWVmGIaOvQDqBfmdekEKqRekUHnshdycbLYvmY3r5o8Jy9lecHmeAUeNOpxocBMB7YcQ3CScgHJUc2VRlv1Q0m1e8wAVERGBk5MTS5YsYejQoQDs3buXo0ePEhUVBUBUVBQvv/wySUlJ+Pr6AhAbG4uHhwdhYWG2MT/++GORbcfGxtq2ISIiIiJSmk6fOMTBhe/R6NhXtCUFgDzTwrbqnXGKvIcWnQdRT6Gp0rvqAJWRkcGBAwdsr+Pj49myZQve3t7Uq1eP5ORkjh49ysmTJ4GCcAQFZ4z8/f3x9PRk9OjRTJgwAW9vbzw8PHjggQeIiooiMjISgL59+xIWFsYdd9zB1KlTSUhI4JlnnmHs2LG2M0j//Oc/effdd3n88ccZNWoUS5cuZe7cufzwww9/+6CIiIiIiACYVis71ywgN+5DWmasJtIomGjgDF7srzuUkJixtA1qaOcq5Vq66gC1YcMGevbsaXtdeE/RiBEjmDlzJt999x0jR460rR82bBgAzz33HM8//zwAb775JhaLhaFDh5KdnU1MTAzvv/++7T0ODg4sWLCA++67j6ioKNzd3RkxYgQvvPCCbUxISAg//PADDz/8MG+//TZ169blv//9r6YwFxEREZG/JS3lDAfX/UTuviXUObuGFuZvt40YsMupJefD76JV9D+IcinZPTNSuRimaZr2LsJe0tLS8PT0JDU1VZNIXGNWq9V2iWZ5uqZZrj31ghRSL0gh9YIUupa9cOrwHo4s+Q81T62iUe5eHIzf/0TONF3ZUfs6fHqNpUHzDmVah1xeWfdDSbNBlZqFT0RERETkYge2rCBtyRu0TltGQGFo+m1CiJO1InFu2psmHfvT0UOz6EkBBSgRERERqVJMaz7bl32Fw6/v0jxnW8FCA7a7tOVC0yHUa9efevUaU8++ZUo5pQAlIiIiIlVG6rnTJLw3gFZ5BROd5ZoObPHsTc0+E2jZUrM5y59TgBIRERGRKiHrQiYnPhhCWN5eMkw3tvvfQMjAR2gf1MjepUkFogAlIiIiIpWeNS+PXe8No23ODtJNN5Ju/paoFh3tXZZUQJraRkREREQqNdNqZf3/3UvbjBXkmI4c6fMfGio8yV+kACUiIiIildraz56j4+mvANjafgotugyyc0VSkSlAiYiIiEiltfG7D4g8NA2AuMaP0n7g3XauSCo63QMlIiIiIpXKhYw09q75lrxdC2h9LhYMiPO7jcjbn7F3aVIJKECJiIiISIWXmnyafcs/x2n/TzTLXE+4kVuwwoB1Hn3oOOY9DMOwb5FSKShAiYiIiEiFlpOdRda0jrTnbMECA04Yfhz16UH11oNpH9UPw6I7V6R0KECJiIiISIW2e9V8WnOWNNzZETQcn/ZDadSiA3UUmqQMKECJiIiISIWWu7Vghr1dPv3pNPrfdq5GKjvFchERERGpsLLOZxCauhIArw7D7FyNVAUKUCIiIiJSYe1e8RXuRhYJ+NCkbS97lyNVgAKUiIiIiFRY5o6vAYj374vFQX/aStlTl4mIiIhIhZSZnkJoehwAtSNvs3M1UlUoQImIiIhIhbRn+TzcjByOGwE0atXZ3uVIFaEAJSIiIiIVkmVXweV7xwKv03Oe5JpRp4mIiIhIhZOWcobmmesA8Ot8u52rkapEAUpEREREKpy9y77A2cjjsCWIkND29i5HqhAFKBERERGpcFz2zAfgVN0BGIZh32KkSlGAEhEREZEKJeX0KUIvbAKgTlddvifXlgKUiIiIiFQo+5bPwcnI54BDQ+o1bm3vcqSKUYASERERkQql2v7vADgd3N/OlUhVpAAlIiIiIhXGtiVfEJq1FYDgbnfYuRqpihztXYCIiIiIyJ9JPZvA/k/G0S41FgzY6tqe1vWb2rssqYJ0BkpEREREyrUtiz8h952OtEuNJd80iPO7nSYPzLd3WVJF6QyUiIiIiJRLiccPcvzzh2l/YSUAhy1BXOg/jah2vexcmVRlClAiIiIiUm6knU1izy+f4b7vG0KztxNgmOSZFtbVHUHbf7yCq1s1e5coVZwClIiIiIjYVU52FtuXfIbDjq8Iy1xHByO/YIUBOxyb4zxgCp3adLVvkSK/UYASEREREbs4m3iM/T9Mo9HRuUSQUrDQgIOWEJLqD6Re139Q280LX19fu9YpcjEFKBERERG5pg5sXUXK0mm0SllCpJEHQBLeHKwzmIAud9AwNIKGgNVqJSkpyb7FivyBApSIiIiIlLm83By2LZmD68b/EJa7o2ChAXsdm5Iefg+t+txJlIuLfYsUKQEFKBEREREpM6nJp9n9wzvUPziHtpwGINd0YKtHD6p3H0czzagnFYwClIiIiIiUqpzsLPatXcj5Lf+jxdlFRBrZAJzDgz11htKw/0O0qxNi5ypF/hoFKBERERH529KSk9i/+mvY+xNN09fSwrhQsMKAQ5b6nG0xipbXjSaqWnX7FiryNylAiYiIiMhfkp2Vyc5fvsRx2+eEnd9AhGEtWGHAGbw4WLMr7u1upXnUABpYLPYtVqSUKECJiIiISImZVisHNi/n3JqZNDsbS1syC1YYcMgSTEJAL7zbDqZJeDdqOzjYt1iRMqAAJSIiIiJXZpoc2b2BU3FfEHjiJxpbT9hWJVCbQ3UGUbfHKBo0bkUDO5Ypci0oQImIiIjIJR3ds4GTqz8n4MQigq3HCP5t+QXTmR2e3XFtfwdhnQbirzNNUoUoQImIiIgIALk5WexbF0vG9gUEJq2gnnmSer+tyzEd2VmtPbnNBtOsx6209/S2a60i9nLVd/OtWLGCQYMGERgYiGEYzJ8/v8h60zSZNGkSAQEBuLm5ER0dzf79+4uMSU5OZvjw4Xh4eODl5cXo0aPJyMgoMmbbtm107doVV1dXgoKCmDp1arFa5s2bR7NmzXB1daVly5b8+OOPV7s7IiIiIlXauaQTbJj/Lpteu56sV+rT/Od/0DHxC4LMk+SYjmx2i2Jd+KtceHgfbZ5YSIfB9+Gh8CRV2FWfgcrMzKR169aMGjWKG2+8sdj6qVOnMm3aNGbNmkVISAjPPvssMTEx7Nq1C1dXVwCGDx/OqVOniI2NJTc3l5EjRzJmzBjmzJkDQFpaGn379iU6Oprp06ezfft2Ro0ahZeXF2PGjAFgzZo13Hbbbbz66qsMHDiQOXPmMGTIEDZt2kSLFi3+zjERERERqbRMq5X4nWtJ2vgdNU8spXHOXtoZpm39WTw56BmFQ7N+NO50PW0UlkSKMEzTNP982GXebBh88803DBkyBCg4+xQYGMgjjzzCo48+CkBqaip+fn7MnDmTYcOGsXv3bsLCwli/fj3t2rUDYOHChfTv35/jx48TGBjIBx98wNNPP01CQgLOzs4APPnkk8yfP589e/YAcOutt5KZmcmCBQts9URGRhIeHs706dNLVH9aWhqenp6kpqbi4eHxVw+D/AVWq5WkpCR8fX2xaFrTKk29IIXUC1JIvVD6TKuVA1tWkPzrbOonLcGPs0XWH3BoyGn/HtRsO4jG4d1wKCf3NKkX5GJl3Q8lzQaleg9UfHw8CQkJREdH25Z5enrSsWNH4uLiGDZsGHFxcXh5ednCE0B0dDQWi4W1a9dyww03EBcXR7du3WzhCSAmJoYpU6Zw7tw5atasSVxcHBMmTCjy+TExMcUuKRQRERGpqk4c2Mbx5Z9Q5/gCGpunbMvPmy7srdaW7AZ9qB91A43qNqCRHesUqUhKNUAlJCQA4OfnV2S5n5+fbV1CQgK+vr5Fi3B0xNvbu8iYkJCQYtsoXFezZk0SEhKu+DmXkp2dTXZ2tu11WloaUJBmrVZrifdT/j6r1Yppmjruol4QG/WCFFIv/HXnM1KI3/gzmXuX4XM6job5h6hTuM50YadHFxxa3kTTToNoXc3d9r7yeqzVC3Kxsu6Hkm63Ss3C9+qrrzJ58uRiy0+fPk1WVpYdKqq6rFYrqampmKapU/JVnHpBCqkXpJB6oeRMq5WT+zZwftdiaievp1Hufloa+bb1eaaF7S5tSA0ZSL0OAwlyL7gsKT0jk/SMTHuVXWLqBblYWfdDenp6icaVaoDy9/cHIDExkYCAANvyxMREwsPDbWOSkpKKvC8vL4/k5GTb+/39/UlMTCwypvD1n40pXH8pEydOLHLZX1paGkFBQfj4+OgeqGvMarViGAY+Pj76hVjFqRekkHpBCqkX/sRvD7VNjJtD0MmfiDAv+nvIgBOGH8c922GEdKV+hwG09qtrv1r/JvWCXKys+6Fwwrs/U6oBKiQkBH9/f5YsWWILTGlpaaxdu5b77rsPgKioKFJSUti4cSMREREALF26FKvVSseOHW1jnn76aXJzc3FycgIgNjaWpk2bUrNmTduYJUuWMH78eNvnx8bGEhUVddn6XFxccHFxKbbcYrHoS2kHhmHo2AugXpDfqRekkHrh0rb+MhevlS8QYj1G4c0O500XdteIIi+kF3XbxBAY0pQ6hmHXOkuTekEuVpb9UNJtXnWAysjI4MCBA7bX8fHxbNmyBW9vb+rVq8f48eN56aWXaNy4sW0a88DAQNtMfaGhoVx33XXcc889TJ8+ndzcXMaNG8ewYcMIDAwE4Pbbb2fy5MmMHj2aJ554gh07dvD222/z5ptv2j73oYceonv37rz++usMGDCAL774gg0bNvDhhx9e7S6JiIiIlHt7N/xMs2X342Lkkm06sdO9A/lhNxLW/WYianjauzyRKuOqA9SGDRvo2bOn7XXhJXEjRoxg5syZPP7442RmZjJmzBhSUlLo0qULCxcuLHJKbPbs2YwbN47evXtjsVgYOnQo06ZNs6339PRk8eLFjB07loiICGrXrs2kSZNsz4AC6NSpE3PmzOGZZ57hqaeeonHjxsyfP1/PgBIREZFK59SRvdReMAoXI5fNblE0GDObtjVr2bsskSrpbz0HqqLTc6DsR891kELqBSmkXpBC6oWiMtKSOf1WD0KsRzjo0AD/8b/gXsPL3mVdE+oFuVh5eQ6UOlFERESknMrPy+PgB7cSYj3CaWpS7a6vqkx4EimvFKBEREREyqn1H95P6wvryDKdSB40k4CghvYuSaTKU4ASERERKYfWzXuNyKQvAdjZcSpNI3rYtyARAarYg3RFREREyqP8vFyO7FrPmd0rcTixHv/0bXT47flOa4Lvo1P/UXauUEQKKUCJiIiIXGumyfED2zmx4TuqHfmFhhe208DIpsEfhv1a+0aiRrxilxJF5NIUoERERESugYzUZA5tWEzWnsXUObOKumYidQtXGpBmVuOwayiZPm2o1qgT9Vt1I9Lbx54li8glKECJiIiIlIHMtHMc2rSEzL2/4H16HQ1z99PK+P3pMTmmA3tdWpIR1BPfNv2oH9qOVg4OdqxYREpCAUpERESkFCQdP8TxbcvIPRyHd/JmQnIP0tKw/j7AgONGAMdrdsQ5tC9NIgfQUlOSi1Q4ClAiIiIif4FptbJ3fSzpcR8TlLIRf07je/EAA04Yfhz3bIcR0pWgtn2pG9Tw98v2RKRCUoASERERuQrpqcnsWvghvnvn0Mx6xLY8z7QQ7xjCWe82OAZHEtiyB3WCG1PHjrWKSOlTgBIRERH5E9lZ5zmwcSmZG7+kxdlFdDSyAbhgOrPduw9ubW8lJLw7jWt40djOtYpI2VKAEhEREfkDa34+h3evJ2nLQqodX0njC9tobuQUrDTgiKUupxrdTuh199LBu7Z9ixWRa0oBSkRERISCe5oO7fiV06tn0ShxIQ1I+f25TAacwYt4zw5U63AXYVH9CLZY7FmuiNiJApSIiIhUaWdOHuHA0o/xOzSfhtbDNPxteabpwoFqrblQtyu+ba4jpFk7ais0iVR5ClAiIiJS5Zw5eYSDK7/A/dCPhGZtJfK35zPlmI7sqNEZS5vbCesyhNYurnauVETKGwUoERERqRISju7n8MrP8Tz8E01zdlO78KG2BuxxCiO1yVCa9R5BW28f+xYqIuWaApSIiIhUWhcy09m55DNcd35Bi+wt+BeuMGCPYzNS6l9HUKdhNGsQas8yRaQCUYASERGRSsW0Wtm76RdS18wk7Gws7YwLAFhNgz3OLUhr0J/6XW6lWVDDP9mSiEhxClAiIiJSoeXl5hC/cy1ndy3D5cSv1M/cRjPSClYacNLw40jQYIJ73U1Y/ab2LVZEKjwFKBEREalw0lLOsnfZFzjv/ZaG57fR2LhQ5AG2500Xdnr1wK3DnYRF9iPQwcFutYpI5aIAJSIiIhXChcx0di2fi2Xn14RlrKW9kVuwwoA0qhHv1pLzAR2pGdqdBq260F4z6IlIGVCAEhERkXLJmp/Poe1xnNm2iGonVtH4wnYijJyClQYcsdTlZN3++EQMIaR5R1o76s8aESl7+k0jIiIi5YJptXLi0C5ObF6IEb8cl/ObaUQ6jQoHFN7PFHAdfp2GExLWnmA92FZErjEFKBEREbGbU0f2cmzTYozDK6mXuoG6nKXuReszTVf2VwsnK6grAW36Ua9pGwIVmkTEjhSgRERE5Jo5c/IIhzcuxHpoBXVS1lPHTCTgovU5pgMHnEM57R1B7fD+NInoSbizi93qFRH5IwUoERERKTXnM1LZu/pbspOPYZ4/h5GVgmN2Ck65adTMPk496wlqXzQ+z7Rw0KkJyb4dqd6sN40ietHMzR3vpCR8fX2x6GyTiJQzClAiIiLyt+Tl5rBr1XfkbP6CsNQVtDGyLzvWahoccmzAmdodcGvaiwYR0TT19C46xmot65JFRP4yBSgRERG5aqbVyoEtK0j+9TMaJS2mFakFKww4YfiR4B5KnosXVhdPcKuJg7s3Ll5+1G/dk0a1/H6fGEJEpIJRgBIREZESO3lwJ0eXz6TOsQU0Nk/alp+jBntr98UrcjhN2/akji69E5FKSgFKRERErigtOYk9sR/jdeBrmuTuJfC35edNF3Z6dMW5zS2EdRlCpCZ7EJEqQAFKREREijGt+ez59SfO/zqDFqnL6WDkApBvGuxwiyA79CbCet1G+xpe9i1UROQaU4ASERERm8Qjezm8/BPqxn9FqJlQsNCAg5YQkhreRKNed9I6oJ59ixQRsSMFKBERkSos63wG+9cv4vyuRQScXk0963H8fluXYbqxo1Zfana5mybhXWio+5pERBSgREREqhrTamXXmu/JXTOdZpnrafnb5XlQ8Fymfc5hZITeQlifEUTqEj0RkSIUoERERKqI3Jwsti2cQc2t/0fz/PiChQYk4U18zU44NulDo44DCfOufeUNiYhUYQpQIiIilVx6yhl2fT+NBgc/JYJkoGAGvW0+A/HpcS8Nwtrjq8vzRERKRAFKRESkkko8spfDP75Oi4Rv6WhkAXAGL/YH307ooPFE1vb7ky2IiMgfKUCJiIhUMge3rCB1yRu0TluGn2GCAfGWYJJa3E3rfncT5VbN3iWKiFRYClAiIiKVQHrKafYu+Yzqe+bSLHdXwUIDtrm0xYwcR8tuNxDioMv0RET+LgUoERGRCio3J4tdy7/CuvVLmqevoZ2RB0CO6cAWz2i8ox+mVasoO1cpIlK5KECJiIhUILnZF9gbt4CsbfNplLyM1mQUrDDgkCWYhPpDaNBrJB3qhti3UBGRSkoBSkREpJzLykxl3+pvydvxLY3TVtOCC7Z1SXhzwO86fDqNoFGrjjQwDDtWKiJS+SlAiYiIlDOm1crxvRtI2LgA92PLaZS1nVZGvm39aWpyoFYP3FrdQItO/enk5GTHakVEqpYyuZs0PT2d8ePHExwcjJubG506dWL9+vW29aZpMmnSJAICAnBzcyM6Opr9+/cX2UZycjLDhw/Hw8MDLy8vRo8eTUZGRpEx27Zto2vXrri6uhIUFMTUqVPLYndERETKXEbKGbYunMGGabdz5oWGBH3Zh/YH3iYsewvORj7H8WON723s6PcV3s8eJOqBmYR3H4yjwpOIyDVVJmeg7r77bnbs2MGnn35KYGAgn332GdHR0ezatYs6deowdepUpk2bxqxZswgJCeHZZ58lJiaGXbt24erqCsDw4cM5deoUsbGx5ObmMnLkSMaMGcOcOXMASEtLo2/fvkRHRzN9+nS2b9/OqFGj8PLyYsyYMWWxWyIiIqXGtOZzePtqTm/5Ec/jy2mUs5vWhmlbf8F0Zq9bOOeDehDQbiD1G7ekrh52KyJid4ZpmuafDyu5CxcuUKNGDb799lsGDBhgWx4REUG/fv148cUXCQwM5JFHHuHRRx8FIDU1FT8/P2bOnMmwYcPYvXs3YWFhrF+/nnbt2gGwcOFC+vfvz/HjxwkMDOSDDz7g6aefJiEhAWdnZwCefPJJ5s+fz549e0pUa1paGp6enqSmpuLh4VGah0H+hNVqJSkpCV9fXyz6g6BKUy9IoarSC6Y1n22LZ1Fz3evUsx4vsi7eCOJU7c64NY+haYe+VKtW3U5V2ldV6QX5c+oFuVhZ90NJs0Gpn4HKy8sjPz/fdiapkJubG6tWrSI+Pp6EhASio6Nt6zw9PenYsSNxcXEMGzaMuLg4vLy8bOEJIDo6GovFwtq1a7nhhhuIi4ujW7dutvAEEBMTw5QpUzh37hw1a9YsVlt2djbZ2dm212lpaUDBD8NqtZbaMZA/Z7VaMU1Tx13UC2JT2XvBtFrZtXwu1ddMpXV+PAAZpht73CPIDu5J3fYDCa7fhOCL3lNZj8Wfqey9ICWnXpCLlXU/lHS7pR6gatSoQVRUFC+++CKhoaH4+fnx+eefExcXR6NGjUhISADAz8+vyPv8/Pxs6xISEvD19S1aqKMj3t7eRcaEhIQU20bhuksFqFdffZXJkycXW3769GmysrL+4h7LX2G1WklNTcU0Tf2LUhWnXpBClbUX8nKzObF1CTU3v0/L/IL7fdNNN9b73Uq96Puo6+FlG5uUlGSnKsuXytoLcvXUC3Kxsu6H9PT0Eo0rk3ugPv30U0aNGkWdOnVwcHCgbdu23HbbbWzcuLEsPq7EJk6cyIQJE2yv09LSCAoKwsfHR5fwXWNWqxXDMPDx8dEvxCpOvSCFKlMvJB3ZxfH1P+ByeCkNz2+mLgVXP5w3XdgUcCtNb5hIDx9/O1dZflWmXpC/R70gFyvrfvjjFXSXUyYBqmHDhixfvpzMzEzS0tIICAjg1ltvpUGDBvj7F/wfRmJiIgEBAbb3JCYmEh4eDoC/v3+xf4XLy8sjOTnZ9n5/f38SExOLjCl8XTjmj1xcXHBxcSm23GKx6EtpB4Zh6NgLoF6Q31XUXsjKSOHg+p/I2rWYgLNrCLQmcPH/E53Fkz0+MTS+4Vm6BNazW50VSUXtBSl96gW5WFn2Q0m3WabPgXJ3d8fd3Z1z586xaNEipk6dSkhICP7+/ixZssQWmNLS0li7di333XcfAFFRUaSkpLBx40YiIiIAWLp0KVarlY4dO9rGPP300+Tm5uL02xSusbGxNG3a9JKX74mIiJSmE3s3cmr9fKofW0aDrJ00v+g5TTmmA3udm5Ma2JVa4f1o0qoTnR0c7FesiIiUmjIJUIsWLcI0TZo2bcqBAwd47LHHaNasGSNHjsQwDMaPH89LL71E48aNbdOYBwYGMmTIEABCQ0O57rrruOeee5g+fTq5ubmMGzeOYcOGERgYCMDtt9/O5MmTGT16NE888QQ7duzg7bff5s033yyLXRIREeF8WjJ7YmfguftzGubtp07hCgOO4c8x70icmvShccf+tKzpbc9SRUSkjJRJgEpNTWXixIkcP34cb29vhg4dyssvv2w7U/T444+TmZnJmDFjSElJoUuXLixcuLDIdYezZ89m3Lhx9O7dG4vFwtChQ5k2bZptvaenJ4sXL2bs2LFERERQu3ZtJk2apGdAiYhIqTKtVg5u+pm01R8TmryEtkYOUHCWaYdbO87X60FgxEBCmrQkyDDsXK2IiJS1Un8OVEWi50DZj57rIIXUC1KovPVC2ukT7Fv8H/wPzqWu9YRt+SGjHica3EyTPqPx869zhS3IX1XeekHsR70gF6u0z4ESERGpqKx5eexZ8w2562YRlr6Gdr/d15RpurDNqzfuUaNo0b43DRz0h5yISFWlACUiIlXamWN7ObZpEWb8KuqlrCOMcwUrDNjt0JTkpsNo3ucuonRPk4iIoAAlIiJVTPLJgxzduIj8Qyupk7IBfzOJ2hetTzGrs8OnPz7d7qZZyw4Yuq9JREQuogAlIiKVWmrSMQ6v/5G8QysIOLeBQGsCF59LyjUd2O/UhGSfjlRr0p1mHWPoUs3dbvWKiEj5pgAlIiKVUl5OFptnP0v44Y9ofdEzmvJMC/sdG3PWpwOujbvTKKI3YV66PE9EREpGAUpERCqdw9tXY84fS/v8eDBgv6UBibU74tqoBw3a9SHUu5a9SxQRkQpKAUpERCqN3OwLbPlsIm2OzsLRsJJMDfa2fY7IgaNprCmQRUSkFChAiYhIpXBi3xbyvryD9vlHwYB17j2of8d7RPnXtXdpIiJSiShAiYhIhWfNzyf7y5E0yD/KWTw52OEF2vcboRn0RESk1ClAiYhIhbflx/+jbf4h0k03csespEOdYHuXJCIilZQuCBcRkQot+0IGdTa9BsC2kNH4KzyJiEgZUoASEZEKbeu8V/Ezz5JAbdrePNHe5YiISCWnACUiIhVW6pmThB36CIDD4Y/g5l7dzhWJiEhlpwAlIiIV1t65z1KdC+x3aEj7QffauxwREakCFKBERKRCOnlgO20SvwHgfPfncXBwsHNFIiJSFShAiYhIhZQ0fyJORj6bXTvSutv19i5HRESqCAUoERGpcPatW0x4xkryTQOP61+xdzkiIlKFKECJiEiFcvrIbiyLnwJgvfdAGoa1s3NFIiJSlehBuiIiUr6ZJqf2buDkr19S61gs9fMP4wNkmi6E3PSSvasTEZEqRgFKRETKrVMHt5E/exh1rScI+G1Znmlhp0trrF0m0KZOfXuWJyIiVZAClIiIlFvHFk2jg/UEWaYTO9zakd1oAI273kRrv4A/f7OIiEgZUIASEZFyyZqXR0jSzwDs6DyNdn1vt3NFIiIimkRCRETKqf0bf8aHc6SZ1WjR7QZ7lyMiIgIoQImISDmVumEuALu9uuHq6mbnakRERAooQImISLljzcuj4eklADi1GmrnakRERH6nACUiIuXO/vWLqUUKqaY7YZ0H2bscERERGwUoEREpd9I2Fly+t8eruy7fExGRckUBSkREypX8vFwanFkKgHNrXb4nIiLliwKUiIiUK/vXLaIWqaRQnbDOA+1djoiISBEKUCIiUq5kbP4KgD1ePXBxcbVzNSIiIkUpQImISLmRn5dL47O/AOAarsv3RESk/FGAEhGRcuPY9uXUJI1z1CCsky7fExGR8kcBSkREyg1zzw8A7K3ZA2dnZ/sWIyIicgkKUCIiUi7k5WTTLHUlAK7hN9m5GhERkUtTgBIRkXJh37qfqEk6ydSgeaf+9i5HRETkkhSgRESkXLjw2+x7+2r2xMlJl++JiEj55GjvAkREpOpKO3OKfUtn4rX/f7TP3Q+Am2bfExGRckwBSkRErqmcrAvs+uVzLNu/JDRzPe2MfAByTQfW1IghKrKfnSsUERG5PAUoERG5Jk4e2sWx2PdpcupbwkkrWGjAPodGJIbcSIMed9DU0RlHJyf7FioiInIFClAiIlJm8nJz2PHLlzhsmkHLrI0E/rY8EW8OBA7Cv8tdNAlrSxPAarWSlJRkz3JFRET+lAKUiIiUuvSUs+xa8A7BBz4lnDMAWE2D7W4R5LcdScuet9BZE0WIiEgFpAAlIiKlJvHoPg7/8DrNE76lo3EBgGQ82BMwhHrR99G6YZidKxQREfl7Sn0a8/z8fJ599llCQkJwc3OjYcOGvPjii5imaRtjmiaTJk0iICAANzc3oqOj2b9/f5HtJCcnM3z4cDw8PPDy8mL06NFkZGQUGbNt2za6du2Kq6srQUFBTJ06tbR3R0RESuDwrnVsen0ItT7qSMfEL6huXOCwJYhfW0ym2hN76HTvO9RVeBIRkUqg1M9ATZkyhQ8++IBZs2bRvHlzNmzYwMiRI/H09OTBBx8EYOrUqUybNo1Zs2YREhLCs88+S0xMDLt27cLV1RWA4cOHc+rUKWJjY8nNzWXkyJGMGTOGOXPmAJCWlkbfvn2Jjo5m+vTpbN++nVGjRuHl5cWYMWNKe7dEROQSju3bwunvnyc8bRn1DRMM2O7chvyosbTqNpT6DnrcoIiIVC6lHqDWrFnD4MGDGTBgAAD169fn888/Z926dUDB2ae33nqLZ555hsGDBwPwySef4Ofnx/z58xk2bBi7d+9m4cKFrF+/nnbt2gHwzjvv0L9/f1577TUCAwOZPXs2OTk5fPzxxzg7O9O8eXO2bNnCG2+8oQAlIlLGTh7axclvn6dNymKCfgtOG9274xkzkZatouxdnoiISJkp9X8a7NSpE0uWLGHfvn0AbN26lVWrVtGvX8FzPeLj40lISCA6Otr2Hk9PTzp27EhcXBwAcXFxeHl52cITQHR0NBaLhbVr19rGdOvWDWfn329CjomJYe/evZw7d660d0tERID0lDOse28UvrM60y51EQ6GyWa3Thy4cSERj31HI4UnERGp5Er9DNSTTz5JWloazZo1w8HBgfz8fF5++WWGDx8OQEJCAgB+fn5F3ufn52dbl5CQgK+vb9FCHR3x9vYuMiYkJKTYNgrX1axZs1ht2dnZZGdn216npRU8h8RqtWK1Wv/yPsvVs1qtmKap4y7qhQrCtFrZungW9da9QAdSwICtLu1w6fMMrdt2B/jbP0P1ghRSL0gh9YJcrKz7oaTbLfUANXfuXGbPns2cOXNsl9WNHz+ewMBARowYUdofd1VeffVVJk+eXGz56dOnycrKskNFVZfVaiU1NRXTNLFYdI9EVaZeKP/OnjxI7qJJtM3eAMBRI5Bj7Z+hYdveAKX27Cb1ghRSL0gh9YJcrKz7IT09vUTjSj1APfbYYzz55JMMGzYMgJYtW3LkyBFeffVVRowYgb+/PwCJiYkEBATY3peYmEh4eDgA/v7+xf4POS8vj+TkZNv7/f39SUxMLDKm8HXhmD+aOHEiEyZMsL1OS0sjKCgIHx8fPDw8/sZey9WyWq0YhoGPj49+IVZx6oXyKzsrk61fTaX1wem4GTnkmI5sCLqLNrc9T10391L/PPWCFFIvSCH1glysrPuhcDK7P1PqAer8+fPFdsjBwcF2SiwkJAR/f3+WLFliC0xpaWmsXbuW++67D4CoqChSUlLYuHEjERERACxduhSr1UrHjh1tY55++mlyc3NxcnICIDY2lqZNm17y8j0AFxcXXFxcii23WCz6UtqBYRg69gKoF8qb8xkpbJ//Fg0OzCSSc2DADudW1Bj6Dp2ahpfpZ6sXpJB6QQqpF+RiZdkPJd1mqQeoQYMG8fLLL1OvXj2aN2/O5s2beeONNxg1ahRQsNPjx4/npZdeonHjxrZpzAMDAxkyZAgAoaGhXHfdddxzzz1Mnz6d3Nxcxo0bx7BhwwgMDATg9ttvZ/LkyYwePZonnniCHTt28Pbbb/Pmm2+W9i6JiFQJaWeT2P3tv2l6dA4dKXjuXhLexLd6mPaDx2HRlOQiIiKlH6Deeecdnn32We6//36SkpIIDAzk3nvvZdKkSbYxjz/+OJmZmYwZM4aUlBS6dOnCwoULi5w2mz17NuPGjaN3795YLBaGDh3KtGnTbOs9PT1ZvHgxY8eOJSIigtq1azNp0iRNYS4icpXOnjrK/u+m0PLkV3Q0Cu4HPW4EcKLFPwkfeC8dXdzsXKGIiEj5YZimadq7CHtJS0vD09OT1NRU3QN1jVmtVpKSkvD19dUp+SpOvWA/pw7v4diCf9H69AJcjFwADlnqc7btA7SJGYHjb5dHXyvqBSmkXpBC6gW5WFn3Q0mzQamfgRIRkfLtyJ6NnP5pCuEpsQQYVjBgj2MoFyLH07rnLTTQpXoiIiKXpQAlIlJFHNi8gvSfp9AmcxXBAAZsc4nA0v0Rmkf2w9C/7oqIiPwpBSgRkcrMNNkV9yP5K16jZdYm2+JN1briHv04rdp2s2NxIiIiFY8ClIhIJWRarexY8TXOq18jLHc3AHmmhc1effC57gnahkbYuUIREZGKSQFKRKSSObBlJdk/PUPL7C0AZJtObK49kKCBE2kf0tS+xYmIiFRwClAiIpXEyfg9nPrmKSLSlgCQYzqy0e9mGt3wFJEB9excnYiISOWgACUiUsGdO32SfV9Npk3CVwQaeVhNg42efahz40tE1dcZJxERkdKkACUiUkGlnjvN7v+9TKtjc+hoZIMB213a4NrvZdqHd7Z3eSIiIpWSApSISAWTmXaO7V9PIezwLCI5Dwbsd2jE+a5P0ar7jRiGYe8SRUREKi0FKBGRCmTjdx/QYNPLRJIOQLwlmHMdH6NNn+F6jpOIiMg1oAAlIlJB7Nu0jNYbn8LRsHLUCCQpYgJt+o0ixMHB3qWJiIhUGQpQIiIVwIXMdNy+vw9Hw8rG6j1o/dA86jk527ssERGRKkfXe4iIVADbZz5EkHmSJLxpNPJDHBWeRERE7EIBSkSknNu+4ms6nP4fAAk9X8ezlp+dKxIREam6FKBERMqxtLNJ+C19BIBfa99Iq+432rkiERGRqk0BSkSkHNs/8158SeaoEUirkW/buxwREZEqTwFKRKSc2rjg/4hIX0qeaeHCwPep5u5h75JERESqPM3CJyJSzpw5eZjDc5+gXcpCANYHjSIqoqedqxIRERFQgBIRKTeyzmewZe5LtIr/mHZGNgDrvPrT7s5X7FyZiIiIFFKAEhGxM9NqZfPCGQSuf5VI8zQYsMexGVz3Kh3a9bJ3eSIiInIRBSgRETs6sms95+c/TNuc7QAkUJtjEU8S0X80FgfdpioiIlLeKECJiNhBZto5dsx5iohTX+BoWLlgOrO53l2ED3uW9posQkREpNxSgBIRuYZMq5VNCz8maN3LdCQZDNhUrQv+t7xBp/pN7V2eiIiI/AkFKBGRayT1bAJH/vMPIrLWA3Dc8OdM1xdp2+sWO1cmIiIiJaUAJSJyDRzZtR6necNpZSaSZTqxKXgUbW97jrpu7vYuTURERK6CApSISBnb+vMcGq18GHcji5OGH1k3f0an5h3sXZaIiIj8BQpQIiJlxLRaWffpM7Q/9D4Ww2SHc2vqjJlLYG1/e5cmIiIif5EClIhIGUg8tp8Tcx+jY/ovYMCvtW8gYsz/4eTsYu/SRERE5G9QgBIRKSXpKWfYs/RT3Pd8TVjONvyAXNOBjc0nEnnLY/YuT0REREqBApSIyN9gWq3sXPkNuetnEpYeR3sj17Zup1NLjF5PExnVz44VioiISGlSgBIR+QtMq5UdS7/A7dfXaZF3oGChAfGWepwKvp76PUbQPLiJfYsUERGRUqcAJSJyFUxrPltjP8Nj3Zu0zI8H4Lzpwhaf66nddSSNW0YRYrHYuUoREREpKwpQIiIldGDTchwXPEC49QgAmaYrWwJvpcmQx+nkV9fO1YmIiMi1oAAlIvInrPn5rJ8zmbYH3sXJyCfddGN73dtoNuQJOvtoSnIREZGqRAFKROQKziYe5dSMEXTM2gQGbHTvTsOR/6FTbT97lyYiIiJ2oAAlInIZO5Z/TcAv42lBKhdMZ7a2eJKOQx/G0D1OIiIiVZYClIjIJayd+QQdD08H4JAlGOOmGUSGRdi5KhEREbE3BSgRkT/Y/etPtvD0a60baD3qXdzcq9u5KhERESkPFKBERC6Sk51FtcWPAbC25vVEPjDTvgWJiIhIuaIL+UVELrLpixcIth4jGQ+a/uN1e5cjIiIi5YwClIjIb04e2kX4oQ8BONDmKbxq+dq5IhERESlvFKBERADTauXM3AdxNXLZ7hxO+0H32rskERERKYcUoEREgM2LZtIqaz05piOeN0/TVOUiIiJySaX+F0L9+vUxDKPYf2PHjgUgKyuLsWPHUqtWLapXr87QoUNJTEwsso2jR48yYMAAqlWrhq+vL4899hh5eXlFxixbtoy2bdvi4uJCo0aNmDlzZmnviohUEekpZ6m3djIAG4Puol7j1nauSERERMqrUg9Q69ev59SpU7b/YmNjAbj55psBePjhh/n++++ZN28ey5cv5+TJk9x444229+fn5zNgwABycnJYs2YNs2bNYubMmUyaNMk2Jj4+ngEDBtCzZ0+2bNnC+PHjufvuu1m0aFFp746IVAG7Zj9ObVI4ZgTSZvgL9i5HREREyrFSn8bcx8enyOt//etfNGzYkO7du5OamspHH33EnDlz6NWrFwAzZswgNDSUX3/9lcjISBYvXsyuXbv4+eef8fPzIzw8nBdffJEnnniC559/HmdnZ6ZPn05ISAivv14wQ1ZoaCirVq3izTffJCYmprR3SUQqqbzcHDZ//z7tk/4HBqT0mkKQm7u9yxIREZFyrEyfA5WTk8Nnn33GhAkTMAyDjRs3kpubS3R0tG1Ms2bNqFevHnFxcURGRhIXF0fLli3x8/OzjYmJieG+++5j586dtGnThri4uCLbKBwzfvz4K9aTnZ1Ndna27XVaWhoAVqsVq9VaCnssJWW1WjFNU8dd7NILuTnZbPvxQwK3v0d7MxEMWOfRl3adB6on7Ui/F6SQekEKqRfkYmXdDyXdbpkGqPnz55OSksJdd90FQEJCAs7Oznh5eRUZ5+fnR0JCgm3MxeGpcH3huiuNSUtL48KFC7i5uV2ynldffZXJkycXW3769GmysrKuev/kr7NaraSmpmKaJhbdrF+lXcteyM3JIn7FbBodmEl7kgBIxoNtdW4npO99JCUllenny5Xp94IUUi9IIfWCXKys+yE9Pb1E48o0QH300Uf069ePwMDAsvyYEps4cSITJkywvU5LSyMoKAgfHx88PDzsWFnVY7VaMQwDHx8f/UKs4q5FL2SmpbBzwTuEHJhFN84CcAYv9jUcScvrH6JbDc8y+Vy5Ovq9IIXUC1JIvSAXK+t+cHV1LdG4MgtQR44c4eeff+brr7+2LfP39ycnJ4eUlJQiZ6ESExPx9/e3jVm3bl2RbRXO0nfxmD/O3JeYmIiHh8dlzz4BuLi44OLiUmy5xWLRl9IODMPQsReg7HrhbOIx9n/3GmEn5hFJJgCnqcmBxncTPmQ8ndyrl+rnyd+n3wtSSL0ghdQLcrGy7IeSbrPMOnHGjBn4+voyYMAA27KIiAicnJxYsmSJbdnevXs5evQoUVFRAERFRbF9+/Yil9LExsbi4eFBWFiYbczF2ygcU7gNEanaUk6fZN07I6j+fhsiT8zEg0yOGnVY23IyNZ7YRdTwZ3BTeBIREZG/oEzOQFmtVmbMmMGIESNwdPz9Izw9PRk9ejQTJkzA29sbDw8PHnjgAaKiooiMjASgb9++hIWFcccddzB16lQSEhJ45plnGDt2rO3s0T//+U/effddHn/8cUaNGsXSpUuZO3cuP/zwQ1nsjohUIDtXfYvvzw/RgXNgwF7HpmS2G0frPsOp5+Bg7/JERESkgiuTAPXzzz9z9OhRRo0aVWzdm2++icViYejQoWRnZxMTE8P7779vW+/g4MCCBQu47777iIqKwt3dnREjRvDCC78/myUkJIQffviBhx9+mLfffpu6devy3//+V1OYi1RhuTnZbJz5KB1OfIrFMDliqUt67yk0j+qPocs+REREpJQYpmma9i7CXtLS0vD09CQ1NVWTSFxjVquVpKQkfH19dU1zFVcavXDy0C4y5oygSd4+ANZ6X0/L0e9RzV3f64pEvxekkHpBCqkX5GJl3Q8lzQZlOgufiEhZys/LY+P8aYRtn0qgcYE03DkQ+Qodr7vL3qWJiIhIJaUAJSIVjmm1su2XuXiufpkO1qNgwG6n5tS8YxZt6zW2d3kiIiJSiSlAiUiFsm/TMvIWPkPrnO0ApOLO7sb/pN0tT+Lo5Gzn6kRERKSyU4ASkQrhwNZVpMVOpW3GcgCyTSc2BQ4j7ObnifSubefqREREpKpQgBKRcis/L4+tSz7HdeP/EfbbGSerabDB6zrq3fQSUUGN7FyhiIiIVDUKUCJS7mSknWPH9+9Q78CntDULHqqdazqw1aMn3jGP06FFRztXKCIiIlWVApSIlBtnEo6y/7vXaH5yHpGcByCF6uyucxMN+4+nXZ0QO1coIiIiVZ0ClIjYXdLRPRz58lHaJP9ElJEHwFFLHU6FjqZV/zFEudewc4UiIiIiBRSgRMQurPn57Fg5n/x1/6V1ZhwWwwQD9jiGcqHDOFr3vo16Dg72LlNERESkCAUoEbmmkpNOsG/hdILiv6SVmViw0IAtblG49JhAaMe+9i1QRERE5AoUoETkmji0Yy3Ji6fSKnUZkb9dppdGNXb5DMAl4nZad+iBxWKxc5UiIiIiV6YAJSJl6sieTZxZMJmIjGU0ADBgv2NjUsLuoHnMSDq4VScpKcneZYqIiIiUiAKUiJSJE4d2cvLb52mbEkuwYQKwsUZPPHpNoHGbbrZxVqvVXiWKiIiIXDUFKBEpNSlnEjiw5hss+36iVfpK6hhWMGBztc549n+OCD2/SURERCo4BSgR+VuO7NnIqXXz8Ty2hCY5u2j329kmDNjq2p5qMZNoc9EZJxEREZGKTAFKRK5a1vkMdiyegceOT2iSt4/gwhUGHLLUJ9G/B7U73ETr8K72LFNERESk1ClAiUiJnTy0i6OL36VZwre0IwOAHNOB3W5tyQrpS73IITQIblIwWYSIiIhIJaQAJSJXlHo2kX3L5uC2bz4tsrcQ+NvyU/hwOORWmlx3H6396tq1RhEREZFrRQFKRIrJSEtmz7IvcNr9DWHnN9LeyLet2+baHmu70bTscTMBjvoVIiIiIlWL/voREQCyzqeza/lXGDv+R1jGr7QzcgtWGHDQIYSkev0J7nYnrUKa2bdQERERETtSgBKpwnKys9i1aj55W+cRmrqKtkZWwQoDjhmBHK87gMDOt9OwWVsa2rdUERERkXJBAUqkisnLzWF33E9c2Pwlzc4tI5zMghXGb/c1BfTDN+o2GrSIJMhisW+xIiIiIuWMApRIFZCbk83+jUtJ3ziXRmeW0JJU27rT1OSgbx+8OgyjadueBCg0iYiIiFyWApRIJZSfl8eh7Ws4uz2WaifjaHRhG2FGtm39OWqwr1Yvqre9lWYdY/DRZBAiIiIiJaK/mkQqifMZqexe8TXGrq9plLGBxpynceFKoyA0HfDqgkv4zYR2GkhHZxd7lisiIiJSISlAiVRgFzLT2b3yf5g7viE0PY6Ii84ypVGNg9XCya7bCZ+WfQgJa097Bwc7VisiIiJS8SlAiVQgqWcTid/yCxcOrsHzzCYaZO+h7UXTjZ8w/DjqH0Ot9kNp2KoLbXRpnoiIiEip0l9XIuWYNT+ffRuXkrJ+LgFn4wi2HiP84gEGnDR8OeLfl9odh9GoVWfqaBIIERERkTKjACVSzphWKwc2LyN53ZfUT4ylGWeLrD9qqUOCR2sI6ohf827UaxJOoEKTiIiIyDWhACVSDmSmnWP/2h/I3b2Q+smraUyybV2G6cZur644hF1PSNto6vkEUM+OtYqIiIhUZQpQInZyJuEYB5bOpPqRJTTJ2ka4kW9bl2m6ssujMw4tbySs6w20d3O3Y6UiIiIiUkgBSuQaMq357Fq9gOy1H9EyfRWRhaHJgOOGP8dqd6VaWD+aRl6n0CQiIiJSDilAiVwD506fZO+iD6l78Auam6cKFhqw17Ep50IGEtB+MPUataSu7mUSERERKdcUoETKSMqZBPYt/wLX/d8RdmEzkYYVKLinaUft66jd416atoyyc5UiIiIicjUUoERKUcKxAxxdtwDX/QsIvbCJDhddorfPoQnnQm+necxIImt42bVOEREREflrFKBE/oak4wc5tmkx1kMrCUzdQB0zEf/ClQYcdGhAUr1+1O18O00atbBnqSIiIiJSChSgRErAmpfHyfidnD6wiewTW3E7uxu/Cwfw5wy+F43LMy0ccmrE2bq9qdv5Nho2bk1Du1UtIiIiIqVNAUrkMtKSk9i/+n8Ye3+iScY66nKBun8Yk28aHHBqTLJPB6o17kGDiN408fS2S70iIiIiUvYUoEQuknBkL0fXfIX74UU0zdpOxG8TPwBcMJ055lifcx5NMH1bUKN+OHVDO9LUS4FJREREpKpQgJIqzbRaObRjDafXf43vySU0yD9c5B6meEswpwJ64d12MI1adaGJk5M9yxURERERO1OAkionMy2ZA+sXkb0nlnqnl9OQM7b7lPJNgz3OLUgL7kPdqJsIadicELtWKyIiIiLliQKUVHp5Odkc2LKclO2L8UpYTaOcPbS+6NK8TNOFPe4dyGtyHY07D6W5T4AdqxURERGR8sxSFhs9ceIE//jHP6hVqxZubm60bNmSDRs22NabpsmkSZMICAjAzc2N6Oho9u/fX2QbycnJDB8+HA8PD7y8vBg9ejQZGRlFxmzbto2uXbvi6upKUFAQU6dOLYvdkQoo9dxpNi74kE2vD+H8K/Vp9uPNRB77D81yd+FoWDlmBPCr92A2d/k/LE/EE/H4AjoOGYe3wpOIiIiIXEGpn4E6d+4cnTt3pmfPnvz000/4+Piwf/9+atasaRszdepUpk2bxqxZswgJCeHZZ58lJiaGXbt24erqCsDw4cM5deoUsbGx5ObmMnLkSMaMGcOcOXMASEtLo2/fvkRHRzN9+nS2b9/OqFGj8PLyYsyYMaW9W1IBnDiwjeNrv6HGkZ9pkr2jyAQQ56jBoeoR5NXvQd2I/gSFNCXIjrWKiIiISMVU6gFqypQpBAUFMWPGDNuykJDf7yIxTZO33nqLZ555hsGDBwPwySef4Ofnx/z58xk2bBi7d+9m4cKFrF+/nnbt2gHwzjvv0L9/f1577TUCAwOZPXs2OTk5fPzxxzg7O9O8eXO2bNnCG2+8oQBVReTn5bJvw8+kbv2ewMRl1LOeoE7hSgPiLfU45deDmm2up0nbnkQ46opVEREREfl7Sv0vyu+++46YmBhuvvlmli9fTp06dbj//vu55557AIiPjychIYHo6Gjbezw9PenYsSNxcXEMGzaMuLg4vLy8bOEJIDo6GovFwtq1a7nhhhuIi4ujW7duODs728bExMQwZcoUzp07V+SMl1Qe6SlnObBmPta9P9EwNY5Qfr+sM9d0YI9LSzLq9yGo442ENAzTBBAiIiIiUqpKPUAdOnSIDz74gAkTJvDUU0+xfv16HnzwQZydnRkxYgQJCQkA+Pn5FXmfn5+fbV1CQgK+vr5FC3V0xNvbu8iYi89sXbzNhISESwao7OxssrOzba/T0tIAsFqtWK3WYuOl7FitVkzT/NPjblqtnDq8mxPrvsX9SCxNs7bTxsi3rU+hOvs9oqDJdTTuNJjmXrWKfIaUfyXtBan81AtSSL0ghdQLcrGy7oeSbrfUA5TVaqVdu3a88sorALRp04YdO3Ywffp0RowYUdofd1VeffVVJk+eXGz56dOnycrKskNFVZfVaiU1NRXTNLFYCuYyyc/LI+HgZjKPb8fh9B68MvYTlHuYusZ56ha+0YAjRiBHvLvg1KQPQc2jCHIseDZTVk4+WUlJ9tkh+csu1QtSNakXpJB6QQqpF+RiZd0P6enpJRpX6gEqICCAsLCwIstCQ0P53//+B4C/f8FjShMTEwkI+H3Gs8TERMLDw21jkv7wh3BeXh7Jycm29/v7+5OYmFhkTOHrwjF/NHHiRCZMmGB7nZaWRlBQED4+Pnh4eFztrsrfYLVaMQwD75pe7F8fS+aW/9Ho7C9EkFp0oAE5pgP7XJqTFtSbOh1uIKhxS00AUYkU9oKPj4/+z7GKUy9IIfWCFFIvyMXKuh8KJ7P7M6UeoDp37szevXuLLNu3bx/BwcFAwYQS/v7+LFmyxBaY0tLSWLt2Lffddx8AUVFRpKSksHHjRiIiIgBYunQpVquVjh072sY8/fTT5Obm4uRUcAYiNjaWpk2bXvb+JxcXF1xcXIott1gs+lJeQ3m5OeyO+4H0DXNxSVtFC9Js69LMahxxaUKGVyiWwJZ4N4igXtPWtHBxs2PFUtYMw9D3UAD1gvxOvSCF1AtysbLsh5Jus9QD1MMPP0ynTp145ZVXuOWWW1i3bh0ffvghH374IVCw0+PHj+ell16icePGtmnMAwMDGTJkCFBwxuq6667jnnvuYfr06eTm5jJu3DiGDRtGYGAgALfffjuTJ09m9OjRPPHEE+zYsYO3336bN998s7R3SUpBXk42e379kfNb/keT5GW05PdTpClUZ69Xd1zDhxIaNZCWlwi5IiIiIiLlQakHqPbt2/PNN98wceJEXnjhBUJCQnjrrbcYPny4bczjjz9OZmYmY8aMISUlhS5durBw4cIip81mz57NuHHj6N27NxaLhaFDhzJt2jTbek9PTxYvXszYsWOJiIigdu3aTJo0SVOYlyNpKWc4uG4hebt/pPG55bS4aMa8c9Rgl0cXqre9hbBOA+jorNAkIiIiIuWfYZqmae8i7CUtLQ1PT09SU1N1D1QpyM46z4GNS0nf/TM1E9bQKHcfDsbv7ZWMB/u9e1KtzVCadIjh3LkUfH19dUq+irNarSQlJakXRL0gNuoFKaRekIuVdT+UNBvoyaLyt5w+eZj4NV/jfCiWJpkbaW78Pk08Bhw16nCyViTVw2+gWccYOjoVPLdL05GKiIiISEWkACVX5dzpUxzfFUfm/lXUPrWMRvkH8SlcacAZvIj3aI8Z0oN67fpRL6gh9exYr4iIiIhIaVKAksu6kJnOwY1LyDgYh8uZ7QRk7sOf01w8x6HVNNjn1JRzdXrgEzGYhi0iqa1T7CIiIiJSSSlAiU1uTjYHtywnZcfPeCSsoVH2bloYecXGHTMCSareDGvD3jSIGkIzv7qX2JqIiIiISOWjAFXFJSce5+Car3E8sIgmGRtoZmT9vtKAJLw5WqMNef6tqVG/HUHNIwnyqqUH2YqIiIhIlaQAVcVY8/I4sncjCRsXUPPYzzTJ2U37wpnyjILpxeOrtyG3XjcC2/SlbsOW+OqSPBERERERQAGq0ks5fYoj25dz/tBaapzeTP2sPYQYFwgpHGDAAYeGnA7sRe2IwTRs2Ym2Dg72LFlEREREpNxSgKpkzmekcmD9Ys7vWYLf6ThCrIfxuniAAedNF/a7tSKrQV+Co26kUVAjGtmpXhERERGRikQBqhJIPZvI7p8+oMbRJTTO3kWrP0z8cMRSl0SPVph12lG7WReCm7WltZOTnaoVEREREam4FKAqsKP7tnBq0Zu0OvMjkUZOwUIDTuHDsZodsTTqRYP2/Qj2DSTYvqWKiIiIiFQKClAVzIXMdA5siMX66we0vrCu4CG1Bhy0hHC6yTDqtBtI3QZhBGjiBxERERGRUqcAVQ5kZ51n37rYS6yxkpV8krzEXbilHMAnK54AaxItf5s1z2oabHWPxKnzOJpH9aehQpOIiIiISJlSgCoH0lPO0HLpnSUbbMA5PNhXuw+B1z1Mm0Yty7Y4ERERERGxUYAqBxwcnIi31L/kuvOOHmR4NAKfZlSv2wL/Rq2p5VeXjte2RBERERERQQGqXKjpE0DNSVvtXYaIiIiIiPwJ3TQjIiIiIiJSQgpQIiIiIiIiJaQAJSIiIiIiUkIKUCIiIiIiIiWkACUiIiIiIlJCClAiIiIiIiIlpAAlIiIiIiJSQgpQIiIiIiIiJaQAJSIiIiIiUkIKUCIiIiIiIiWkACUiIiIiIlJCClAiIiIiIiIlpAAlIiIiIiJSQgpQIiIiIiIiJeRo7wLsyTRNANLS0uxcSdVjtVpJT0/H1dUVi0U5vipTL0gh9YIUUi9IIfWCXKys+6EwExRmhMup0gEqPT0dgKCgIDtXIiIiIiIi5UF6ejqenp6XXW+YfxaxKjGr1crJkyepUaMGhmHYu5wqJS0tjaCgII4dO4aHh4e9yxE7Ui9IIfWCFFIvSCH1glysrPvBNE3S09MJDAy84hmuKn0GymKxULduXXuXUaV5eHjoF6IA6gX5nXpBCqkXpJB6QS5Wlv1wpTNPhXQxqYiIiIiISAkpQImIiIiIiJSQApTYhYuLC8899xwuLi72LkXsTL0ghdQLUki9IIXUC3Kx8tIPVXoSCRERERERkauhM1AiIiIiIiIlpAAlIiIiIiJSQgpQIiIiIiIiJaQAJSIiIiIiUkIKUPKXvPrqq7Rv354aNWrg6+vLkCFD2Lt3b5ExWVlZjB07llq1alG9enWGDh1KYmJikTEPPvggERERuLi4EB4efsnPMk2T1157jSZNmuDi4kKdOnV4+eWXy2rX5C+4lv2waNEiIiMjqVGjBj4+PgwdOpTDhw+X0Z7J1SqNXti6dSu33XYbQUFBuLm5ERoayttvv13ss5YtW0bbtm1xcXGhUaNGzJw5s6x3T67CteqFr7/+mj59+uDj44OHhwdRUVEsWrTomuyjlMy1/L1QaPXq1Tg6Ol72/0vEPq5lL2RnZ/P0008THByMi4sL9evX5+OPPy6V/VCAkr9k+fLljB07ll9//ZXY2Fhyc3Pp27cvmZmZtjEPP/ww33//PfPmzWP58uWcPHmSG2+8sdi2Ro0axa233nrZz3rooYf473//y2uvvcaePXv47rvv6NChQ5nsl/w116of4uPjGTx4ML169WLLli0sWrSIM2fOXHI7Yh+l0QsbN27E19eXzz77jJ07d/L0008zceJE3n33XduY+Ph4BgwYQM+ePdmyZQvjx4/n7rvv1h/O5ci16oUVK1bQp08ffvzxRzZu3EjPnj0ZNGgQmzdvvqb7K5d3rXqhUEpKCnfeeSe9e/e+JvsnJXcte+GWW25hyZIlfPTRR+zdu5fPP/+cpk2bls6OmCKlICkpyQTM5cuXm6ZpmikpKaaTk5M5b94825jdu3ebgBkXF1fs/c8995zZunXrYst37dplOjo6mnv27Cmz2qX0lVU/zJs3z3R0dDTz8/Nty7777jvTMAwzJyen9HdE/ra/2wuF7r//frNnz562148//rjZvHnzImNuvfVWMyYmppT3QEpLWfXCpYSFhZmTJ08uncKl1JV1L9x6663mM888c9n/L5Hyo6x64aeffjI9PT3Ns2fPlkndOgMlpSI1NRUAb29voOBfB3Jzc4mOjraNadasGfXq1SMuLq7E2/3+++9p0KABCxYsICQkhPr163P33XeTnJxcujsgpaqs+iEiIgKLxcKMGTPIz88nNTWVTz/9lOjoaJycnEp3J6RUlFYvpKam2rYBEBcXV2QbADExMVfVT3JtlVUv/JHVaiU9Pf2KY8S+yrIXZsyYwaFDh3juuefKoHIpbWXVC9999x3t2rVj6tSp1KlThyZNmvDoo49y4cKFUqnbsVS2IlWa1Wpl/PjxdO7cmRYtWgCQkJCAs7MzXl5eRcb6+fmRkJBQ4m0fOnSII0eOMG/ePD755BPy8/N5+OGHuemmm1i6dGlp7oaUkrLsh5CQEBYvXswtt9zCvffeS35+PlFRUfz444+luQtSSkqrF9asWcOXX37JDz/8YFuWkJCAn59fsW2kpaVx4cIF3NzcSndn5G8py174o9dee42MjAxuueWWUqtfSk9Z9sL+/ft58sknWblyJY6O+hO3vCvLXjh06BCrVq3C1dWVb775hjNnznD//fdz9uxZZsyY8bdrV3fJ3zZ27Fh27NjBqlWrSn3bVquV7OxsPvnkE5o0aQLARx99REREBHv37i29a1ml1JRlPyQkJHDPPfcwYsQIbrvtNtLT05k0aRI33XQTsbGxGIZR6p8pf11p9MKOHTsYPHgwzz33HH379i3F6uRaula9MGfOHCZPnsy3336Lr6/vX/4sKTtl1Qv5+fncfvvtTJ482fb3gpRvZfl7wWq1YhgGs2fPxtPTE4A33niDm266iffff/9v/yObLuGTv2XcuHEsWLCAX375hbp169qW+/v7k5OTQ0pKSpHxiYmJ+Pv7l3j7AQEBODo6FvllGBoaCsDRo0f/XvFS6sq6H9577z08PT2ZOnUqbdq0oVu3bnz22WcsWbKEtWvXltZuSCkojV7YtWsXvXv3ZsyYMTzzzDNF1vn7+xebxTExMREPDw+dfSpnyroXCn3xxRfcfffdzJ07t9jlnVI+lGUvpKens2HDBsaNG4ejoyOOjo688MILbN26FUdHR121Us6U9e+FgIAA6tSpYwtPUPD3o2maHD9+/O/vQJncWSWVntVqNceOHWsGBgaa+/btK7a+8CbAr776yrZsz549Vz1pwP+3c/csjURxFMYnKmKiIBYxTTAgqI2FWihptInYCYqNIGghaCwDsRU/RAorK7VNqYVRC7voCKMMvqCVBqyEiGIhZ4tdgrNEnTXjXYvnB9NcLnf4w+HCmWJ2dnZkWZaurq4qaycnJ7IsS+fn58EMg5qZykMmk9Hg4KBn7e7uTpZl6fDwsPZBULOgsnB6eqr29nZls9mq71leXlZvb69nbXp6mp9I/CCmsiBJm5ubampqUj6fD3YIBMJEFl5fX+U4judJp9Pq6emR4zh6fHz8nuHwT0zdC2trawqHwyqXy5W1fD6vuro6PT091TwHBQpfkk6n1draqv39fZVKpcrzNpSLi4vq6OhQoVBQsVhUMplUMpn0nHN5eSnbtrWwsKDu7m7Zti3btvXy8iLp94U4MDCg4eFhHR8fq1gsamhoSKOjo0bnxcdM5WF3d1ehUEirq6u6uLjQ0dGRxsbGlEgkArkQUbsgsuA4jqLRqGZmZjxn3N/fV/ZcX18rEokom83KdV3lcjnV19dre3vb6Lx4n6ksbGxsqKGhQblczrPn4eHB6Lx4n6ks/I2/8P08prJQLpcVj8c1NTWls7MzHRwcqKurS/Pz84HMQYHCl1iWVfVZX1+v7Hl+ftbS0pLa2toUiUQ0MTGhUqnkOWdkZKTqOTc3N5U9t7e3mpycVEtLi2KxmObm5r7tt5T4GpN52NraUn9/v5qbmxWNRjU+Pi7XdQ1Nis8EkYWVlZWqZyQSCc+79vb21NfXp8bGRnV2dnregf/PVBbeuzdmZ2fNDYsPmbwX3qJA/Twms+C6rlKplMLhsOLxuDKZTGAfW0N/hgEAAAAAfIKfSAAAAACATxQoAAAAAPCJAgUAAAAAPlGgAAAAAMAnChQAAAAA+ESBAgAAAACfKFAAAAAA4BMFCgAAAAB8okABAAAAgE8UKAAAAADwiQIFAAAAAD5RoAAAAADAp19Ppq9PD2zbawAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(10,5))\n", "\n", "plt.plot(comparison[\"date\"], comparison[\"raw_scores\"], label=\"Raw\")\n", "plt.plot(comparison[\"date\"], comparison[\"clean_scores\"], label=\"Clean\")\n", "\n", "plt.legend()\n", "plt.title(\"Score comparison: raw vs cleaned data\")\n", "plt.grid(alpha=0.3)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 14, "id": "a002234f-deec-4283-8245-ffac74f1930b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[None]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "import s3fs\n", "os.environ[\"AWS_ACCESS_KEY_ID\"] = 'N0C5PK75FDX2WXI8OVP1'\n", "os.environ[\"AWS_SECRET_ACCESS_KEY\"] = 'nZvC2urUkG7EvhDsFDyaOslqr160aoWMs+5MP3Ft'\n", "os.environ[\"AWS_SESSION_TOKEN\"] = 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJOMEM1UEs3NUZEWDJXWEk4T1ZQMSIsImFjciI6IjAiLCJhbGxvd2VkLW9yaWdpbnMiOlsiKiJdLCJhdWQiOlsibWluaW8iLCJhY2NvdW50Il0sImF1dGhfdGltZSI6MTc3MzIyNzI3OCwiYXpwIjoib255eGlhLW1pbmlvIiwiZW1haWwiOiJzYXJhaC50aG91bXlyZUBlbnNhZS5mciIsImVtYWlsX3ZlcmlmaWVkIjp0cnVlLCJleHAiOjE3NzQ0MzY4OTksImZhbWlseV9uYW1lIjoiVEhPVU1ZUkUiLCJnaXZlbl9uYW1lIjoiU2FyYWgiLCJncm91cHMiOlsiYmRjLWRhdGEiLCJiZGMtY2FybWlnbmFjLWczIl0sImlhdCI6MTc3MzIyNzI5OSwiaXNzIjoiaHR0cHM6Ly9hdXRoLmdyb3VwZS1nZW5lcy5mci9yZWFsbXMvZ2VuZXMiLCJqdGkiOiI5Mjc0ODgyMy04OTgzLTQzYjktYTZhNy0xYjhlNDdiOTRjNTUiLCJuYW1lIjoiU2FyYWggVEhPVU1ZUkUiLCJwb2xpY3kiOiJzdHNvbmx5IiwicHJlZmVycmVkX3VzZXJuYW1lIjoic3Rob3VteXJlLWVuc2FlIiwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbIm9mZmxpbmVfYWNjZXNzIiwiZGVmYXVsdC1yb2xlcy1nZW5lcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJvcGVuaWQgcHJvZmlsZSBlbWFpbCIsInNpZCI6IjRkODM3NWVmLTQwY2QtNDYyMi05NzIyLTI4YjhjZTQ2MWQ5YyIsInN1YiI6ImVhYWVkN2QyLWM4MjYtNGIxNC05MzczLTYwYjNhODhlMWFiNiIsInR5cCI6IkJlYXJlciJ9.hl_SekvaH9A22PMb3W0VQBSNO67LnaneIuLC-X5XBnzOO5GLV61aocDRfYC6hvVVhdzyewSTtD2kvdyJdeu6qA'\n", "os.environ[\"AWS_DEFAULT_REGION\"] = 'us-east-1'\n", "fs = s3fs.S3FileSystem(\n", " client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'},\n", " key = os.environ[\"AWS_ACCESS_KEY_ID\"], \n", " secret = os.environ[\"AWS_SECRET_ACCESS_KEY\"], \n", " token = os.environ[\"AWS_SESSION_TOKEN\"])\n", "\n", "# 3️⃣ Upload du CSV\n", "local_file = \"stock_repaired.csv\"\n", "s3_path = \"projet-bdc-carmignac-g3\"\n", "\n", "fs.put(local_file, s3_path)" ] }, { "cell_type": "code", "execution_count": 16, "id": "eeb8f32c-c717-4d48-85c5-248661b5a945", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "====================================\n", "RUNNING COHERENCE SCORE ON: raw\n", "====================================\n", "GLOBAL SCORE: 0.756699778421513\n", "\n", "====================================\n", "RUNNING COHERENCE SCORE ON: clean\n", "====================================\n" ] }, { "ename": "KeyError", "evalue": "'Centralisation Date'", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mKeyError\u001b[39m Traceback (most recent call last)", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/indexes/base.py:3812\u001b[39m, in \u001b[36mIndex.get_loc\u001b[39m\u001b[34m(self, key)\u001b[39m\n\u001b[32m 3811\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m3812\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_engine\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 3813\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", "\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/index.pyx:167\u001b[39m, in \u001b[36mpandas._libs.index.IndexEngine.get_loc\u001b[39m\u001b[34m()\u001b[39m\n", "\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/index.pyx:196\u001b[39m, in \u001b[36mpandas._libs.index.IndexEngine.get_loc\u001b[39m\u001b[34m()\u001b[39m\n", "\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/hashtable_class_helper.pxi:7088\u001b[39m, in \u001b[36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[39m\u001b[34m()\u001b[39m\n", "\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/hashtable_class_helper.pxi:7096\u001b[39m, in \u001b[36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[39m\u001b[34m()\u001b[39m\n", "\u001b[31mKeyError\u001b[39m: 'Centralisation Date'", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[31mKeyError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[16]\u001b[39m\u001b[32m, line 45\u001b[39m\n\u001b[32m 39\u001b[39m \u001b[38;5;66;03m# --------------------------------------------------------\u001b[39;00m\n\u001b[32m 40\u001b[39m \u001b[38;5;66;03m# LOAD STOCKS\u001b[39;00m\n\u001b[32m 41\u001b[39m \u001b[38;5;66;03m# --------------------------------------------------------\u001b[39;00m\n\u001b[32m 43\u001b[39m stocks = pd.read_csv(stock_file, low_memory=\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[32m---> \u001b[39m\u001b[32m45\u001b[39m stocks[\u001b[33m\"\u001b[39m\u001b[33mCentralisation Date\u001b[39m\u001b[33m\"\u001b[39m] = pd.to_datetime(\u001b[43mstocks\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mCentralisation Date\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m)\n\u001b[32m 46\u001b[39m stocks[\u001b[33m\"\u001b[39m\u001b[33mRegistrar Account - ID\u001b[39m\u001b[33m\"\u001b[39m] = stocks[\u001b[33m\"\u001b[39m\u001b[33mRegistrar Account - ID\u001b[39m\u001b[33m\"\u001b[39m].astype(\u001b[38;5;28mstr\u001b[39m).str.strip()\n\u001b[32m 47\u001b[39m stocks[\u001b[33m\"\u001b[39m\u001b[33mProduct - Isin\u001b[39m\u001b[33m\"\u001b[39m] = stocks[\u001b[33m\"\u001b[39m\u001b[33mProduct - Isin\u001b[39m\u001b[33m\"\u001b[39m].astype(\u001b[38;5;28mstr\u001b[39m)\n", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/frame.py:4113\u001b[39m, in \u001b[36mDataFrame.__getitem__\u001b[39m\u001b[34m(self, key)\u001b[39m\n\u001b[32m 4111\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.columns.nlevels > \u001b[32m1\u001b[39m:\n\u001b[32m 4112\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._getitem_multilevel(key)\n\u001b[32m-> \u001b[39m\u001b[32m4113\u001b[39m indexer = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4114\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[32m 4115\u001b[39m indexer = [indexer]\n", "\u001b[36mFile \u001b[39m\u001b[32m/opt/python/lib/python3.13/site-packages/pandas/core/indexes/base.py:3819\u001b[39m, in \u001b[36mIndex.get_loc\u001b[39m\u001b[34m(self, key)\u001b[39m\n\u001b[32m 3814\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[32m 3815\u001b[39m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc.Iterable)\n\u001b[32m 3816\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[32m 3817\u001b[39m ):\n\u001b[32m 3818\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[32m-> \u001b[39m\u001b[32m3819\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01merr\u001b[39;00m\n\u001b[32m 3820\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[32m 3821\u001b[39m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[32m 3822\u001b[39m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[32m 3823\u001b[39m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[32m 3824\u001b[39m \u001b[38;5;28mself\u001b[39m._check_indexing_error(key)\n", "\u001b[31mKeyError\u001b[39m: 'Centralisation Date'" ] } ], "source": [ "# ============================================================\n", "# DATASETS\n", "# ============================================================\n", "\n", "DATASETS = {\n", " \"raw\": \"stocks.csv\",\n", " \"clean\": \"stock_repaired.csv\"\n", "}\n", "\n", "flows_file = \"flows.csv\"\n", "\n", "results = {}\n", "\n", "# ============================================================\n", "# LOAD FLOWS\n", "# ============================================================\n", "\n", "flows = pd.read_csv(flows_file, low_memory=False)\n", "\n", "flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"])\n", "flows[\"Registrar Account - ID\"] = flows[\"Registrar Account - ID\"].astype(str).str.strip()\n", "flows[\"Product - Isin\"] = flows[\"Product - Isin\"].astype(str)\n", "\n", "flows[\"Quantity - NetFlows\"] = flows[\"Quantity - NetFlows\"].fillna(0)\n", "\n", "# ============================================================\n", "# LOOP OVER DATASETS\n", "# ============================================================\n", "\n", "for name, stock_file in DATASETS.items():\n", "\n", " print(\"\\n====================================\")\n", " print(\"RUNNING COHERENCE SCORE ON:\", name)\n", " print(\"====================================\")\n", "\n", " # --------------------------------------------------------\n", " # LOAD STOCKS\n", " # --------------------------------------------------------\n", "\n", " stocks = pd.read_csv(stock_file, low_memory=False)\n", "\n", " stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"])\n", " stocks[\"Registrar Account - ID\"] = stocks[\"Registrar Account - ID\"].astype(str).str.strip()\n", " stocks[\"Product - Isin\"] = stocks[\"Product - Isin\"].astype(str)\n", "\n", " # --------------------------------------------------------\n", " # MERGE FLOWS\n", " # --------------------------------------------------------\n", "\n", " df = stocks.merge(\n", " flows,\n", " on=[\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"],\n", " how=\"left\"\n", " )\n", "\n", " df[\"Quantity - NetFlows\"] = df[\"Quantity - NetFlows\"].fillna(0)\n", "\n", " df = df.sort_values(\n", " [\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"]\n", " )\n", "\n", " # --------------------------------------------------------\n", " # ACCOUNTING RELATION\n", " # --------------------------------------------------------\n", "\n", " df[\"prev_aum\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - AUM\"]\n", " .shift(1)\n", " )\n", "\n", " df[\"flow_lag\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - NetFlows\"]\n", " .shift(1)\n", " ).fillna(0)\n", "\n", " df[\"expected_aum\"] = df[\"prev_aum\"] + df[\"flow_lag\"]\n", "\n", " df[\"error\"] = df[\"Quantity - AUM\"] - df[\"expected_aum\"]\n", "\n", " # --------------------------------------------------------\n", " # NORMALIZED ERROR\n", " # --------------------------------------------------------\n", "\n", " df[\"scale\"] = df[\"prev_aum\"].abs().clip(lower=1)\n", "\n", " df[\"normalized_error\"] = df[\"error\"].abs() / df[\"scale\"]\n", "\n", " # --------------------------------------------------------\n", " # OBSERVATION SCORE\n", " # --------------------------------------------------------\n", "\n", " df[\"score_obs\"] = np.exp(-5 * df[\"normalized_error\"])\n", "\n", " # --------------------------------------------------------\n", " # ACCOUNT SCORE\n", " # --------------------------------------------------------\n", "\n", " account_score = (\n", " df.groupby(\"Registrar Account - ID\")[\"score_obs\"]\n", " .mean()\n", " )\n", "\n", " # --------------------------------------------------------\n", " # ACCOUNT WEIGHTS (31/10/2025)\n", " # --------------------------------------------------------\n", "\n", " last_date = stocks[\"Centralisation Date\"].max()\n", "\n", " aum_last = (\n", " stocks[stocks[\"Centralisation Date\"] == last_date]\n", " .groupby(\"Registrar Account - ID\")[\"Quantity - AUM\"]\n", " .sum()\n", " )\n", "\n", " weights = aum_last / aum_last.sum()\n", "\n", " # --------------------------------------------------------\n", " # ALIGN\n", " # --------------------------------------------------------\n", "\n", " combined = pd.concat([account_score, weights], axis=1)\n", " combined.columns = [\"score\", \"weight\"]\n", "\n", " combined = combined.fillna(0)\n", "\n", " # --------------------------------------------------------\n", " # GLOBAL SCORE\n", " # --------------------------------------------------------\n", "\n", " combined[\"weighted_score\"] = combined[\"score\"] * combined[\"weight\"]\n", "\n", " global_score = combined[\"weighted_score\"].sum()\n", "\n", " print(\"GLOBAL SCORE:\", global_score)\n", "\n", " results[name] = {\n", " \"score\": global_score,\n", " \"details\": combined\n", " }\n", "\n", "# ============================================================\n", "# COMPARISON\n", "# ============================================================\n", "\n", "print(\"\\n====================================\")\n", "print(\"RAW VS CLEAN COMPARISON\")\n", "print(\"====================================\")\n", "\n", "raw_score = results[\"raw\"][\"score\"]\n", "clean_score = results[\"clean\"][\"score\"]\n", "\n", "improvement = clean_score - raw_score\n", "relative = improvement / raw_score\n", "\n", "print(\"RAW SCORE :\", raw_score)\n", "print(\"CLEAN SCORE :\", clean_score)\n", "print(\"IMPROVEMENT :\", improvement)\n", "print(\"RELATIVE IMPROVEMENT :\", relative)" ] }, { "cell_type": "code", "execution_count": null, "id": "4ce0bfea-7714-4fd7-ba29-17ce8d976ab6", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "# ============================================================\n", "# DATASETS\n", "# ============================================================\n", "\n", "DATASETS = {\n", " \"clean\": \"stock_repaired.csv\"\n", "}\n", "\n", "flows_file = \"flows.csv\"\n", "\n", "results = {}\n", "\n", "# ============================================================\n", "# LOAD FLOWS\n", "# ============================================================\n", "\n", "flows = pd.read_csv(flows_file, sep=\";\", low_memory=False)\n", "\n", "flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"])\n", "flows[\"Registrar Account - ID\"] = flows[\"Registrar Account - ID\"].astype(str).str.strip()\n", "flows[\"Product - Isin\"] = flows[\"Product - Isin\"].astype(str)\n", "\n", "flows[\"Quantity - NetFlows\"] = flows[\"Quantity - NetFlows\"].fillna(0)\n", "\n", "# ============================================================\n", "# LOOP OVER DATASETS\n", "# ============================================================\n", "\n", "for name, stock_file in DATASETS.items():\n", "\n", " print(\"\\n====================================\")\n", " print(\"RUNNING COHERENCE SCORE ON:\", name)\n", " print(\"====================================\")\n", "\n", " # --------------------------------------------------------\n", " # LOAD STOCKS\n", " # --------------------------------------------------------\n", "\n", " stocks = pd.read_csv(stock_file, sep=\";\", low_memory=False)\n", "\n", " stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"])\n", " stocks[\"Registrar Account - ID\"] = stocks[\"Registrar Account - ID\"].astype(str).str.strip()\n", " stocks[\"Product - Isin\"] = stocks[\"Product - Isin\"].astype(str)\n", "\n", " # --------------------------------------------------------\n", " # MERGE FLOWS\n", " # --------------------------------------------------------\n", "\n", " df = stocks.merge(\n", " flows,\n", " on=[\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"],\n", " how=\"left\"\n", " )\n", "\n", " df[\"Quantity - NetFlows\"] = df[\"Quantity - NetFlows\"].fillna(0)\n", "\n", " df = df.sort_values(\n", " [\"Registrar Account - ID\",\"Product - Isin\",\"Centralisation Date\"]\n", " )\n", "\n", " # --------------------------------------------------------\n", " # ACCOUNTING RELATION\n", " # --------------------------------------------------------\n", "\n", " df[\"prev_aum\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - AUM\"]\n", " .shift(1)\n", " )\n", "\n", " df[\"flow_lag\"] = (\n", " df.groupby([\"Registrar Account - ID\",\"Product - Isin\"])\n", " [\"Quantity - NetFlows\"]\n", " .shift(1)\n", " ).fillna(0)\n", "\n", " df[\"expected_aum\"] = df[\"prev_aum\"] + df[\"flow_lag\"]\n", "\n", " df[\"error\"] = df[\"Quantity - AUM\"] - df[\"expected_aum\"]\n", "\n", " # --------------------------------------------------------\n", " # NORMALIZED ERROR\n", " # --------------------------------------------------------\n", "\n", " df[\"scale\"] = df[\"prev_aum\"].abs().clip(lower=1)\n", "\n", " df[\"normalized_error\"] = df[\"error\"].abs() / df[\"scale\"]\n", "\n", " # --------------------------------------------------------\n", " # OBSERVATION SCORE\n", " # --------------------------------------------------------\n", "\n", " df[\"score_obs\"] = np.exp(-5 * df[\"normalized_error\"])\n", "\n", " # --------------------------------------------------------\n", " # ACCOUNT SCORE\n", " # --------------------------------------------------------\n", "\n", " account_score = (\n", " df.groupby(\"Registrar Account - ID\")[\"score_obs\"]\n", " .mean()\n", " )\n", "\n", " # --------------------------------------------------------\n", " # ACCOUNT WEIGHTS (31/10/2025)\n", " # --------------------------------------------------------\n", "\n", " last_date = stocks[\"Centralisation Date\"].max()\n", "\n", " aum_last = (\n", " stocks[stocks[\"Centralisation Date\"] == last_date]\n", " .groupby(\"Registrar Account - ID\")[\"Quantity - AUM\"]\n", " .sum()\n", " )\n", "\n", " weights = aum_last / aum_last.sum()\n", "\n", " combined = pd.concat([account_score, weights], axis=1)\n", " combined.columns = [\"score\", \"weight\"]\n", " combined = combined.fillna(0)\n", "\n", " combined[\"weighted_score\"] = combined[\"score\"] * combined[\"weight\"]\n", "\n", " global_score = combined[\"weighted_score\"].sum()\n", "\n", " print(\"GLOBAL SCORE:\", global_score)\n", "\n", " results[name] = global_score\n", "\n", "# ============================================================\n", "# COMPARISON\n", "# ============================================================\n", "\n", "print(\"\\n====================================\")\n", "print(\"RAW VS CLEAN COMPARISON\")\n", "print(\"====================================\")\n", "\n", "raw_score = results[\"raw\"]\n", "clean_score = results[\"clean\"]\n", "\n", "improvement = clean_score - raw_score\n", "relative = improvement / raw_score\n", "\n", "print(\"RAW SCORE :\", raw_score)\n", "print(\"CLEAN SCORE :\", clean_score)\n", "print(\"IMPROVEMENT :\", improvement)\n", "print(\"RELATIVE IMPROVEMENT :\", relative)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.11" } }, "nbformat": 4, "nbformat_minor": 5 }