Project_Carmignac/data_exploration/dataloader.ipynb

2746 lines
694 KiB
Plaintext
Raw Normal View History

2025-12-05 09:49:17 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "126c8a80-d9ad-4816-84f0-0c3d580f62c8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: openpyxl in /opt/python/lib/python3.13/site-packages (3.1.5)\n",
"Requirement already satisfied: et-xmlfile in /opt/python/lib/python3.13/site-packages (from openpyxl) (2.0.0)\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"!pip install openpyxl"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "ff2261fb-9516-4410-b42d-3acc8dc1a460",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import s3fs\n",
"# Filesystem handle on the project's S3 endpoint. Credentials are read from the\n",
"# environment, so a missing variable raises KeyError right here.\n",
"fs = s3fs.S3FileSystem(\n",
"    client_kwargs=dict(endpoint_url='https://' + 'minio-simple.lab.groupe-genes.fr'),\n",
"    key=os.environ[\"AWS_ACCESS_KEY_ID\"],\n",
"    secret=os.environ[\"AWS_SECRET_ACCESS_KEY\"],\n",
"    token=os.environ[\"AWS_SESSION_TOKEN\"],\n",
")"
]
},
{
"cell_type": "markdown",
"id": "3d36f3f0-bd40-4a83-96d1-b46d75f5a4c5",
"metadata": {},
"source": [
"# Data exploration"
]
},
{
"cell_type": "markdown",
"id": "eaf5c5a0-eb1c-4242-b893-7600e6def109",
"metadata": {},
"source": [
"Fonctions utiles"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "60e2035c-c2f0-4c51-97df-102e67ba96ee",
"metadata": {},
"outputs": [],
"source": [
"def plot_account(account_id, isin=None, data=None):\n",
"    \"\"\"\n",
"    Plot the stock (Quantity - AUM) evolution for a given Registrar Account.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    account_id\n",
"        Value compared with the \"Registrar Account - ID\" column.\n",
"    isin : optional\n",
"        When given, only rows whose \"Product - Isin\" equals it are kept.\n",
"    data : DataFrame, optional\n",
"        Table to plot from; needs the columns \"Registrar Account - ID\",\n",
"        \"Product - Isin\", \"Centralisation Date\" and \"Quantity - AUM\".\n",
"        Defaults to the notebook-level `merged` table, which therefore has\n",
"        to exist when the function is called without `data`.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        A figure is shown, or a message is printed when no row matches.\n",
"    \"\"\"\n",
"    table = merged if data is None else data\n",
"\n",
"    # Boolean masks select the rows directly, so the full table is never copied.\n",
"    keep = table[\"Registrar Account - ID\"] == account_id\n",
"    if isin is not None:\n",
"        keep = keep & (table[\"Product - Isin\"] == isin)\n",
"    selection = table[keep]\n",
"\n",
"    if selection.empty:\n",
"        scope = \"\" if isin is None else f\" and ISIN {isin}\"\n",
"        print(f\"No data found for account {account_id}{scope}\")\n",
"        return\n",
"\n",
"    # groupby returns its keys sorted, so the dates are already chronological.\n",
"    stock_by_date = selection.groupby(\"Centralisation Date\")[\"Quantity - AUM\"].sum()\n",
"\n",
"    title = f\"Stock Evolution for Account {account_id}\"\n",
"    if isin is not None:\n",
"        title += f\" - ISIN {isin}\"\n",
"\n",
"    # Plot\n",
"    plt.figure(figsize=(12, 4))\n",
"    plt.plot(stock_by_date.index, stock_by_date.values, marker='o')\n",
"    plt.title(title, fontsize=14)\n",
"    plt.xlabel(\"Date\")\n",
"    plt.ylabel(\"Total AUM\")\n",
"    plt.grid(True)\n",
"    plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "37e008b1-32d4-44be-9d23-1b90a5a26f89",
"metadata": {},
"outputs": [],
"source": [
"# 2. BASIC INSPECTION\n",
"\n",
"def quick_info(df, name):\n",
"    \"\"\"Print a quick textual profile of `df`, labelled with `name`.\n",
"\n",
"    Printed in order: a banner with the dataset name, the shape, the column\n",
"    names, the dtypes, the percentage of missing values per column (highest\n",
"    first), the first five rows and the number of distinct values per column\n",
"    (highest first). Returns None.\n",
"    \"\"\"\n",
"    rule = \"=\" * 80\n",
"    print(\"\\n\" + rule)\n",
"    print(f\"DATASET : {name}\")\n",
"    print(rule)\n",
"\n",
"    print(\"\\nShape :\", df.shape)\n",
"    print(\"\\nColumns :\", df.columns.tolist())\n",
"    print(\"\\nDtypes :\\n\", df.dtypes)\n",
"\n",
"    missing_pct = df.isna().mean().sort_values(ascending=False) * 100\n",
"    print(\"\\nMissing values (%) :\\n\", missing_pct)\n",
"\n",
"    print(\"\\nSample rows:\\n\", df.head(5))\n",
"\n",
"    distinct_counts = df.nunique().sort_values(ascending=False)\n",
"    print(\"\\nUnique values per column:\\n\", distinct_counts)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e67a99ea-ddf4-4627-8f48-ec183c671acb",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_11151/19230119.py:2: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" flows = pd.read_csv(f, sep=\";\")\n"
]
}
],
"source": [
"# Raw extracts are read through `fs`, the S3 filesystem handle created earlier.\n",
"# low_memory=False makes pandas infer each column's dtype from the whole file\n",
"# instead of chunk by chunk; chunked inference is what raised the \"mixed types\"\n",
"# DtypeWarning on the first four columns of the flows file.\n",
"with fs.open('projet-bdc-data//carmignac/Flows ENSAE V2 -20251105.csv', 'rb') as f:\n",
"    flows = pd.read_csv(f, sep=\";\", low_memory=False)\n",
"\n",
"with fs.open('projet-bdc-data//carmignac/AUM ENSAE V2 -20251105.csv', 'rb') as f:\n",
"    stocks = pd.read_csv(f, sep=\";\", low_memory=False)\n",
"\n",
"# The workbook is read without a header row. The code below treats column 0 as\n",
"# one comma-separated string per row: it is split into real columns and the\n",
"# first row is promoted to column names.\n",
"with fs.open('projet-bdc-data/carmignac/Monthly AUM and NAV since 2010.xlsx', 'rb') as f:\n",
"    nav_raw = pd.read_excel(f, header=None, engine=\"openpyxl\")\n",
"nav = nav_raw[0].str.split(\",\", expand=True)\n",
"nav.columns = nav.iloc[0]\n",
"nav = nav[1:].reset_index(drop=True)\n",
"\n",
"quick_info(stocks, \"STOCKS\")\n",
"quick_info(flows, \"FLOWS\")\n",
"quick_info(nav, \"NAV/PRICES\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "9bc92c9f-216c-475e-bfb8-edc1a4e839f6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Date conversion done.\n",
"NAV numeric conversion done.\n",
"String normalization done.\n",
"\n",
"ISIN missing in FLOWS but present in STOCKS : 17\n",
"\n",
"ISIN missing in STOCKS but present in FLOWS : 0\n",
"\n",
"ISIN missing in NAV but present in FLOWS : 67\n",
"\n",
"ISIN missing in NAV but present in STOCKS : 76\n",
"\n",
"Accounts in STOCKS but NEVER in FLOWS : 5777\n",
"\n",
"Accounts in FLOWS but NEVER in STOCKS : 118\n",
"\n",
"CLIENT BEHAVIOR (first 5 rows):\n",
" Registrar Account - ID n_days n_transactions total_netflows mean_flow \\\n",
"0 100000028 3 3 -109.238 -36.412667 \n",
"1 100000042 1 1 -660.115 -660.115000 \n",
"2 100000065 1 1 -174.646 -174.646000 \n",
"3 100000069 65 73 -7479.755 -102.462397 \n",
"4 100000073 1 1 -133.402 -133.402000 \n",
"\n",
" std_flow total_subscription total_redemption churn_ratio \n",
"0 49.280511 0.000 -109.238 -1.092380e+11 \n",
"1 NaN 0.000 -660.115 -6.601150e+11 \n",
"2 NaN 0.000 -174.646 -1.746460e+11 \n",
"3 2168.971331 33320.402 -40800.157 -1.224480e+00 \n",
"4 NaN 0.000 -133.402 -1.334020e+11 \n",
"\n",
"FUND BEHAVIOR (first 5 rows):\n",
" Product - Isin n_accounts n_days total_netflows vol_flows\n",
"0 FR0010135103 2690 2723 -2.571327e+07 2622.609244\n",
"1 FR0010147603 733 2719 -2.562187e+06 1206.248205\n",
"2 FR0010148981 1841 2722 -3.609440e+06 1051.069183\n",
"3 FR0010148999 454 2306 -7.130297e+05 1265.364138\n",
"4 FR0010149112 934 2000 -9.438901e+05 1834.961721\n"
]
}
],
"source": [
"# 1. CLEAN DATES (different formats)\n",
"\n",
"# NOTE(review): stocks/flows dates are parsed without an explicit format, so pandas\n",
"# infers it -- confirm the day/month order against the raw CSV files.\n",
"for frame in (stocks, flows):\n",
"    frame[\"Centralisation Date\"] = pd.to_datetime(frame[\"Centralisation Date\"], errors=\"coerce\")\n",
"\n",
"# NAV dates are parsed as day/month/year; values that do not parse become NaT.\n",
"nav[\"NavDate\"] = pd.to_datetime(nav[\"NavDate\"], format=\"%d/%m/%Y\", errors=\"coerce\")\n",
"\n",
"print(\"Date conversion done.\")\n",
"\n",
"# 2. CLEAN NUMERIC COLUMNS FOR NAV FILE\n",
"\n",
"num_cols = [\n",
"    \"PortfolioAum_Eur\", \"ShareClassPrice\", \"NumberOfShares\",\n",
"    \"ShareClassAumLocalCur\", \"ShareClassAum_EUR\",\n",
"]\n",
"\n",
"# Commas become decimal points and spaces are removed before the cast to float.\n",
"for col in num_cols:\n",
"    as_text = nav[col].astype(str)\n",
"    as_text = as_text.str.replace(\",\", \".\", regex=False)\n",
"    as_text = as_text.str.replace(\" \", \"\")\n",
"    nav[col] = as_text.astype(float)\n",
"\n",
"print(\"NAV numeric conversion done.\")\n",
"\n",
"# 3. STANDARDIZE STRINGS FOR JOIN KEYS\n",
"\n",
"def norm(df):\n",
"    \"\"\"Strip and upper-case every object-dtype column of `df`, in place.\n",
"\n",
"    The same DataFrame object is returned so the call can sit in an assignment.\n",
"    NOTE(review): `astype(str)` also converts missing values, so NaN ends up as\n",
"    the literal string \"NAN\" (None as \"NONE\") and is no longer seen by isna().\n",
"    \"\"\"\n",
"    text_cols = [col for col in df.columns if df[col].dtype == \"object\"]\n",
"    for col in text_cols:\n",
"        df[col] = df[col].astype(str).str.strip().str.upper()\n",
"    return df\n",
"\n",
"# norm() edits each frame in place and hands the same object back.\n",
"stocks, flows, nav = [norm(frame) for frame in (stocks, flows, nav)]\n",
"\n",
"print(\"String normalization done.\")\n",
"\n",
"\n",
"# 4. ANALYSE RELATIONS ACROSS FILES\n",
"\n",
"# Distinct ISIN codes found in each file\n",
"isin_stocks = set(stocks[\"Product - Isin\"].unique())\n",
"isin_flows = set(flows[\"Product - Isin\"].unique())\n",
"isin_nav = set(nav[\"ShareClassIsin\"].unique())\n",
"\n",
"# Each count is the size of a set difference: present in the second file, absent from the first.\n",
"print(\"\\nISIN missing in FLOWS but present in STOCKS :\", len(isin_stocks.difference(isin_flows)))\n",
"print(\"\\nISIN missing in STOCKS but present in FLOWS :\", len(isin_flows.difference(isin_stocks)))\n",
"print(\"\\nISIN missing in NAV but present in FLOWS :\", len(isin_flows.difference(isin_nav)))\n",
"print(\"\\nISIN missing in NAV but present in STOCKS :\", len(isin_stocks.difference(isin_nav)))\n",
"\n",
"\n",
"# 5. CLIENTS: STOCKS VS FLOWS\n",
"\n",
"# Distinct account identifiers found in each file\n",
"acc_stocks = set(stocks[\"Registrar Account - ID\"].unique())\n",
"acc_flows = set(flows[\"Registrar Account - ID\"].unique())\n",
"\n",
"print(\"\\nAccounts in STOCKS but NEVER in FLOWS :\", len(acc_stocks.difference(acc_flows)))\n",
"print(\"\\nAccounts in FLOWS but NEVER in STOCKS :\", len(acc_flows.difference(acc_stocks)))\n",
"\n",
"\n",
"# 6. CLIENT ACTIVITY METRICS (DETAILED)\n",
"\n",
"# One row per account. The string aggregator \"nunique\" gives the same counts as a\n",
"# Python lambda but runs on pandas' built-in group-by path.\n",
"client_behavior = flows.groupby(\"Registrar Account - ID\").agg(\n",
"    n_days=(\"Centralisation Date\", \"nunique\"),\n",
"    n_transactions=(\"Quantity - NetFlows\", \"count\"),\n",
"    total_netflows=(\"Quantity - NetFlows\", \"sum\"),\n",
"    mean_flow=(\"Quantity - NetFlows\", \"mean\"),\n",
"    std_flow=(\"Quantity - NetFlows\", \"std\"),\n",
"    total_subscription=(\"Quantity - Subscription\", \"sum\"),\n",
"    total_redemption=(\"Quantity - Redemption\", \"sum\")\n",
").reset_index()\n",
"\n",
"# Add churn metric: redeemed volume per unit of subscribed volume.\n",
"# Redemptions are recorded as negative quantities in this data, hence abs().\n",
"# Accounts without a positive subscription total get NaN; dividing by a tiny\n",
"# epsilon instead would produce meaningless ratios around 1e11.\n",
"client_behavior[\"churn_ratio\"] = (\n",
"    client_behavior[\"total_redemption\"].abs()\n",
"    / client_behavior[\"total_subscription\"].where(client_behavior[\"total_subscription\"] > 0)\n",
")\n",
"\n",
"print(\"\\nCLIENT BEHAVIOR (first 5 rows):\\n\", client_behavior.head())\n",
"\n",
"\n",
"# 7. FUNDS ACTIVITY METRICS\n",
"\n",
"# One row per ISIN: distinct accounts, distinct dates, net flow total and dispersion.\n",
"fund_behavior = (\n",
"    flows.groupby(\"Product - Isin\")\n",
"    .agg(\n",
"        n_accounts=(\"Registrar Account - ID\", \"nunique\"),\n",
"        n_days=(\"Centralisation Date\", \"nunique\"),\n",
"        total_netflows=(\"Quantity - NetFlows\", \"sum\"),\n",
"        vol_flows=(\"Quantity - NetFlows\", \"std\"),\n",
"    )\n",
"    .reset_index()\n",
")\n",
"\n",
"print(\"\\nFUND BEHAVIOR (first 5 rows):\\n\", fund_behavior.head())\n",
"\n",
"\n",
"# 8. SAVE INTERMEDIATE\n",
"\n",
"# Both summary tables are written to the working directory without their index.\n",
"for table, path in ((client_behavior, \"client_behavior.csv\"), (fund_behavior, \"fund_behavior.csv\")):\n",
"    table.to_csv(path, index=False)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "afb51598-3a7b-41f2-8d25-5b4b8bfb1c8a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FULL usable ISIN : 407\n",
"Stocks only ISIN : 17\n",
"Flows only ISIN : 0\n",
"Missing NAV : 76\n",
"All ISIN groups saved into 4 separate files.\n"
]
}
],
"source": [
"# Partition the ISIN universe according to the files each code appears in.\n",
"valid_full = isin_stocks & isin_flows & isin_nav           # present in all three files\n",
"stocks_only = isin_stocks - isin_flows                     # held but never traded\n",
"flows_only = isin_flows - isin_stocks                      # traded but never held\n",
"missing_nav = (isin_stocks | isin_flows) - isin_nav        # no NAV / price history\n",
"\n",
"print(\"FULL usable ISIN :\", len(valid_full))\n",
"print(\"Stocks only ISIN :\", len(stocks_only))\n",
"print(\"Flows only ISIN :\", len(flows_only))\n",
"print(\"Missing NAV :\", len(missing_nav))\n",
"\n",
"# Sets have no stable iteration order, so every group is sorted before export;\n",
"# the CSV files are then identical from one run to the next. key=str keeps the\n",
"# sort working even if a non-string value slipped into a set.\n",
"pd.DataFrame({\"isin\": sorted(valid_full, key=str)}).to_csv(\"isin_full.csv\", index=False)\n",
"pd.DataFrame({\"isin\": sorted(stocks_only, key=str)}).to_csv(\"isin_stocks_only.csv\", index=False)\n",
"pd.DataFrame({\"isin\": sorted(flows_only, key=str)}).to_csv(\"isin_flows_only.csv\", index=False)\n",
"pd.DataFrame({\"isin\": sorted(missing_nav, key=str)}).to_csv(\"isin_missing_nav.csv\", index=False)\n",
"\n",
"print(\"All ISIN groups saved into 4 separate files.\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "61e0c71a-a1c6-4ed8-ba15-b7a9badc4d4a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Registrar Account - ID n_days n_transactions total_netflows mean_flow \\\n",
"0 100000028 3 3 -109.238 -36.412667 \n",
"1 100000042 1 1 -660.115 -660.115000 \n",
"2 100000065 1 1 -174.646 -174.646000 \n",
"3 100000069 65 73 -7479.755 -102.462397 \n",
"4 100000073 1 1 -133.402 -133.402000 \n",
"\n",
" std_flow total_subscription total_redemption churn_ratio \\\n",
"0 49.280511 0.000 -109.238 -1.092380e+08 \n",
"1 NaN 0.000 -660.115 -6.601150e+08 \n",
"2 NaN 0.000 -174.646 -1.746460e+08 \n",
"3 2168.971331 33320.402 -40800.157 -1.224480e+00 \n",
"4 NaN 0.000 -133.402 -1.334020e+08 \n",
"\n",
" churn_flag activity_score flow_volatility inertia_ratio \n",
"0 0 1.386294 49.280511 0.998921 \n",
"1 0 0.693147 0.000000 0.999640 \n",
"2 0 0.693147 0.000000 0.999640 \n",
"3 0 4.304065 2168.971331 0.976619 \n",
"4 0 0.693147 0.000000 0.999640 \n"
]
}
],
"source": [
"eps = 1e-6  # not needed by the ratio below; kept in case later cells rely on it\n",
"\n",
"# Redemptions are recorded as negative quantities in this data (total_redemption <= 0),\n",
"# so both churn metrics compare their absolute value with subscriptions.\n",
"redeemed = client_behavior[\"total_redemption\"].abs()\n",
"subscribed = client_behavior[\"total_subscription\"]\n",
"\n",
"# Redeemed volume per unit of subscribed volume. NaN when the account has no\n",
"# positive subscription total; an epsilon denominator would yield ratios around 1e8.\n",
"client_behavior[\"churn_ratio\"] = redeemed / subscribed.where(subscribed > 0)\n",
"\n",
"# 1 when the account redeemed more than it subscribed. Comparing the signed\n",
"# (negative) redemption total with subscriptions would almost never be true.\n",
"client_behavior[\"churn_flag\"] = (redeemed > subscribed).astype(int)\n",
"\n",
"# log(1 + number of transactions)\n",
"client_behavior[\"activity_score\"] = np.log1p(client_behavior[\"n_transactions\"])\n",
"\n",
"# The standard deviation is NaN for accounts with a single transaction: use 0 there.\n",
"client_behavior[\"flow_volatility\"] = client_behavior[\"std_flow\"].fillna(0)\n",
"\n",
"# Share of all flow dates on which the account did NOT trade.\n",
"n_flow_dates = flows[\"Centralisation Date\"].nunique()\n",
"client_behavior[\"inertia_ratio\"] = 1 - client_behavior[\"n_days\"] / n_flow_dates\n",
"\n",
"print(client_behavior.head())\n",
"\n",
"client_behavior.to_csv(\"client_behavior_clean.csv\", index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "8ee7e911-eb73-4846-b545-661140411c1b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1219/1645623303.py:17: RuntimeWarning: invalid value encountered in scalar divide\n",
" .apply(lambda x: x[\"Value - AUM €\"].max() / x[\"Value - AUM €\"].sum()) \\\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Registrar Account - ID n_isin_held n_funds_held n_asset_types \\\n",
"0 100000014 1 1 1 \n",
"1 100000016 2 2 2 \n",
"2 100000028 1 1 1 \n",
"3 100000038 3 3 2 \n",
"4 100000042 1 1 1 \n",
"\n",
" n_strategies total_aum median_aum concentration_ratio \n",
"0 1 0.0000 0.0 NaN \n",
"1 2 0.0000 0.0 NaN \n",
"2 1 126236.2184 0.0 1.0 \n",
"3 3 0.0000 0.0 NaN \n",
"4 1 446362.9015 0.0 1.0 \n",
" n_isin_held n_funds_held n_asset_types n_strategies total_aum \\\n",
"count 12501.000000 12501.000000 12501.000000 12501.000000 1.250100e+04 \n",
"mean 5.514759 4.408367 2.082473 4.109271 4.218474e+08 \n",
"std 10.434698 5.472756 1.254048 4.714800 5.618341e+09 \n",
"min 1.000000 1.000000 1.000000 1.000000 -2.586805e+08 \n",
"25% 1.000000 1.000000 1.000000 1.000000 0.000000e+00 \n",
"50% 2.000000 2.000000 2.000000 2.000000 2.587605e+05 \n",
"75% 6.000000 5.000000 3.000000 5.000000 8.817014e+06 \n",
"max 469.000000 67.000000 6.000000 48.000000 4.780234e+11 \n",
"\n",
" median_aum concentration_ratio \n",
"count 1.250100e+04 7708.000000 \n",
"mean 2.573991e+05 0.790503 \n",
"std 3.487976e+06 0.261535 \n",
"min -2.317333e+06 -2.591840 \n",
"25% 0.000000e+00 0.576503 \n",
"50% 0.000000e+00 0.972159 \n",
"75% 1.474502e+02 1.000000 \n",
"max 2.215373e+08 2.983529 \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1219/1645623303.py:17: FutureWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
" .apply(lambda x: x[\"Value - AUM €\"].max() / x[\"Value - AUM €\"].sum()) \\\n"
]
}
],
"source": [
"# Diversification per account\n",
"account_div = stocks.groupby(\"Registrar Account - ID\").agg(\n",
"    n_isin_held=(\"Product - Isin\", \"nunique\"),\n",
"    n_funds_held=(\"Product - Fund\", \"nunique\"),\n",
"    n_asset_types=(\"Product - Asset Type\", \"nunique\"),\n",
"    n_strategies=(\"Product - Strategy\", \"nunique\"),\n",
"    total_aum=(\"Value - AUM €\", \"sum\"),\n",
"    median_aum=(\"Value - AUM €\", \"median\")\n",
").reset_index()\n",
"\n",
"# Concentration ratio per account: AUM of the largest fund over the account's total AUM\n",
"aum_by_account_fund = stocks.groupby(\n",
"    [\"Registrar Account - ID\", \"Product - Fund\"]\n",
")[\"Value - AUM €\"].sum().reset_index()\n",
"\n",
"# Vectorised max / sum per account. It yields the same values as a per-group\n",
"# apply(), without the deprecated apply-on-grouping-columns path and without\n",
"# the RuntimeWarning on 0 / 0 (those accounts still get NaN).\n",
"# NOTE(review): AUM values can be negative in this data, so the ratio is not\n",
"# confined to [0, 1] -- confirm how negative positions should be treated.\n",
"fund_aum = aum_by_account_fund.groupby(\"Registrar Account - ID\")[\"Value - AUM €\"]\n",
"concentration = (fund_aum.max() / fund_aum.sum()).reset_index(name=\"concentration_ratio\")\n",
"\n",
"# Merge diversification + concentration\n",
"account_static = account_div.merge(concentration, on=\"Registrar Account - ID\", how=\"left\")\n",
"\n",
"print(account_static.head())\n",
"print(account_static.describe())\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "76f6fa0d-9d7a-4145-af1c-986d83947f91",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Registrar Account - ID country region\n",
"0 100000014 SWITZERLAND SWITZERLAND\n",
"1 100000016 UNITED KINGDOM UNITED KINGDOM\n",
"2 100000028 UNITED KINGDOM UNITED KINGDOM\n",
"3 100000038 SWITZERLAND SWITZERLAND\n",
"4 100000042 UNITED KINGDOM UNITED KINGDOM\n"
]
}
],
"source": [
"# Geographic info per account: most frequent country / region over the account's rows\n",
"def _most_frequent(values):\n",
"    \"\"\"Return the first entry of `values.mode()`.\"\"\"\n",
"    return values.mode()[0]\n",
"\n",
"geo = (\n",
"    stocks.groupby(\"Registrar Account - ID\")\n",
"    .agg(\n",
"        country=(\"RegistrarAccount - Country\", _most_frequent),\n",
"        region=(\"Registrar Account - Region\", _most_frequent),\n",
"    )\n",
"    .reset_index()\n",
")\n",
"\n",
"print(geo.head())\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "e9bb67ab-9029-4ace-b960-b3d6e0b8683c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Registrar Account - ID n_days n_transactions total_netflows mean_flow \\\n",
"0 100000028 3 3 -109.238 -36.412667 \n",
"1 100000042 1 1 -660.115 -660.115000 \n",
"2 100000065 1 1 -174.646 -174.646000 \n",
"3 100000069 65 73 -7479.755 -102.462397 \n",
"4 100000073 1 1 -133.402 -133.402000 \n",
"\n",
" std_flow total_subscription total_redemption churn_ratio \\\n",
"0 49.280511 0.000 -109.238 -1.092380e+08 \n",
"1 NaN 0.000 -660.115 -6.601150e+08 \n",
"2 NaN 0.000 -174.646 -1.746460e+08 \n",
"3 2168.971331 33320.402 -40800.157 -1.224480e+00 \n",
"4 NaN 0.000 -133.402 -1.334020e+08 \n",
"\n",
" churn_flag ... n_funds_held n_asset_types n_strategies total_aum \\\n",
"0 0 ... 1.0 1.0 1.0 126236.2184 \n",
"1 0 ... 1.0 1.0 1.0 446362.9015 \n",
"2 0 ... 1.0 1.0 1.0 488743.4240 \n",
"3 0 ... 0.0 0.0 0.0 NaN \n",
"4 0 ... 2.0 2.0 2.0 373322.8948 \n",
"\n",
" median_aum concentration_ratio country region \\\n",
"0 0.0 1.0 UNITED KINGDOM UNITED KINGDOM \n",
"1 0.0 1.0 UNITED KINGDOM UNITED KINGDOM \n",
"2 0.0 1.0 UNITED KINGDOM UNITED KINGDOM \n",
"3 NaN NaN UNKNOWN UNKNOWN \n",
"4 0.0 1.0 UNITED KINGDOM UNITED KINGDOM \n",
"\n",
" log_total_aum log_median_aum \n",
"0 11.745918 0.0 \n",
"1 13.008890 0.0 \n",
"2 13.099595 0.0 \n",
"3 NaN NaN \n",
"4 12.830202 0.0 \n",
"\n",
"[5 rows x 24 columns]\n",
" Registrar Account - ID n_days n_transactions total_netflows \\\n",
"count 6842 6842.000000 6842.000000 6.842000e+03 \n",
"unique 6842 NaN NaN NaN \n",
"top 100000028 NaN NaN NaN \n",
"freq 1 NaN NaN NaN \n",
"mean NaN 122.731804 376.273166 3.426362e+04 \n",
"std NaN 363.726141 1972.817028 1.644676e+06 \n",
"min NaN 1.000000 1.000000 -9.982301e+06 \n",
"25% NaN 2.000000 2.000000 -3.448661e+02 \n",
"50% NaN 5.000000 6.000000 -1.116000e+00 \n",
"75% NaN 27.000000 42.000000 4.220087e+01 \n",
"max NaN 2715.000000 53314.000000 1.319043e+08 \n",
"\n",
" mean_flow std_flow total_subscription total_redemption \\\n",
"count 6842.000000 5.696000e+03 6.842000e+03 6.842000e+03 \n",
"unique NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN \n",
"mean 420.503483 6.035653e+03 1.565831e+05 -1.223195e+05 \n",
"std 15548.555778 7.652710e+04 4.204641e+06 2.600731e+06 \n",
"min -333474.890000 0.000000e+00 -3.931320e+02 -2.069900e+08 \n",
"25% -35.658375 5.643245e+01 0.000000e+00 -6.968600e+03 \n",
"50% -0.067287 2.479222e+02 3.393685e+02 -4.493603e+02 \n",
"75% 2.235534 9.869604e+02 6.000000e+03 -7.275400e+01 \n",
"max 871531.706418 4.697263e+06 3.388942e+08 0.000000e+00 \n",
"\n",
" churn_ratio churn_flag ... n_funds_held n_asset_types \\\n",
"count 6.842000e+03 6842.000000 ... 6842.000000 6842.000000 \n",
"unique NaN NaN ... NaN NaN \n",
"top NaN NaN ... NaN NaN \n",
"freq NaN NaN ... NaN NaN \n",
"mean -7.445486e+08 0.000146 ... 5.054224 2.114294 \n",
"std 1.163193e+10 0.012090 ... 6.457703 1.342230 \n",
"min -5.212597e+11 0.000000 ... 0.000000 0.000000 \n",
"25% -2.279500e+06 0.000000 ... 1.000000 1.000000 \n",
"50% -1.048786e+00 0.000000 ... 2.000000 2.000000 \n",
"75% -9.333542e-01 0.000000 ... 7.000000 3.000000 \n",
"max 3.225589e+01 1.000000 ... 67.000000 6.000000 \n",
"\n",
" n_strategies total_aum median_aum concentration_ratio \\\n",
"count 6842.000000 6.724000e+03 6.724000e+03 6586.000000 \n",
"unique NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN \n",
"mean 4.631102 7.136776e+08 4.051506e+05 0.782903 \n",
"std 5.493014 7.438256e+09 4.121668e+06 0.267426 \n",
"min 0.000000 -9.151116e+07 -2.317333e+06 -2.591840 \n",
"25% 1.000000 5.107611e+05 0.000000e+00 0.561855 \n",
"50% 2.000000 4.221523e+06 0.000000e+00 0.965881 \n",
"75% 6.000000 3.987106e+07 2.531545e+04 1.000000 \n",
"max 48.000000 4.780234e+11 2.215373e+08 2.983529 \n",
"\n",
" country region log_total_aum log_median_aum \n",
"count 6842 6842 6724.000000 6724.000000 \n",
"unique 34 16 NaN NaN \n",
"top FRANCE FRANCE NaN NaN \n",
"freq 2631 2643 NaN NaN \n",
"mean NaN NaN 15.046065 4.392450 \n",
"std NaN NaN 4.320148 5.462132 \n",
"min NaN NaN 0.000000 0.000000 \n",
"25% NaN NaN 13.143657 0.000000 \n",
"50% NaN NaN 15.255707 0.000000 \n",
"75% NaN NaN 17.501160 10.139210 \n",
"max NaN NaN 26.892926 19.216101 \n",
"\n",
"[11 rows x 24 columns]\n"
]
}
],
"source": [
"# 1-2. Start from flow behaviour, then attach diversification (stocks) and geography.\n",
"# Both are left joins, so only accounts present in `client_behavior` are kept.\n",
"client_master = (\n",
"    client_behavior\n",
"    .merge(account_static, on=\"Registrar Account - ID\", how=\"left\")\n",
"    .merge(geo, on=\"Registrar Account - ID\", how=\"left\")\n",
")\n",
"\n",
"# 3. Additional engineered features: log(1 + x) of AUM, negative values floored at 0 first\n",
"for aum_col in (\"total_aum\", \"median_aum\"):\n",
"    client_master[f\"log_{aum_col}\"] = np.log1p(client_master[aum_col].clip(lower=0))\n",
"\n",
"# 4. Replace NaN flow volatility with 0\n",
"client_master[\"flow_volatility\"] = client_master[\"flow_volatility\"].fillna(0)\n",
"\n",
"# 5. Fill missing diversification metrics with 0 (accounts without a match in stocks)\n",
"div_cols = [\"n_isin_held\", \"n_funds_held\", \"n_asset_types\", \"n_strategies\"]\n",
"client_master[div_cols] = client_master[div_cols].fillna(0)\n",
"\n",
"# 6. Fill missing geography as \"UNKNOWN\"\n",
"for geo_col in (\"country\", \"region\"):\n",
"    client_master[geo_col] = client_master[geo_col].fillna(\"UNKNOWN\")\n",
"\n",
"# 7. Export\n",
"client_master.to_csv(\"client_master.csv\", index=False)\n",
"\n",
"print(client_master.head())\n",
"print(client_master.describe(include='all'))\n"
]
},
{
"cell_type": "markdown",
"id": "fb1e98a5-6ab4-4371-ba45-6558ff38c839",
"metadata": {},
"source": [
"Détection des ruptures"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "6bdd8077-c8e0-451d-a7b8-15a2705ad196",
"metadata": {},
"outputs": [],
"source": [
"# --- 1. PREPARE STOCKS ---\n",
"# Keep only the key columns and the held quantity, ordered by account, ISIN, date.\n",
"stock_cols = [\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\", \"Quantity - AUM\"]\n",
"stocks_clean = stocks[stock_cols].copy()\n",
"stocks_clean[\"Centralisation Date\"] = pd.to_datetime(stocks_clean[\"Centralisation Date\"])\n",
"stocks_clean = stocks_clean.sort_values(\n",
"    [\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"]\n",
")\n",
"\n",
"# --- 2. PREPARE FLOWS ---\n",
"flow_cols = [\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\", \"Quantity - NetFlows\"]\n",
"flows_clean = flows[flow_cols].copy()\n",
"flows_clean[\"Centralisation Date\"] = pd.to_datetime(flows_clean[\"Centralisation Date\"])\n",
"\n",
"# Aggregate flows per day to avoid duplicates: one row per (account, ISIN, date)\n",
"flows_clean = (\n",
"    flows_clean\n",
"    .groupby([\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"])[\"Quantity - NetFlows\"]\n",
"    .sum()\n",
"    .reset_index()\n",
")\n",
"\n",
"# --- 3. MERGE STOCKS WITH FLOWS ---\n",
"# NOTE(review): this is a left join on exact dates, so a flow dated between two\n",
"# stock observations never reaches `expected_stock` -- confirm this is intended.\n",
"pair_keys = [\"Registrar Account - ID\", \"Product - Isin\"]\n",
"merged = stocks_clean.merge(\n",
"    flows_clean,\n",
"    on=pair_keys + [\"Centralisation Date\"],\n",
"    how=\"left\",\n",
")\n",
"\n",
"# Stock dates without a matching flow row count as zero net flow\n",
"merged[\"Quantity - NetFlows\"] = merged[\"Quantity - NetFlows\"].fillna(0)\n",
"\n",
"# --- 4. SHIFT STOCKS TO COMPARE t vs t+1 ---\n",
"# Previous observation of the same (account, ISIN) pair; NaN on the pair's first row\n",
"merged[\"prev_stock\"] = merged.groupby(pair_keys)[\"Quantity - AUM\"].shift(1)\n",
"\n",
"# Net flows from the previous date, 0 on the pair's first row\n",
"merged[\"prev_netflows\"] = merged.groupby(pair_keys)[\"Quantity - NetFlows\"].shift(1).fillna(0)\n",
"\n",
"# Expected stock\n",
"merged[\"expected_stock\"] = merged[\"prev_stock\"] + merged[\"prev_netflows\"]\n",
"\n",
"# --- 5. COMPUTE GAP BETWEEN EXPECTED AND REAL ---\n",
"merged[\"gap\"] = merged[\"Quantity - AUM\"] - merged[\"expected_stock\"]\n",
"\n",
"# Tolerance for numerical noise; a pair's first row (no previous stock) is never flagged\n",
"TOL = 1e-6\n",
"has_previous = merged[\"prev_stock\"].notna()\n",
"merged[\"rupture_flag\"] = has_previous & (merged[\"gap\"].abs() > TOL)\n",
"\n",
"# --- 6. AGGREGATE BY CLIENT TO DETECT BIG ISSUES ---\n",
"# Per account: number of flagged rows, number of rows, flagged share, largest absolute gap\n",
"rupture_summary = merged.groupby(\"Registrar Account - ID\").agg(\n",
"    n_ruptures=(\"rupture_flag\", \"sum\"),\n",
"    total_obs=(\"rupture_flag\", \"count\"),\n",
"    rupture_ratio=(\"rupture_flag\", \"mean\"),\n",
"    max_gap=(\"gap\", lambda x: x.abs().max())\n",
").reset_index()\n",
"\n",
"# Sort by biggest anomalies (highest rupture ratio first)\n",
"rupture_summary = rupture_summary.sort_values(\"rupture_ratio\", ascending=False)\n",
"\n",
"# Same table with the lowest ratio first, exported with its index column.\n",
"# (A bare `rupture_summary.head(10)` in the middle of a cell displays nothing,\n",
"# so it is not kept here.)\n",
"rupture_summary_asc = rupture_summary.sort_values(\"rupture_ratio\", ascending=True)\n",
"rupture_summary_asc.to_csv('rupture.csv')"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "9e32fd6b-4754-4196-9487-ffdc0bb4fc06",
"metadata": {},
"outputs": [],
"source": [
"# Persist the stock/flow reconciliation table (default to_csv settings, index included).\n",
"merged.to_csv(\"merged.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "71cd67aa-f4b9-489e-b928-defeca459cb6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_ruptures</th>\n",
" <th>total_obs</th>\n",
" <th>rupture_ratio</th>\n",
" <th>max_gap</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>165</th>\n",
" <td>200000331</td>\n",
" <td>0</td>\n",
" <td>160</td>\n",
" <td>0.000000</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>182</th>\n",
" <td>200000361</td>\n",
" <td>0</td>\n",
" <td>80</td>\n",
" <td>0.000000</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12198</th>\n",
" <td>422302</td>\n",
" <td>0</td>\n",
" <td>240</td>\n",
" <td>0.000000</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12197</th>\n",
" <td>422299</td>\n",
" <td>0</td>\n",
" <td>80</td>\n",
" <td>0.000000</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12191</th>\n",
" <td>422288</td>\n",
" <td>0</td>\n",
" <td>1200</td>\n",
" <td>0.000000</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6522</th>\n",
" <td>365568</td>\n",
" <td>237</td>\n",
" <td>240</td>\n",
" <td>0.987500</td>\n",
" <td>16596.971</td>\n",
" </tr>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>200000407</td>\n",
" <td>79</td>\n",
" <td>80</td>\n",
" <td>0.987500</td>\n",
" <td>63893.601</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6884</th>\n",
" <td>365966</td>\n",
" <td>79</td>\n",
" <td>80</td>\n",
" <td>0.987500</td>\n",
" <td>2673.873</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7039</th>\n",
" <td>366351</td>\n",
" <td>258</td>\n",
" <td>260</td>\n",
" <td>0.992308</td>\n",
" <td>1998.948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>603</th>\n",
" <td>200001928</td>\n",
" <td>645</td>\n",
" <td>650</td>\n",
" <td>0.992308</td>\n",
" <td>110779.418</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>12501 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_ruptures total_obs rupture_ratio max_gap\n",
"165 200000331 0 160 0.000000 0.000\n",
"182 200000361 0 80 0.000000 0.000\n",
"12198 422302 0 240 0.000000 0.000\n",
"12197 422299 0 80 0.000000 0.000\n",
"12191 422288 0 1200 0.000000 0.000\n",
"... ... ... ... ... ...\n",
"6522 365568 237 240 0.987500 16596.971\n",
"197 200000407 79 80 0.987500 63893.601\n",
"6884 365966 79 80 0.987500 2673.873\n",
"7039 366351 258 260 0.992308 1998.948\n",
"603 200001928 645 650 0.992308 110779.418\n",
"\n",
"[12501 rows x 5 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rupture_summary_asc"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "72332a7e-0ab0-474b-aac7-b52ebbba7a8b",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+kAAAGKCAYAAABwwgCiAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAgTVJREFUeJzt3Xd8U+X+B/BPkqbpoBMoLauUDbLBQsUBSBliERFFEEERFQRFUH+KIqUuHOBGvMq6XkSGC1AsVKAsK5VRpJTdQhU7KKWbpiF5fn9gQtOMJm1m83m/Xtxrz3lO8pzm6cn5nmd8JUIIASIiIiIiIiJyOqmzK0BERERERERE1zFIJyIiIiIiInIRDNKJiIiIiIiIXASDdCIiIiIiIiIXwSCdiIiIiIiIyEUwSCciIiIiIiJyEQzSiYiIiIiIiFwEg3QiIiIiIiIiF8EgnYiIiIiIiMhFMEgnImrAFi5cCIlEguTkZGdXxWbOnz8PiUSCRx55xK7vM2jQIEgkEru+h7W2b9+OgQMHIiQkBBKJBGPGjHF2lYiIiMjGGKQTEdVBeXk53nrrLfTp0weNGjWCQqFAy5Ytcdttt2HevHk4d+6cXvk2bdqgTZs2zqmsDbRp0wYSicTsv/Pnzzu7mlZxtwcY58+fxz333IPMzEw8+uijiI+Px4MPPui0+ggh0L59e0gkEowaNcpp9XAVdW1P+/btw3PPPYe+ffuicePG8PHxQefOnfHiiy+iqKjI5HF//PEH7rrrLgQHB8Pf3x8DBgzAhg0bTJbPycnBY489hoiICPj4+KBTp0548803oVKpjJZXKpV47bXX0KFDB/j4+KB58+Z44oknkJ+fb/I9vv76a0RHR8Pf3x8hISG4++67cfjwYaNl16xZgyeffBL9+vWDQqGARCLB6tWrTb42ABw4cAD33HMPmjRpAoVCgQ4dOmDBggW4evWqQdm0tDS8+uqrGDBgAMLCwqBQKNC2bVs89dRTuHjxosn32LVrF+666y60atUKvr6+aNeuHSZOnIijR4+arRsRkS15ObsCRETuprS0FLfeeiv+/PNPtG/fHpMmTULjxo1RUFCA1NRUvP3222jXrh3atWvn7KralEwmw/z5803uDw4OdlxlHOCrr75CRUWFs6uh8+uvv6KyshJLlizBxIkTnV0dJCcn49y5c5BIJNi2bRv++ecfNG/e3NnVcjvjxo1DQUEBbr31VkyePFkX6L/77rv49ttv8dtvv6FZs2Z6x+zatQvDhw+Hj48PHnzwQQQEBOC7777D+PHj8ddff+G5557TK5+bm4v+/fvj77//xr333osOHTpg9+7dmD9/PlJTU/Hjjz/qjRrRaDS45557sG3bNgwYMAD33Xcfzpw5g+XLl2PHjh34/fff0bRpU733ePPNNzF//nxERkZi+vTpKC0txbp163DLLbdgx44dGDhwoF75+fPn48KFC2jSpAkiIiJw4cIFs7+n77//HuPHj4dMJsN9992H8PBw7N+/H6+//jp27tyJHTt2QKFQ6MpPnz4dBw4cQHR0NB588EEoFAocOHAAy5Ytw8aNG7F371507txZ7z0++eQTPPPMMwgODsbYsWPRtGlTnD59Ghs3bsS3336LrVu3YujQobV/qERE9SWIiMgqr732mgAgpk2bJjQajcH+zMxMceLECb1tkZGRIjIy0kE1vCE+Pl4AELt27arX60RGRgqFQmGbStVTVlaWACCmTJlSr9ex1e/GURISElyqvg899JAAIJ5//nkBQLz55pvOrpJT1bU9vf322+LixYt62zQajZgxY4YAIJ566im9fSqVSrRr104oFApx5MgR3faioiLRsWNH4e3tLc6fP693zOTJkwUAsWzZMr33ePDBBwUAsXbtWr3yK1euFADEhAkT9K5xy5YtEwDEE088oVf+9OnTwsvLS3Ts2FEUFRXpth85ckQoFArRpUsXoVar9Y5JSkrS1XPRokUCgFi1ap
XR31FFRYVo2rSpkMvl4uDBg3rnMHPmTAFALFq0SO+Yjz/+WJw5c8bgtd5++20BQNx1111626uqqkRgYKAIDAwU2dnZevu+//57AUAMHjzYaP2IiGyNQToRkZVGjhwpAOjdIJuiDSiN/YuPj9cru3LlShEdHS38/f2Fv7+/iI6ONnnTKoQQu3fvFvfcc48ICwsT3t7eomXLluLee+8Ve/fu1ZUxFTgcO3ZMtGjRQgQHB+uVN8WaIH3IkCFCIpEYBApaTz/9tAAgtm/frrfd0vM3FaSbexByxx13iOrPpbU/1/xX/fiax2ipVCqxZMkS0aNHD+Hj4yMCAwPFoEGDxObNmw3Krlq1Shd8bNu2TcTExAhfX18RGhoqJk+eLAoKCozW19j5GvtX/XM9duyYuP/++0XTpk2Ft7e3aNOmjZg9e7bR99D+rq5cuSJmzpwpWrZsKWQymdn2Vt2VK1eEj4+P6Natm6ioqBABAQGiXbt2Rh9aCXE9mFq5cqW49dZbRVBQkPD19RXt27cXTzzxhLhw4YJe2ZKSErFw4ULRvXt34evrKwIDA0WvXr3E/PnzRVVVlV7Zffv2ibvuukuEhIQIhUIhOnXqJBYsWCDKy8uN/g5NPdgBIO644w69bdrPv6qqSsTHx4vIyEjh7e0tOnToIJYuXWq0rLn2ZK1//vlHABA33XST3vZt27YJAOLRRx81OGb16tUCgEhISNBtKykpEQqFQrRt29bg8zl//rzR4DMmJkYAMPgb1mg0om3btsLf319UVFTots+bN08AEP/9738N6vTII48IAGL37t0mz7W2IP3XX38VAMT9999vsO/KlSu637Wp9lfdtWvXhK+vr/D399fbnpOTIwCIW265xeAYpVIpJBKJ6NatW62vT0RkCxzuTkRkpcaNGwMATp8+jV69epktGxwcjPj4eHz44YcAgGeffVa3b9CgQbr/fuaZZ/DJJ5+gRYsWeOyxxwAA3333HR599FEcOXIEH330kd7rfvTRR5gzZw58fX1x7733onXr1rh48SL27duHb7/9FrfeeqvJOu3btw9xcXHw9/fH3r170a1bN8tP3gIPP/wwdu7cia+//hovv/yy3r5r165h3bp1aN68Oe68807ddmvPv760i87t3r0bU6ZM0a0XUNuQfSEExo0bh02bNqFjx46YOXMmysvLsX79eowePRrvv/8+5syZY3Dc5s2b8fPPPyMuLg633HIL9uzZg6+++grnzp3Dvn37zL6ntg0lJycb1Ff7//v27cPw4cNRVVWFcePGoU2bNkhJScFHH32En376Cb///juaNGmi97pKpRJDhgxBWVkZRo8eDS8vL4Nh1aasXbsWlZWVmDx5Mnx9fTFu3DisWrUKu3fv1mvXwPWh0+PHj8e3336LFi1aYMKECQgMDMT58+exYcMGjBw5Eq1btwYA5Ofn44477sDJkyfRq1cvzJgxAxqNBidPnsQ777yD5557TvcZbdy4ERMmTIBCocD48eMRFhaG7du347XXXsO2bduQnJwMHx8fi87HnAkTJiA1NRUjR46ETCbDhg0bMHPmTMjlcjz++OMA6t6ezJHL5QAALy/9WzXtnPdhw4YZHDN8+HBdPbRSUlKgVCoRGxtrsBBiZGQkOnXqhP3790OtVkMmk6GyshIHDhxAp06dEBkZqVdeIpEgNjYW//nPf3Dw4EHcdtttFtVp9erV2L17N26//XYrfgM35ObmAgCioqIM9gUHByMkJAQXLlxAZmZmrdOMJBIJ5HK5we+iWbNmaNKkCdLT0/HXX3+hVatWun0///wzhBB61ywiIrty9lMCIiJ3s2nTJgFABAQEiOeee05s27at1h5Rc728u3fvFgBEly5d9IaKFhYWio4dOwoAYs+ePbrtaWlpQiqViubNm4usrCy919JoNHpDZ2v2pG/atEn4+vqKTp06GfRg1lZ/mUwm4uPjjf6rPoy2pKRE+Pr6iq5duxq8zpYtW3RDpOt6/rboSR
ei9uHJxo7573//q+t1VSqVuu0XLlwQTZo0EV5eXuLcuXO67dqedC8vL7Fv3z7d9mvXrolBgwYJACIlJcXo+9dkqr5
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/8AAAGKCAYAAABacvENAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAffxJREFUeJzt3Xl8jNf+B/DPzCSZSciqshERuwgRFNFSlCSq1LUUraK2Uqlauukiwm21WkpL6/op2osqXSzVhqC2Cq4ltqBthLRkokQykUgyMuf3Rzojk1kyiclMMvm8X6/ca85znuc5T3JEv+c553skQggBIiIiIiIiInJYUns3gIiIiIiIiIiqFoN/IiIiIiIiIgfH4J+IiIiIiIjIwTH4JyIiIiIiInJwDP6JiIiIiIiIHByDfyIiIiIiIiIHx+CfiIiIiIiIyMEx+CciIiIiIiJycAz+iYiIiIiIiBwcg38iIrKbuXPnQiKRYN++ffZuitVcuXIFEokEY8eOrdL79OzZExKJpErvUVG7du3CI488Am9vb0gkEgwaNMjeTSIiIqJ/MPgnInJAeXl5eO+999ChQwfUrVsXcrkcDRs2RPfu3TF79mykpqbq1W/cuDEaN25sn8ZaQePGjSGRSMx+Xblyxd7NrJCaNjBy5coVPPXUU7h8+TKef/55xMXFYcSIEXZrjxACzZo1g0QiQf/+/e3Wjuqisv3p6NGjGDNmDMLCwuDj4wOFQoFmzZph+PDhOH78uNlzf/jhB/Tt2xf16tWDQqFASEgIRo4ciT///NNo/bS0NEycOBHBwcGQy+Xw8/NDr169sHnzZr16ly5dwsSJExEREYH69etDLpejcePGePLJJ7Fnzx6j19YOlhn7Mva778aNG1iwYAGGDh2KkJAQXV0ioprMyd4NICIi68rNzcWjjz6KM2fOoFmzZhg1ahTq1auHmzdv4tixY3j//ffRtGlTNG3a1N5NtSqZTIa3337b5HEvLy/bNcYGvvrqK+Tn59u7GTq7d+9GQUEBFi1ahGeeecbezcG+ffuQmpoKiUSCnTt34vr16wgMDLR3s2qcgwcPIjExEV27dkXv3r3h5uaGy5cvY9u2bdi8eTO+/PJLPPfcc3rnCCEwefJkrFy5Ek2bNsWIESPg7u6O69evY//+/bh69SqCgoL0zklMTNTNFBkwYACaNGmC27dv48yZM9i9ezeGDRumq3v27Fl8//33iIyMRLdu3eDh4YFr165h69at2LFjB/7973/jrbfeMvo8cXFxBmXGfjekpKTgzTffhEQiQfPmzeHm5lat/r4REVUGg38iIgezZMkSnDlzBhMmTMDKlSsN3lalpaWhsLDQTq2rOk5OTpg7d669m2EzjRo1sncT9Fy/fh0Aqk2A/cUXXwAAZs2ahY8++ghr167Fm2++aedW1TyxsbF45ZVXDMrPnTuHhx9+GK+88gpGjRql93vmk08+wcqVK/Hiiy/ik08+gUwm0zv33r17ep/T09MxdOhQNGjQALt37zbo22XrDxgwAEOGDDH43Xb9+nVEREQgPj4eU6dONRrUW/o7onXr1ti/fz8iIiLg7u6OVq1a4dKlSxadS0RUXXHaPxGRg0lKSgIATJ061eg01ZCQELRq1QrA/fXpV69exdWrV/Wmwpb9j+Q1a9agS5cuqFu3LurWrYsuXbpg7dq1Jttx4MABDBo0CH5+fpDL5QgKCsLgwYNx6NChcp/h3LlzaNiwIby9vS2qXxGPP/44pFIprl69avT4tGnTIJFIkJiYqFde0ecvy9zSirLr93v27In4+HgAQK9evYxOTza15v/evXtYvHgxwsPD4erqCk9PT/Tq1Qvbt283qLt27VpIJBKsXbsWu3btQrdu3eDm5oZ69ephzJgxuHXrVrnPpe1D2jeqpdtbeor5uXPn8PTTT8PX1xdyuRwhISGYPn260Xtov1fZ2dmIjY1FUFAQnJycLP5+Z2dn47vvvkNYWB
jmzZsHd3d3rF69GkIIo/WFEFizZg26d+8OLy8vuLm5oXnz5njhhReQnp6uVzc3Nxfx8fFo164d3Nzc4OnpiYiICLzzzjtQq9V6dX/99Vf0799fN12+VatWiIuLM3iDXF6eCIlEgp49e+qVaX/+arUac+fORePGjSGXy9GiRQt89tlnBnXL60+mKBQKo+VhYWFo3bo1bty4AZVKpSu/e/cu4uPj0aRJEyxdutQg8AdKBupKe++996BSqbBixQqjg1pl68vlcqN9PzAwEI888gjUarXJv9+W8vPzQ48ePeDu7v5A1yEiqk745p+IyMHUq1cPAPDbb7+hffv2Zut6eXkhLi4OS5YsAQBMnz5dd6x0sDFt2jR8+umnaNCgAcaPHw8A+O677/D888/j1KlTWLp0qd51ly5dihkzZsDV1RX/+te/0KhRI1y7dg2HDh3Ct99+i0cffdRkmw4dOoQBAwagTp06OHjwIMLCwix/eAs899xz2Lt3L9avX2/wJvjevXvYuHEjAgMD8fjjj+vKK/r8D0obBO7fvx9jxozRBWnlLV0QQmDo0KHYunUrWrRogalTpyIvLw/ffPMNBg4ciMWLF2PGjBkG523btg07duzAgAED0K1bNxw4cABfffUVUlNTyx180fahffv2GbRX+/+HDh1CdHQ0ioqKMHToUDRu3BhJSUlYunQpfvzxRxw5cgQPPfSQ3nULCwvRu3dv3LlzBwMHDoSTkxP8/PzK/d4BwIYNG1BQUIDRo0fD1dUVQ4cOxZo1a7B//36DIFqj0WD48OH49ttv0aBBA4wcORIeHh64cuUKNm3ahH79+ukC0hs3buCxxx7DxYsX0b59e0yZMgUajQYXL17EBx98gFmzZul+Rps3b8bIkSMhl8sxfPhw+Pr6YteuXZg3bx527tyJffv2mQysK2LkyJE4duwY+vXrB5lMhk2bNmHq1KlwdnbGxIkTAVS+P5mTmpqKS5cuISgoCJ6enrryXbt24fbt23j++edRXFyMbdu24bfffoOXlxf69OmDZs2a6V1HCIHNmzejXr166N27N06cOIH9+/dDo9Ggffv26N27N6RSy95V3bp1C0ePHoWbmxuaNGlitM6GDRtw5coVuLm5oX379ujRo4fF1yciqvEEERE5lK1btwoAwt3dXcyaNUvs3LlT3Lx50+w5wcHBIjg42Oix/fv3CwCidevWIjs7W1eelZUlWrRoIQCIAwcO6MqTk5OFVCoVgYGBIi0tTe9aGo1GXLt2Tfc5Li5OABC//PKLru2urq6iZcuW4urVqxY/c3BwsJDJZCIuLs7o1+eff66rq1KphKurqwgNDTW4zvbt2wUA8corr1T6+dPS0gQAMWbMGIM2mvoeP/bYY6LsP8llvzeWnPPll18KAOKxxx4ThYWFuvKrV6+Khx56SDg5OYnU1FRd+Zo1awQA4eTkJA4dOqQrv3fvnujZs6cAIJKSkozevyxT7S0uLhZNmzYVAERCQoLesVdffVUAEOPGjdMrDw4OFgBEdHS0yM/Pt+j+pXXo0EFIpVJdX9u7d68AIEaNGmVQ99NPPxUAxOOPP25wr/z8fHHr1i3d5yFDhggA4s033zS4jlKpFGq1WgghRE5OjvD09BRyuVycPn1aV6e4uFgMHz5cABDz5s3TlZvqM1ran2lp2p9/ly5dRE5Ojq784sWLwsnJSbRs2VKvfnn9qTxHjx4VcXFx4s033xTPPvuscHd3F25ubmLHjh169d555x0BQLz66qu6vx/aL6lUKmbNmqVXPzU1VQAQnTp1EpMmTdKrD0BERESIP//802ibLl26JOLi4sTbb78txo0bp+vjq1atMqir/X6V/WrRooX43//+V+7zt2zZ0uDvGxFRTcPfYkREDmjRokWibt26ev+R27RpUzF16lTx22+/GdQ3F5iOGzdOABDffPONwbH169cbBG9TpkwRAMTq1avLbWfpgGTVqlVCJpOJzp07i7///tvyhxX3g0VTX+
Hh4Xr1R44cKQCIEydO6JU//fTTAoBITk6u9PPbM/jv3bu3ACCOHj1qUP/dd981CDq1wf/o0aMN6muPffLJJ0bvX5a
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/8AAAGKCAYAAABacvENAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAhXRJREFUeJzt3XlcVOX+B/DPzLAMKAOCsqiIiCui4obS6g4uqGX+1LJIza5cKZcWs0ykbteyXNP0tqiVmmnlnijumiiJoiKuiOLCgIoyiLLNPL8/aEbHGZgZBYbl8369uDfO+c45z+EcqO9znuf7SIQQAkRERERERERUbUmt3QAiIiIiIiIiKl9M/omIiIiIiIiqOSb/RERERERERNUck38iIiIiIiKiao7JPxEREREREVE1x+SfiIiIiIiIqJpj8k9ERERERERUzTH5JyIiIiIiIqrmmPwTERERERERVXNM/omIyGpmzJgBiUSCPXv2WLspZebSpUuQSCR4/fXXy/U83bp1g0QiKddzWGr79u14+umnUadOHUgkEgwePNjaTSIiIqJ/MPknIqqGcnNz8d///hcdOnRA7dq1YW9vj4YNG+LZZ5/F1KlTkZKSohffuHFjNG7c2DqNLQONGzeGRCIp9evSpUvWbqZFqlrHyKVLlzBo0CBcvHgRo0aNQlRUFIYPH2619ggh0LRpU0gkEvTv399q7agsHvd5Onz4MMLDwxEQEABXV1fI5XI0bdoUw4YNw5EjR0r97Lp169C7d2+4ublBLpfD19cXI0aMwJUrV4y2zZLf3by8PHz66afw9/eHXC5HnTp10LdvX/z111+ltun48eN4+eWX0aBBA9jb26N+/fro27cvdu/ebTQ+JycHUVFRCAgIgKOjI1xcXNChQwdER0eX/oMjIqqEbKzdACIiKls5OTl45plncOLECTRt2hQjR46Em5sbbt68ifj4eHz++efw8/ODn5+ftZtapmQyGaZNm1bifhcXl4prTAX46aefcO/ePWs3Q2fHjh3Iy8vD7Nmz8fLLL1u7OdizZw9SUlIgkUiwbds2XL9+HfXr17d2s6qc/fv3IzY2Fl27dkWPHj3g6OiIixcvYuPGjVi7di1+/PFHvPrqq3qfEUJg3Lhx+Pbbb+Hn54fhw4fDyckJ169fx969e3H58mV4e3sbnCs8PNxoJ+Sjv7t5eXno2bMnDh48iLZt2yIiIgJ37tzB77//jueffx6///47Bg0aZHCcn376CaNHj4azszMGDBiABg0a4ObNmzhy5AgOHjyI7t2768WnpaWhR48euHjxInr16oX+/fsjPz8fFy5cwO+//46oqCjLf6BERFbE5J+IqJqZN28eTpw4gTfeeAPffvutwdDw1NRU5OfnW6l15cfGxgYzZsywdjMqTKNGjazdBD3Xr18HgEqTYP/www8AgHfeeQdfffUVli9fjg8//NDKrap6IiMj8e677xpsT0pKQufOnfHuu+9i5MiRen9nFixYgG+//Rb//ve/sWDBAshkMr3PFhUVGT3X66+/jm7dupls08KFC3Hw4EEMHToUv/zyi+7406ZNQ4cOHTB27Fj06NEDTk5Ous8kJCRgzJgx6Ny5M/7880/UqVOn1DYVFRVhyJAhuH79Onbu3GnQMVDSNRARVWYc9k9EVM3ExcUBAMaPH290Trivry9atmwJ4MH89MuXL+Py5ct6Q20fTaSXLVuGLl26oHbt2qhduza6dOmC5cuXl9iOffv2YfDgwfDw8IC9vT28vb3x4osv4sCBAyavISkpCQ0bNkSdOnXMirdEz549IZVKcfnyZaP73377bUgkEsTGxuptt/T6H1Xa1IpH5+9369ZNN6y4e/fuunvy8OdLmvNfVFSEOXPmoF27dnBwcICzszO6d++OTZs2GcQuX74cEokEy5cvx/bt2/HUU0/B0dERbm5uCA8Px61bt0xel/YZ0r4Ffbi9Dw8xT0pKwv/93//B3d0d9v
b28PX1xcSJE42eQ/uzunPnDiIjI+Ht7Q0bGxuzf97at8ABAQH45JNP4OTkhKVLl0IIYTReCIFly5bh2WefhYuLCxwdHdGsWTP861//Qlpaml5sTk4OoqOj0bZtWzg6OsLZ2Rnt27fHxx9/jMLCQr3Yv/76C/3799cNl2/ZsiWioqIMRmyYqhMhkUgMkmLt/S8sLMSMGTPQuHFj2Nvbo3nz5vjmm28MYk09TyWRy+VGtwcEBKBVq1bIzMyESqXSbb9//z6io6PRpEkTzJ8/3yDxB4o76p7Ehg0bABRPF3j4+H5+fhg9ejRu3LiB3377Te8zH330EdRqNX7++WeDxN9Ym3777TccOXIE7777rkHiXxbXQERkDfzLRURUzbi5uQEAzp07h8DAwFJjXVxcEBUVhXnz5gEAJk6cqNv3cLLx9ttv4+uvv0aDBg0wZswYAMDvv/+OUaNG4dixY5g/f77ecefPn49JkybBwcEBL7zwAho1aoRr167hwIED+O233/DMM8+U2KYDBw4gLCwMtWrVwv79+xEQEGD+xZvh1Vdfxa5du7By5UqDN8FFRUVYvXo16tevj549e+q2W3r9T0qbBO7du1dvKLSpqQtCCLz00kvYsGEDmjdvjvHjxyM3Nxe//vorBg4ciDlz5mDSpEkGn9u4cSO2bNmCsLAwPPXUU9i3bx9++uknpKSkmOx80T5De/bsMWiv9v8PHDiAkJAQFBQU4KWXXkLjxo0RFxeH+fPnY/PmzTh06BDq1q2rd9z8/Hz06NEDd+/excCBA2FjYwMPDw+TPzsAWLVqFfLy8vDaa6/BwcEBL730EpYtW4a9e/caJNEajQbDhg3Db7/9hgYNGmDEiBFQKBS4dOkS1qxZg759++pGWWRmZuL555/HmTNnEBgYiIiICGg0Gpw5cwZffPEF3nnnHd09Wrt2LUaMGAF7e3sMGzYM7u7u2L59Oz755BNs27YNe/bsKTGxtsSIESMQHx+Pvn37QiaTYc2aNRg/fjxsbW0xduxYAI//PJUmJSUFZ8+ehbe3N5ydnXXbt2/fjtu3b2PUqFFQq9XYuHEjzp07BxcXF/Tq1QtNmzYt8Zj79u3D4cOHIZVK0axZM/Tq1Qu1a9c2iFMqlQCKOzIfpd22a9cujBo1CkBxZ9D27dvRvn17NG3aFHv37kV8fDxsbGzQpUsXPPXUUwbH+fXXXwEAQ4cOxZUrV7BlyxbcuXMHfn5+6Nu3r9F2ERFVeoKIiKqVDRs2CADCyclJvPPOO2Lbtm3i5s2bpX7Gx8dH+Pj4GN23d+9eAUC0atVK3LlzR7c9KytLNG/eXAAQ+/bt021PTEwUUqlU1K9fX6SmpuodS6PRiGvXrum+j4qKEgDE7t27dW13cHAQLVq0EJcvXzb7mn18fIRMJhNRUVFGvxYvXqyLValUwsHBQfj7+xscZ9OmTQKAePfddx/7+lNTUwUAER4ebtDGkn7Gzz//vHj0X8mP/mzM+cyPP/4oAIjnn39e5Ofn67ZfvnxZ1K1bV9jY2IiUlBTd9mXLlgkAwsbGRhw4cEC3vaioSHTr1k0AEHFxcUbP/6iS2qtWq4Wfn58AIGJiYvT2vffeewKAGD16tN52Hx8fAUCEhISIe/fumXX+h3Xo0EFIpVLds7Zr1y4BQIwcOdIg9uuvvxYARM+ePQ3Ode/ePXHr1i3d90OGDBEAxIcffmhwHKVSKQoLC4UQQmRnZwtnZ2dhb28vjh8/rotRq9Vi2LBhAoD45JNPdNtLema0tPf0Ydr736VLF5Gdna3bfubMGWFjYyNatGihF2/qeTLl8OHDIioqSnz44YfilVdeEU5OTsLR0VFs2bJFL+7jjz8WAMR7772n+/3QfkmlUvHOO+8YHFvbtke/XFxcxI8//mgQ37VrVwFAnDp1ymDfxIkTBQARFBSk27Zz504BQAwaNEgMGDDA4Dy9e/fW+90WQghvb28BQCxcuFDY29vrxderV++xf45ERNbE5J+IqB
qaPXu2qF27tt5/sPr5+Ynx48eLc+fOGcSXlpiOHj1aABC//vqrwb6VK1caJG8RERECgFi6dKnJdj6ckHz//fdCJpO
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/8AAAGKCAYAAABacvENAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAhzlJREFUeJzs3Xl8TNffB/DPzGSZJJLJQjYiYhdBrBG1lIZQlFYVpRTlR6m1rWoRdFFqp+VRWxVVWrVURYLaKoSQErGVEEuWEsmEyDZznj/SGcZMkglJJpl83q+X52nu/c6958698vO955zvkQghBIiIiIiIiIjIbElN3QAiIiIiIiIiKllM/omIiIiIiIjMHJN/IiIiIiIiIjPH5J+IiIiIiIjIzDH5JyIiIiIiIjJzTP6JiIiIiIiIzByTfyIiIiIiIiIzx+SfiIiIiIiIyMwx+SciIiIiIiIyc0z+iYjIZGbOnAmJRIJDhw6ZuinF5saNG5BIJHj33XdL9Dwvv/wyJBJJiZ6jqMLCwvDSSy/ByckJEokEvXv3NnWTiIiI6D9M/omIzNCjR4/w1VdfoVmzZqhUqRKsra1RrVo1tGvXDlOnTsW1a9d04mvUqIEaNWqYprHFoEaNGpBIJAX+uXHjhqmbWSTl7cXIjRs30KtXL1y/fh1Dhw5FSEgI+vfvb7L2CCFQu3ZtSCQSdO/e3WTtKCue93k6efIkhgwZAj8/Pzg7O0Mul6N27dro168fTp8+XeBnf/vtN3Tu3BkuLi6Qy+Xw8fHBgAEDcOvWLYNtK8rf3YLiC3rxFhcXhxEjRsDb2xvW1tZwc3NDx44dsW3bNr3Yx48fY+HChWjWrBmcnJzg6OiIJk2a4Msvv0RaWppR3x8RUVliYeoGEBFR8UpPT0fbtm1x7tw51K5dG4MGDYKLiwvu3buHyMhIfP3116hVqxZq1apl6qYWK5lMhmnTpuW739HRsfQaUwo2bNiAjIwMUzdDa//+/cjMzMSCBQvw9ttvm7o5OHToEK5duwaJRIJ9+/bh7t278PT0NHWzyp2jR48iPDwcrVu3RqdOnWBra4vr169j165d2LZtG3744Qe88847Op8RQmDUqFFYtWoVatWqhf79+8Pe3h53797F4cOHcfPmTXh5eemda8iQIQZfQub3d9fb29tgou/v728wPjw8XDsapWfPnqhZsyYePHiAc+fOYf/+/ejbt682NicnBx07dsTJkyfh7++vPc+ff/6JadOm4aeffkJkZCRsbW0NnouIqCxi8k9EZGYWL16Mc+fO4b333sOqVav0hobHxcUhKyvLRK0rORYWFpg5c6apm1Fqqlevbuom6Lh79y4AlJkEe82aNQCAyZMnY/78+Vi/fj0+/fRTE7eq/Bk7diw+/PBDve0xMTFo2bIlPvzwQwwaNEjn98zSpUuxatUqvP/++1i6dClkMpnOZ3Nzcw2e691338XLL79sdNtq1Khh9N/5+Ph4vPnmm6hatSr279+v9/fn2Tb99ttvOHnyJF5//XVs375dZ1/v3r2xc+dO/PLLLxg8eLDR7SUiMjUO+yciMjMREREAgDFjxhicE+7j44P69esDeDI//ebNm7h586bO0Nln/1G9bt06BAQEoFKlSqhUqRICAgKwfv36fNtx5MgR9O7dG25ubrC2toaXlxfeeOMNHDt2rNBriImJQbVq1eDk5GRUfFG88sorkEqluHnzpsH948aNg0QiQXh4uM72ol7/swqaWvHs/P2XX34Zs2bNAgB07NhRe0+e/nx+c/5zc3OxcOFCNGnSBDY2NlAoFOjYsSN2796tF7t+/XpIJBKsX78eYWFhaNOmDWxtbeHi4oIhQ4bg/v37hV6X5hkKCQnRa+/TQ8xjYmLw1ltvwdXVFdbW1vDx8cGECRMMnkPzXaWmpmLs2LHw8vKChYWF0d93amoqfv31V/j5+WH27Nmwt7fH2rVrIYQwGC+EwLp169CuXTs4OjrC1t
YWderUwf/+9z/Ex8frxKanp2PWrFlo3LgxbG1toVAo0LRpU0yfPh05OTk6sX/99Re6d++uHS5fv359hISE6I3YKKxOhEQi0UuKNfc/JycHM2fORI0aNWBtbY26deviu+++04st7HnKj1wuN7jdz88PDRo0QHJyMpRKpXb748ePMWvWLNSsWRNLlizRS/yBvBd1pe2rr76CUqnEypUrDb44e7ZN169fBwB069ZNL1YzjeTff/8tgZYSEZUc9vwTEZkZFxcXAMCVK1fyHf6q4ejoiJCQECxevBgAMGHCBO2+p5ONcePGYdmyZahatSqGDx8OAPj1118xdOhQnD17FkuWLNE57pIlSzBx4kTY2Njg9ddfR/Xq1XHnzh0cO3YMv/zyC9q2bZtvm44dO4aePXvCzs4OR48ehZ+fn/EXb4R33nkHBw8exKZNm/R6gnNzc7FlyxZ4enrilVde0W4v6vW/KE0SePjwYZ2h0IVNXRBC4M0338TOnTtRt25djBkzBo8ePcLPP/+M1157DQsXLsTEiRP1Prdr1y7s2bMHPXv2RJs2bXDkyBFs2LAB165dK/Tli+YZOnTokF57Nf//2LFjCA4ORnZ2Nt58803UqFEDERERWLJkCX7//XecOHEClStX1jluVlYWOnXqhIcPH+K1116DhYUF3NzcCv3uAGDz5s3IzMzE4MGDYWNjgzfffBPr1q3D4cOH9ZJotVqNfv364ZdffkHVqlUxYMAAODg44MaNG9i6dSu6deumTRaTk5PRoUMHXLp0Cf7+/hg9ejTUajUuXbqEuXPnYvLkydp7tG3bNgwYMADW1tbo168fXF1dERYWhtmzZ2Pfvn04dOhQvol1UQwYMACRkZHo1q0bZDIZtm7dijFjxsDS0hIjRowA8PzPU0GuXbuGy5cvw8vLCwqFQrs9LCwMDx48wNChQ6FSqbBr1y5cuXIFjo6OCAoKQu3atfM95pEjR3Dy5ElIpVLUqVMHQUFBqFSpUr7xqampWLVqFe7duwdnZ2e89NJLaNSokV6cEALbtm2Di4sLOnXqhKioKBw+fBhqtRr+/v7o1KkTpFLd/jDN7529e/dqv0eNPXv2QCKRoGPHjkZ9V0REZYYgIiKzsnPnTgFA2Nvbi8mTJ4t9+/aJe/fuFfgZb29v4e3tbXDf4cOHBQDRoEEDkZqaqt2ekpIi6tatKwCII0eOaLdHR0cLqVQqPD09RVxcnM6x1Gq1uHPnjvbnkJAQAUD8+eef2rbb2NiIevXqiZs3bxp9zd7e3kImk4mQkBCDf1asWKGNVSqVwsbGRvj6+uodZ/fu3QKA+PDDD5/7+uPi4gQAMWTIEL025vcdd+jQQTz7P8nPfjfGfOaHH34QAESHDh1EVlaWdvvNmzdF5cqVhYWFhbh27Zp2+7p16wQAYWFhIY4dO6bdnpubK15++WUBQERERBg8/7Pya69KpRK1atUSAERoaKjOvo8++kgAEMOGDdPZ7u3tLQCI4OBgkZGRYdT5n9asWTMhlUq1z9rBgwcFADFo0CC92GXLlgkA4pVXXtE7V0ZGhrh//7725z59+ggA4tNPP9U7TmJiosjJyRFCCJGWliYUCoWwtrYWf//9tzZGpVKJfv36CQBi9uzZ2u35PTMamnv6NM39DwgIEGlpadrtly5dEhYWFqJevXo68YU9T4U5efKkCAkJEZ9++qkYOHCgsLe3F7a2tmLPnj06cdOnTxcAxEcffaT9+6H5I5VKxeTJk/WOrWnbs38cHR3FDz/8kO93YuhP165dRVJSkk7stWvXBADRokULMXLkSL3PNG3aVNy6dUvnM2q1WvTu3VsAEP7+/mLChAliwoQJokmTJsLBwUF8//33z/U9EhGZEpN/IiIztGDBAlGpUiWdf+DWqlVLjBkzRly5ckUvvqDEdNiwYQKA+Pnnn/X2bdq0SS95Gz16tAAg1q5dW2g7n05IVq9eLWQymWjVqpX4999/jb9Y8SRZzO9PkyZNdOIHDB
ggAIioqCid7W+99ZYAIKKjo5/7+k2Z/Hfq1EkAECdPntSL//LLL/WSTk3yP3jwYL14zb6lS5caPP+z8mvvkSNHBAD
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+sAAAGKCAYAAAB0N9CfAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAf7lJREFUeJzt3Xd8U+X+B/BPkrbpbilQuksps5Q9i8qSMlTWVS7iRVBErwgOcPwuLqheBb0gzot6EVARQVEZDqDIKIVCGUUoZbaFMjpoS5vupsnz+6MmEpK0SUdO2n7er1demnOec873JA9pvnmWTAghQERERERERER2Qy51AERERERERERkiMk6ERERERERkZ1hsk5ERERERERkZ5isExEREREREdkZJutEREREREREdobJOhEREREREZGdYbJOREREREREZGeYrBMRERERERHZGSbrRERERERERHaGyToRUTO2ePFiyGQy7N27V+pQGsylS5cgk8nwyCOPNOp1hg8fDplM1qjXsNbOnTtxxx13oFWrVpDJZJg0aZLUIREREVEjYbJORFQHJSUlePvtt9G3b1+4u7tDqVQiKCgId911FxYuXIjU1FSD8u3bt0f79u2lCbYBtG/fHjKZrMbHpUuXpA7TKk3th4xLly5h4sSJSEtLw6OPPopFixbhwQcflCweIQQ6duwImUyGe++9V7I47EVd61N8fDyef/559OvXD61bt4azszO6du2K//u//0NBQYHZ444cOYJ77rkH3t7ecHNzw+DBg/Hdd9+ZLZ+ZmYnHHnsM/v7+cHZ2RpcuXfDWW29BrVYblU1NTcXixYsxYcIEBAYGQiaT1fj5deHCBbz99tsYOnQoAgIC4OTkhODgYMyYMQNnz541e9z27dsxatQoeHt7w8XFBT169MB7770HjUZj9piioiIsWrQIkZGRcHV1hbe3N/r27YuYmJh6v05xcXF44YUXMGLECHh5ednkR0Eiopo4SB0AEVFTU1RUhDvvvBMnT55Ex44dMX36dLRu3Rq5ublITEzE0qVLER4ejvDwcKlDbVAKhQKvvvqq2f3e3t62C8YGvvrqK5SWlkodht6uXbtQXl6O5cuX46GHHpI6HOzduxepqamQyWTYsWMHrl+/joCAAKnDanIeeOAB5Obm4s4778SMGTP0Cf+7776LTZs24eDBg2jXrp3BMXv27MGYMWPg7OyMBx98EB4eHvjhhx8wdepUXLlyBc8//7xB+aysLAwaNAhXr17F5MmT0alTJ+zbtw+vvvoqEhMTsXnzZoNeJPv370dMTAwUCgW6deuGrKysGu/htddew8aNGxEZGYmJEyfC09MTp06dwtdff41NmzZh+/btGDp0qMExH374IZ599ll4enrib3/7G7y9vbFr1y48//zzSEhIwPfff290nYyMDIwcORJpaWkYNWoU7r33XlRUVODixYv44YcfsGjRonq9TqtXr8aXX34JV1dXhISEQKVS1XjfRESNThARkVXeeOMNAUDMnj1baLVao/1paWnizJkzBttCQ0NFaGiojSL8y6JFiwQAsWfPnnqdJzQ0VCiVyoYJqp7S09MFADFz5sx6naehXhtbiYmJsat4//GPfwgA4oUXXhAAxFtvvSV1SJKqa31aunSpuHbtmsE2rVYr5syZIwCIp556ymCfWq0W4eHhQqlUiqSkJP32goIC0blzZ+Hk5CQuXbpkcMyMGTMEALFy5UqDazz44IMCgFi/fr1B+dTUVJGQkCBKS0uFEEIolcoaP7/WrFkjjh8/brT922+/FQBERESEwfZr164JpVIpWrVqZRCrWq0WEydOFADEt99+a3Tf/fv3Fy4uLmL37t1G11Kr1UbPrX2djhw5IpKTk0VVVZVISEhokM8ZIqL6YLJORGSlcePGCQAGXwDN0SWWph6LFi0yKLt69WoxcOBA4ebmJtzc3MTAgQPFmjVrzJ573759YuLEicLX11c4OTmJoK
AgMXnyZLF//359GXMJxKlTp0RgYKDw9vY2KG+ONcn6yJEjhUwmM/oirPP0008LAGLnzp0G2y29f3PJek0/iAwbNkzc+vu07vntj1uPv/0YHbVaLZYvXy569uwpnJ2dhaenpxg+fLjYunWrUdk1a9YIAGLNmjVix44dIioqSri4uAgfHx8xY8YMkZubazJeU/dr6nHr+3rq1CkxZcoU0bZtW+Hk5CTat28vnn32WZPX0L1WN2/eFHPnzhVBQUFCoVDUWN9udfPmTeHs7CwiIyNFaWmp8PDwEOHh4SZ/vBKiOjFcvXq1uPPOO4WXl5dwcXERHTt2FE888YS4fPmyQVmVSiUWL14sevToIVxcXISnp6fo3bu3ePXVV0VlZaVB2fj4eHHPPfeIVq1aCaVSKbp06SJef/11UVJSYvI1NJd4ARDDhg0z2KZ7/ysrK8WiRYtEaGiocHJyEp06dRKffPKJybI11SdrXb9+XQAQ3bt3N9i+Y8cOAUA8+uijRsesXbtWABAxMTH6bSqVSiiVStGhQwej9+fSpUsCgBgxYkSNsdSWrNekc+fOAoC4ceOGftuqVasEAPHiiy8alT9x4oQAIIYOHWqwXZf4v/baaxZd19rX6XZM1onIHrAbPBGRlVq3bg0AOH/+PHr37l1jWW9vbyxatAjvv/8+AOC5557T7xs+fLj+/5955hl89NFHCAwMxGOPPQYA+OGHH/Doo48iKSkJH3zwgcF5P/jgA8yfPx8uLi6YPHkyQkJCcO3aNcTHx2PTpk248847zcYUHx+P8ePHw83NDfv370dkZKTlN2+Bhx9+GLt378Y333yDl19+2WBfVVUVNmzYgICAANx999367dbef33pxqHu27cPM2fO1I/Hra0rvxACDzzwALZs2YLOnTtj7ty5KCkpwcaNGzFhwgS89957mD9/vtFxW7duxS+//ILx48djyJAhiIuLw1dffYXU1FTEx8fXeE1dHdq7d69RvLr/xsfHY8yYMaisrMQDDzyA9u3bIyEhAR988AF+/vlnHDp0CG3atDE4b0VFBUaOHIni4mJMmDABDg4ORt2tzVm/fj3Ky8sxY8YMuLi44IEHHsCaNWuwb98+g3oNAFqtFlOnTsWmTZsQGBiIadOmwdPTE5cuXcJ3332HcePGISQkBACQk5ODYcOG4ezZs+jduzfmzJkDrVaLs2fP4p133sHzzz+vf4++//57TJs2DUqlElOnToWvry927tyJN954Azt27MDevXvh7Oxs0f3UZNq0aUhMTMS4ceOgUCjw3XffYe7cuXB0dMTjjz8OoO71qSaOjo4AAAcHw69qujHxo0ePNjpmzJgx+jh0EhISUFFRgejoaKMJE0NDQ9GlSxccOHAAGo0GCoWizvGaY+o+dN3qw8LCjMrrth08eBAVFRVQKpUAgI0bNwIApkyZgitXruCXX35BQUEBwsPDMW7cOLi7uxucx9rXiYjILkn9awERUVOzZcsWAUB4eHiI559/XuzYsaPWFtKaWn337dsnAIhu3bqJgoIC/fb8/Hx9q1RcXJx++4kTJ4RcLhcBAQEiPT3d4FxardagS+3tLetbtmwRLi4uokuXLkYtmrXFr1AoxKJFi0w+bu1eq1KphIuLi1HXVyGE2LZtm77rdF3vvyFa1oWovduyqWO+/PJLfStsRUWFfvvly5dFmzZthIODg0hNTdVv17WsOzg4iPj4eP32qqoqMXz4cAFAJCQkmLz+7czFq9FoRHh4uAAgtm/fbrDvxRdfFADErFmzDLaHhoYKAGLMmDH6rs7W6Nu3r5DL5fq6tnv3bgFATJ8+3ajsRx99JACIu+++2+hapaWlIi8vT//8/vvvFwDEyy+/bHSerKwsfVfnwsJC4eXlJZRKpfjjjz/0ZTQajZg6daoAIN544w399vq0rA8aNEgUFhbqt589e1Y4ODiILl26GJRv6GEV77zzjsnW5wceeEAAEEePHjV5nLu7uwgODtY///jjjwUAsW
zZMpPl77vvPgHAoN7erq4t64cPHxYAxIABAwy2f/rpp7W2rAMQKSkp+u3BwcECgPj444+FUqk06MHQtm1bo9fd2tf
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+0AAAGKCAYAAAB5KaDYAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAVEpJREFUeJzt3Xd41FXa//HPpE0oKQQISei9CFIFo670qmABBRRERLFhAWxYQPZZRfmhosjKoyLsrqCProWyCkTpLoI0QZqKQVQIiNlUIJlkzu+P7IwOSSAJmZnvMO/XdeWS+db7zNwM3jnne47NGGMEAAAAAAAsJ8TfAQAAAAAAgJJRtAMAAAAAYFEU7QAAAAAAWBRFOwAAAAAAFkXRDgAAAACARVG0AwAAAABgURTtAAAAAABYFEU7AAAAAAAWRdEOAAAAAIBFUbQDACrk6aefls1m09q1a/0dSqU5dOiQbDabbr31Vq/ep0ePHrLZbF69R3mtWrVKl19+uWrUqCGbzaZrr73W3yEBAABRtAOA5eTm5urZZ59Vp06dVL16ddntdtWrV09/+tOfNGXKFB08eNDj+EaNGqlRo0b+CbYSNGrUSDab7aw/hw4d8neY5RJov9A4dOiQrrnmGv3www8aO3aspk2bphEjRvgtHmOMmjVrJpvNpquuuspvcVhFZeVTfn6+OnToIJvNplatWpV63FdffaVBgwYpNjZW1apV06WXXqr33nuv2HG5ubl6++23deONN6pFixaqUqWKYmNj1b17d73zzjuVFtPbb7+tO++8U126dJHdbpfNZtPChQvPet2srCxNmjRJDRs2lN1uV6NGjfTwww8rJyenTHEBgJWE+TsAAMDvsrOzdcUVV2jXrl1q1qyZRo0apZo1a+rEiRPasmWLnnvuOTVt2lRNmzb1d6iVKjQ0VE8++WSp+2NjY30XjA/8/e9/18mTJ/0dhttnn32m06dP64UXXtBNN93k73C0du1aHTx4UDabTStXrtSRI0eUlJTk77AC3vTp0/X999+f9Zg1a9aof//+ioyM1IgRIxQVFaUPPvhAw4cP108//aTJkye7j92wYYNGjx6tmjVrqnfv3ho6dKiOHz+uDz/8UDfddJO++OILvfrqq+cd05NPPqkff/xRtWrVUmJion788cezHp+bm6vu3btr586d6tevn0aOHKkdO3Zo1qxZWrdundavX6/IyMizXgMArISiHQAsZPbs2dq1a5duv/12vf7668WGUKempiovL89P0XlPWFiYnn76aX+H4TMNGjTwdwgejhw5IkmWKYznz58vSZo8ebJmzZqlhQsX6vHHH/dzVIFty5Ytev755/Xyyy9rwoQJJR5TUFCgO+64QyEhIVq/fr06dOggSZo6daq6du2qxx9/XMOGDVPDhg0lSQkJCfrHP/6hG2+8UREREe7rPPvss+rWrZvmzp2rW265RV27dq1wTJL05ptvqnnz5mrYsKGee+45TZky5axtnTlzpnbu3KlHH31Uzz33nHv7Y489pueff14vvfTSOa8BAJZiAACWMXDgQCPJ7Nix45zHpqamGkkl/kybNs3j2Lfeest07drVVKtWzVSrVs107drVLFiwoNRrr1u3zlxzzTUmPj7eREREmHr16pnrrrvObNiwwX3MtGnTjCSzZs0aj3N3795t6tata2JjYz2OL03Dhg2N3W4/53HGGNOrVy9js9nMoUOHStx/3333GUlm1apVHtvL2n7XezpmzJhiMTZs2LDEe3bv3t388Z9T1+szf/54/pnnuDgcDvPCCy+Yiy++2ERGRpro6GjTo0cPs3Tp0mLHLliwwEgyCxYsMCtXrjTJycmmSpUqJi4uztxyyy3mxIkTJcZbUntL+vnj57p7925zww03mNq1a5uIiAjTqFEj88ADD5R4D9d79Z///Mfce++9pl69eiY0NPSs+fZH//nPf0xkZKRp27atOXnypImKijJNmzY1TqezxO
OdTqd56623zBVXXGFiYmJMlSpVTLNmzcz48ePNjz/+6HFsVlaWefrpp027du1MlSpVTHR0tOnQoYN58sknTX5+vsexGzduNIMGDTI1atQwdrvdtGzZ0kydOtXk5uaW+B6emTMukkz37t09trk+//z8fDNt2jTTsGFDExERYZo3b27mzp1b4rFny6dzOXXqlGnVqpXp3r27cTqdRpJp2bJlseNWrlxpJJmxY8cW27dw4UIjyUyfPr1M93z22WeNJPP//t//O6+YzjRjxgx33pfE6XSapKQkU716dZOTk+OxLycnx1SvXt00adKkTG0AAKugpx0ALKRmzZqSpG+//dbdy1Wa2NhYTZs2TbNnz5YkPfjgg+59PXr0cP/5/vvv15w5c1S3bl2NGzdOkvTBBx9o7Nix2rFjh15++WWP67788suaOHGiqlSpouuuu04NGjTQL7/8oo0bN+qf//ynrrjiilJj2rhxowYPHqxq1appw4YNatu2bdkbXwajR4/W6tWrtWjRomI9rwUFBXr33XeVlJSk3r17u7eXt/3nyzWJ3bp16zRmzBj3fAPnGuJvjNGwYcO0ZMkStWjRQvfee69yc3P1f//3fxoyZIhefPFFTZw4sdh5S5cu1b/+9S8NHjxYl112mdavX6+///3vOnjwoDZu3HjWe7pyaO3atcXidf1348aN6t+/v/Lz8zVs2DA1atRImzZt0ssvv6zly5fryy+/VK1atTyum5eXp169eiknJ0dDhgxRWFiY6tSpc873TpIWL16s06dP65ZbblGVKlU0bNgwLViwQOvWrfPIa0lyOp0aPny4/vnPf6pu3boaOXKkoqOjdejQIb333nsaOHCge1TD8ePH1b17d+3fv18dOnTQ3XffLafTqf379+v555/X5MmT3Z/R+++/r5EjR8put2v48OGKj4/XqlWr9Oc//1krV67U2rVrK2V49ciRI7VlyxYNHDhQoaGheu+993TvvfcqPDxcd9xxh6SK59MfPf744zp8+LCWL19+1gkQXc/M9+vXr9i+/v37u+Moi/DwcElFo2jOJ6by+u6773TkyBH1799f1apV89hXrVo1XX755Vq5cqV++ukn1a9fv9LuCwBe5e/fGgAAfrdkyRIjyURFRZnJkyeblStXnrPH9Gy9wOvWrTOSTOvWrU1GRoZ7e3p6umnRooWRZNavX+/evnPnThMSEmKSkpJMamqqx7WcTqf55Zdf3K/P7GlfsmSJqVKlimnZsmWxHs5zxR8aGmqmTZtW4s9rr73mPjYrK8tUqVLFtGnTpth1li1bZiSZhx56qMLtr4yedmNKH4VwtnP+9re/uXtl8/Ly3Nt//PFHU6tWLRMWFmYOHjzo3u7qaQ8LCzMbN250by8oKDA9evQwksymTZtKvP+ZSou3sLDQNG3a1EgyK1as8Nj38MMPG0nmtttu89jesGFDI8n079/fnDx5skz3/6NOnTqZkJAQd66tXr3aSDKjRo0qduycOXOMJNO7d+9i9zp58qT57bff3K+HDh1qJJnHH3+82HXS0tKMw+EwxhiTmZlpYmJijN1uN19//bX7mMLCQjN8+HAjyfz5z392bz+fnvZu3bqZzMxM9/b9+/ebsLCwYj3O58qns1m3bp0JCQkxs2fP9oippF7tYcOGGUlm69atJV6revXqpn79+ue8Z0FBgWnXrp2x2Wxm9+7d5xXTmc7V0758+XIjyUyYMKHE/RMmTDCSzOeff37OewGAVTB7PABYyJAhQ/TCCy/IGKMXXnhB/fv3V61atdSsWTNNmDBB3333Xbmu97e//U1S0ezTMTEx7u01atTQtGnTJMljFub//d//ldPp1F/+8pdiM9LbbLZSn3meP3++rr/+erVr104bN24s9zPbhYWFmj59eok/8+bNcx8XFRWla6+9Vnv37tX27ds9rvGPf/xDkjRq1KgKt9+fXLHOnDnT4/ngBg0aaOLEiSooKNCiRYuKnXfTTTfp8ssvd78ODQ
3VmDFjJBXNAn4+vvjiCx08eFADBw5097S6TJ06VXFxcVq8eLHy8/OLnTtz5kxVqVKlXPfbuXOntm/frt69e7tzrUe
"text/plain": [
"<Figure size 1200x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot the stock trajectory of each account under review, in a fixed order.\n",
"for _account_id in (\n",
"    '200001928',\n",
"    '366351',\n",
"    '365966',\n",
"    '365568',\n",
"    '200129601',\n",
"    '402410',\n",
"):\n",
"    plot_account(_account_id)\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "31407450-a833-4fce-8b0b-dba1b1de585f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Product - Isin</th>\n",
" <th>n_ruptures</th>\n",
" <th>obs</th>\n",
" <th>rupture_ratio</th>\n",
" <th>max_gap</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>17027</th>\n",
" <td>200127410</td>\n",
" <td>FR0010135103</td>\n",
" <td>434</td>\n",
" <td>436</td>\n",
" <td>0.995413</td>\n",
" <td>295985.420</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17029</th>\n",
" <td>200127410</td>\n",
" <td>FR0010148981</td>\n",
" <td>317</td>\n",
" <td>319</td>\n",
" <td>0.993730</td>\n",
" <td>67134.706</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68901</th>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>LU0992630599</td>\n",
" <td>154</td>\n",
" <td>155</td>\n",
" <td>0.993548</td>\n",
" <td>529752.634</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39099</th>\n",
" <td>366441</td>\n",
" <td>FR0010135103</td>\n",
" <td>142</td>\n",
" <td>143</td>\n",
" <td>0.993007</td>\n",
" <td>439160.588</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39101</th>\n",
" <td>366441</td>\n",
" <td>FR0010148981</td>\n",
" <td>142</td>\n",
" <td>143</td>\n",
" <td>0.993007</td>\n",
" <td>86246.897</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3083</th>\n",
" <td>200001928</td>\n",
" <td>LU0992624949</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>110779.418</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2080</th>\n",
" <td>200001349</td>\n",
" <td>FR0010149120</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>23881.992</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3084</th>\n",
" <td>200001928</td>\n",
" <td>LU0992625839</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>12675.630</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31744</th>\n",
" <td>365095</td>\n",
" <td>FR0010149120</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>22990.942</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2085</th>\n",
" <td>200001349</td>\n",
" <td>FR0010149302</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>412.499</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65735</th>\n",
" <td>422329</td>\n",
" <td>FR0010306142</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>18289.694</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3129</th>\n",
" <td>200001939</td>\n",
" <td>LU0592698954</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>57179.957</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2090</th>\n",
" <td>200001349</td>\n",
" <td>FR0011269083</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>29403.491</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2086</th>\n",
" <td>200001349</td>\n",
" <td>FR0010306142</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>9417.847</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65730</th>\n",
" <td>422329</td>\n",
" <td>FR0010149120</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>20342.726</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2108</th>\n",
" <td>200001349</td>\n",
" <td>LU0336083497</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>592.246</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65618</th>\n",
" <td>422310</td>\n",
" <td>FR0010149120</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>6006.071</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65613</th>\n",
" <td>422310</td>\n",
" <td>FR0010135103</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>4258.656</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31804</th>\n",
" <td>365096</td>\n",
" <td>FR0010149120</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>844.519</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31800</th>\n",
" <td>365096</td>\n",
" <td>FR0010148981</td>\n",
" <td>129</td>\n",
" <td>130</td>\n",
" <td>0.992308</td>\n",
" <td>962.783</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Product - Isin n_ruptures obs rupture_ratio \\\n",
"17027 200127410 FR0010135103 434 436 0.995413 \n",
"17029 200127410 FR0010148981 317 319 0.993730 \n",
"68901 PRIVATE CLIENT LU0992630599 154 155 0.993548 \n",
"39099 366441 FR0010135103 142 143 0.993007 \n",
"39101 366441 FR0010148981 142 143 0.993007 \n",
"3083 200001928 LU0992624949 129 130 0.992308 \n",
"2080 200001349 FR0010149120 129 130 0.992308 \n",
"3084 200001928 LU0992625839 129 130 0.992308 \n",
"31744 365095 FR0010149120 129 130 0.992308 \n",
"2085 200001349 FR0010149302 129 130 0.992308 \n",
"65735 422329 FR0010306142 129 130 0.992308 \n",
"3129 200001939 LU0592698954 129 130 0.992308 \n",
"2090 200001349 FR0011269083 129 130 0.992308 \n",
"2086 200001349 FR0010306142 129 130 0.992308 \n",
"65730 422329 FR0010149120 129 130 0.992308 \n",
"2108 200001349 LU0336083497 129 130 0.992308 \n",
"65618 422310 FR0010149120 129 130 0.992308 \n",
"65613 422310 FR0010135103 129 130 0.992308 \n",
"31804 365096 FR0010149120 129 130 0.992308 \n",
"31800 365096 FR0010148981 129 130 0.992308 \n",
"\n",
" max_gap \n",
"17027 295985.420 \n",
"17029 67134.706 \n",
"68901 529752.634 \n",
"39099 439160.588 \n",
"39101 86246.897 \n",
"3083 110779.418 \n",
"2080 23881.992 \n",
"3084 12675.630 \n",
"31744 22990.942 \n",
"2085 412.499 \n",
"65735 18289.694 \n",
"3129 57179.957 \n",
"2090 29403.491 \n",
"2086 9417.847 \n",
"65730 20342.726 \n",
"2108 592.246 \n",
"65618 6006.071 \n",
"65613 4258.656 \n",
"31804 844.519 \n",
"31800 962.783 "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rupture detection per (account, ISIN) pair, without aggregating across ISINs.\n",
"# An observation is flagged when its stock differs from the previous\n",
"# observation's stock plus the previous observation's net flows.\n",
"\n",
"# Absolute tolerance on the stock gap below which no rupture is flagged.\n",
"TOL = 1e-6\n",
"\n",
"# Stocks: keep the needed columns, parse the dates, order each pair chronologically.\n",
"stocks_isin = (\n",
"    stocks[[\n",
"        \"Registrar Account - ID\", \"Product - Isin\",\n",
"        \"Centralisation Date\", \"Quantity - AUM\",\n",
"    ]]\n",
"    .assign(**{\"Centralisation Date\": lambda d: pd.to_datetime(d[\"Centralisation Date\"])})\n",
"    .sort_values([\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"])\n",
")\n",
"\n",
"# Flows: same columns, summed so there is one net-flow value per (account, ISIN, date).\n",
"flows_isin = (\n",
"    flows[[\n",
"        \"Registrar Account - ID\", \"Product - Isin\",\n",
"        \"Centralisation Date\", \"Quantity - NetFlows\",\n",
"    ]]\n",
"    .assign(**{\"Centralisation Date\": lambda d: pd.to_datetime(d[\"Centralisation Date\"])})\n",
"    .groupby([\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"])[\"Quantity - NetFlows\"]\n",
"    .sum()\n",
"    .reset_index()\n",
")\n",
"\n",
"# Attach flows to every stock observation (missing flow -> 0), then derive, in order:\n",
"# previous stock / previous net flows within the pair, the stock they imply,\n",
"# the gap to the observed stock, and the rupture flag.\n",
"merged_isin = (\n",
"    pd.merge(\n",
"        stocks_isin,\n",
"        flows_isin,\n",
"        how=\"left\",\n",
"        on=[\"Registrar Account - ID\", \"Product - Isin\", \"Centralisation Date\"],\n",
"    )\n",
"    .fillna({\"Quantity - NetFlows\": 0})\n",
"    .assign(\n",
"        prev_stock=lambda d: (\n",
"            d.groupby([\"Registrar Account - ID\", \"Product - Isin\"])[\"Quantity - AUM\"].shift(1)\n",
"        ),\n",
"        prev_netflows=lambda d: (\n",
"            d.groupby([\"Registrar Account - ID\", \"Product - Isin\"])[\"Quantity - NetFlows\"]\n",
"            .shift(1)\n",
"            .fillna(0)\n",
"        ),\n",
"        expected_stock=lambda d: d[\"prev_stock\"] + d[\"prev_netflows\"],\n",
"        gap=lambda d: d[\"Quantity - AUM\"] - d[\"expected_stock\"],\n",
"        # The first observation of a pair has no previous stock and is never flagged.\n",
"        rupture_flag=lambda d: d[\"prev_stock\"].notna() & d[\"gap\"].abs().gt(TOL),\n",
"    )\n",
")\n",
"\n",
"# One row per (account, ISIN): rupture count, observation count, rupture share and\n",
"# largest absolute gap, with the worst trajectories first.\n",
"rupture_isin_summary = (\n",
"    merged_isin\n",
"    .groupby([\"Registrar Account - ID\", \"Product - Isin\"])\n",
"    .agg(\n",
"        n_ruptures=(\"rupture_flag\", \"sum\"),\n",
"        obs=(\"rupture_flag\", \"count\"),\n",
"        rupture_ratio=(\"rupture_flag\", \"mean\"),\n",
"        max_gap=(\"gap\", lambda gaps: gaps.abs().max()),\n",
"    )\n",
"    .reset_index()\n",
"    .sort_values(\"rupture_ratio\", ascending=False)\n",
")\n",
"\n",
"rupture_isin_summary.head(20)\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "baa4b6cd-887d-45a6-af27-253a9aa8710f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pairs with strong ruptures: more than half of the observations flagged,\n",
"# or a largest absolute gap above 50000.\n",
"strong = rupture_isin_summary.loc[\n",
"    lambda s: s[\"rupture_ratio\"].gt(0.5) | s[\"max_gap\"].gt(50000)\n",
"]\n",
"\n",
"def find_successors(account_id, isin, window_days=15, min_jump=0):\n",
"    \"\"\"\n",
"    List accounts that may have taken over the position of `account_id` in `isin`.\n",
"\n",
"    Reads the notebook-level `merged_isin` frame. For each date on which the\n",
"    (account_id, isin) pair has `rupture_flag` set, every other account whose\n",
"    row for the same ISIN has `gap > min_jump` and a \"Centralisation Date\"\n",
"    within +/- `window_days` days of that date is kept as a candidate.\n",
"\n",
"    Parameters:\n",
"        account_id: value matched against \"Registrar Account - ID\".\n",
"        isin: value matched against \"Product - Isin\".\n",
"        window_days: half-width, in days, of the window around each rupture date.\n",
"        min_jump: a gap must be strictly greater than this to count as a jump;\n",
"            the default 0 accepts any positive gap.\n",
"\n",
"    Returns:\n",
"        A list of distinct candidate account IDs (never `account_id` itself),\n",
"        sorted by their string form; an empty list when the pair has no rupture.\n",
"    \"\"\"\n",
"    # Every filter below is restricted to this ISIN, so slice the frame once.\n",
"    same_isin = merged_isin[merged_isin[\"Product - Isin\"] == isin]\n",
"\n",
"    rupture_dates = same_isin.loc[\n",
"        (same_isin[\"Registrar Account - ID\"] == account_id) & same_isin[\"rupture_flag\"],\n",
"        \"Centralisation Date\",\n",
"    ].unique()\n",
"    if len(rupture_dates) == 0:\n",
"        return []\n",
"\n",
"    # Loop-invariant part of the search: rows of other accounts whose gap exceeds\n",
"    # min_jump. Filtering here avoids rescanning the whole frame per rupture date.\n",
"    jumps = same_isin[\n",
"        (same_isin[\"gap\"] > min_jump)\n",
"        & (same_isin[\"Registrar Account - ID\"] != account_id)\n",
"    ]\n",
"    jump_dates = jumps[\"Centralisation Date\"]\n",
"    half_window = pd.Timedelta(days=window_days)\n",
"\n",
"    candidates = set()\n",
"    for rupture_date in rupture_dates:\n",
"        # unique() may yield numpy datetime64 scalars; normalise before date arithmetic.\n",
"        centre = pd.Timestamp(rupture_date)\n",
"        in_window = (jump_dates >= centre - half_window) & (jump_dates <= centre + half_window)\n",
"        candidates.update(jumps.loc[in_window, \"Registrar Account - ID\"].unique())\n",
"\n",
"    # Sorted for a reproducible order across kernel runs; key=str so that IDs of\n",
"    # mixed types (if any) cannot make the comparison fail.\n",
"    return sorted(candidates, key=str)\n",
"\n",
"find_successors(account_id=\"200129601\", isin=\"FR0010135103\")\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "0b834da2-f781-476d-84a6-aebb38fb8dac",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABCcAAAJ0CAYAAAAh/0nHAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xd4VMXewPHvbnrvoYUk1NB77x2k9yJKERuKckUUC76AoIh6UbwIFlBsiCAgvUhv0nsSeglJIL2Rnuy8f2yyyZJNoYSN+vs8Tx7YU+bMmT1zzuycKRqllEIIIYQQQgghhBDCTLTmjoAQQgghhBBCCCH+3aRyQgghhBBCCCGEEGYllRNCCCGEEEIIIYQwK6mcEEIIIYQQQgghhFlJ5YQQQgghhBBCCCHMSionhBBCCCGEEEIIYVZSOSGEEEIIIYQQQgizksoJIYQQQgghhBBCmJVUTgghhBBCCCGEEMKsLM0dASGEEEIIIR6VkJAQvvvuO1xcXJg8eTJarbyLE0KIvwOpnBBCCCGEEP8IGRkZDBs2jNOnT7NlyxapmBBCiL8RuWOLMsXf3x+NRsOyZcuK3K5Tp05oNBpmzpz5WOIlHlzud7Vnz55SP1bu9XPjxo1SP5Z4NMaNG1eiPF+WzZw5U+5HAoAFCxag0WhYvXq1uaPyrzV16lSOHTvG999/T5cuXcwdnX+NZcuWodFoGDdu3EOF8+yzz2Jpacm5c+ceTcSEEH8rUjkhxEPQaDRoNBpzR0OIMulRFVZF2Xbjxg00Gg3+/v7mjopZRUVFMXPmTJo3b86QIUPMHZ1/pVWrVvG///2Pjz76iCeffNLc0SmWVGwWNHPmTKysrHj11VfNHRUhhBlI5YQQQgizmTt3LsHBwQwaNMjcURHiocyaNYv4+Hj5oWkmSilu3brF119/zZtvvmnu6PzrDBo0iODgYObOnftQ4fj4+PDss8+yZ88e1q9f/4hiJ4T4u5DKCSGEEGZToUIFatWqhYuLi7mjIsQDi4+PZ9myZVSqVIlevXqZOzr/ShqNhilTpvD888+bOyr/Si4uLtSqVYsKFSo8dFgTJkwA4PPPP3/osIQQfy9SOSH+kS5dusQLL7xAtWrVsLW1xcXFhQ4dOvDzzz+b3P7mzZvMmzePLl264Ovri42NDa6urrRr146vv/4anU5ntH1uU8xcud07cv9yxzzI36w9ISGBKVOm4O/vj62tLTVq1GDevHmGsMPCwnjhhReoXLkyNjY2BAQE8L///e+RxBeMm15nZWXx8ccfU7duXezs7PD09GT48OFcuHDhQZKbW7du8cwzz1ChQgXDub377rukpqYWu+/vv/9Or1698PLywtramkqVKvHUU08RFBT0QHEx5UHSqzj5u/R8++23NG3aFAcHB1xdXenduzeHDx82uV/+cTHWrVtHly5dcHd3LzAux4ULFxg/fjx+fn7Y2Njg7u5O165dWblypclw8zcPvnnzJmPGjDF8HzVr1mTmzJlFfh/btm2jb9++eHt7Y21tTcWKFRkxYgTHjx83uX3+sUT2799Pv3798PLyQqvVsmzZMvz9/Rk/fjwAP/zwg1H+6NSpkyGc4sacWLFiBV27dsXd3R0bGxv8/Px45plnuHTpUrHpu3v3bnr06IGbmxt2dnY0adKEH3/8sdA0KEpqaiozZ86kRo0a2NjYUKFCBcaOHUtISEix+544cYLRo0cbrj13d3d69uzJ5s2b7zsexaU7FJ+mhXW3yb88JiaGl19+2RBnPz8/XnvtNeLi4oz2GTduHFWqVAH0+ezee2H+7R42TrGxsfznP/+hWrVq2NjYGF1HADt37mTw4MFUqFABa2trvL29GTRoEH/99ZfJY16+fJlnnnmGKlWqYGNjg6OjI35+fvTp04fvv//e5D6F+f7770lOTubpp582OQBj/vM/c+YMgwcPxsvLCzs7Oxo0aMCCBQvIzs4usF9SUhLffvstgwcPpkaNGj
g4OODg4ED9+vV59913iY+PNxmfkt5nCvMojns/+S//dX369GkGDx6Mp6cnNjY21KlTh//+978opQqN7/1+96DP0//9739p1aoVrq6u2NraEhAQwJtvvklMTEyxaWRKeHg4U6ZMoXbt2tjb2+Pk5ETz5s1ZuHAhWVlZRttqNBpmzZoF6Fvd5M83Je0Klz/dDh8+TJ8+ffDw8MDJyYmOHTuyf/9+w7Zbt26la9euuLm54ejoSPfu3Tl58mShYd/vM6iobnw7duygX79+lCtXDisrK9zc3KhRowZPPfUU+/btK7B9o0aNaNiwIbt37yY4OLhEaSGE+IdQQpQhfn5+ClDff/99kdt17NhRAWrGjBkF1q1cuVLZ2toqQNWqVUsNGjRIdenSRTk4OChAjR8/vsA+s2fPVoCqUqWK6tq1qxo5cqTq2LGjsra2VoAaPHiw0ul0hu3Xrl2rxo4dqwAFqLFjxxr9RUVFKaWU+v777xWgBgwYoGrXrq28vb3VkCFDVI8ePZSdnZ0C1KRJk9SVK1dU+fLlVeXKldXw4cNV586dlYWFhQLURx999NDxVUqp69evK0D5+fmpwYMHKysrK9WtWzc1cuRIVbVqVQUoR0dHdejQoRJ8U3mCg4OVt7e3AlSFChXUsGHDVO/evZWdnZ1q3bq1at26tQLU7t27jfbLzMxUw4cPV4CysbFRbdq0UcOGDVMNGzZUgLKzs1Nbtmy5r7jkXj/Xr19/6PQqTu53/9prrymNRqPatWunRo0aperVq6cAZWlpqdasWVNoHCdNmqQA1axZMzVq1CjVsWNHtW/fPqWUUhs3bjRcwwEBAWrkyJGqS5cuhmvimWeeKRDujBkzFKDGjBmjPDw8VLly5dSwYcNU3759Ddd+27ZtVWpqaoF9p0+frgCl0WhU27Zt1ahRo1SjRo0UoCwsLNTSpUsL7JObB1966SWl1WpVnTp11MiRI1WPHj3U8uXL1euvv67atm2rAFWtWjWj/DF37lxDOLn56N48r9Pp1JgxYwxp2aVLFzVy5EhVs2ZNBSh7e3uT10du+r733ntKo9Gopk2bqpEjR6pWrVoZvrPPPvusuK/XSHJysmF/BwcH1bdvXzVs2DBVrlw55eHhYYinqfvR559/rrRarQJUo0aN1NChQ1W7du0M196sWbPuKy7FpbtShadprtz70tixY00u79+/v6pWrZpydXVVAwcOVIMGDVJubm6G6zEyMtKwz7fffquGDBliSJt774W5HjZOffr0UVWqVFFubm6qf//+atiwYWr06NGG7V5//XUFKK1Wq1q0aKGGDRumWrZsqTQajbKwsFDfffedUbjnzp1Tzs7OhnMaPHiwGjZsmGrdurVydHRUDRs2LPJ7uFeHDh0UoHbs2GFyfe75T5w4Udna2ip/f381YsQI1aNHD8O1MHTo0AL3of379ytAeXl5qXbt2hn28fDwUICqXr26io6OLnC8ktxnivKwx73f/Jd7Xb/11lvK2tpa1a5d23Cfzr3vTZ482WRc7/e7V0qpsLAwVb9+fQUod3d31a1bNzVo0CBD/P39/dWNGzeKTaf89u7da8gn/v7+qn///qpnz56GZT169FAZGRmG7ceOHWt43jVs2NAo33z77bclOmZuuk2dOlVZWlqqxo0bqxEjRhju3zY2NurgwYNq4cKFSqvVqjZt2qjhw4cb7qOOjo7q8uXLBcJ9kGdQYXl42bJlSqPRKI1Go1q2bKlGjBih+vfvr5o0aaIsLCwK/V6nTp2qAPXhhx+WKC2EEP8MUjkhypSHrZw4e/assrGxUba2tmr16tVG627cuGEojPzwww9G644eParOnTtX4DhhYWGGwsPKlSsLrM8tbBUm92ENqH79+qnk5GTDuhMnTihLS0vDD4wXX3xRZWZmGtb/8ccfClDOzs5G+z1ofHMrJwDl6empzpw5Y1iXlZWlXnnlFUPlRVpaWqHndK/mzZsrQA0fPtzoh+/NmzdVtWrVDMe8t3
LinXfeUYBq2bKlunbtmtG6VatWKQsLC+Xm5qbi4uJKHJfCKice9PstSu552dnZqZ07dxqt+/jjjxWgXFxcVEREhMk
"text/plain": [
"<Figure size 1400x700 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"df = merged_isin.copy()\n",
"\n",
"# Ajouter année / mois\n",
"df[\"year\"] = df[\"Centralisation Date\"].dt.year\n",
"df[\"month\"] = df[\"Centralisation Date\"].dt.month\n",
"\n",
"# 1. Nombre total de lignes par mois\n",
"total = df.groupby([\"year\", \"month\"]).size().reset_index(name=\"total_lines\")\n",
"\n",
"# 2. Nombre de ruptures par mois\n",
"ruptures = df[df[\"rupture_flag\"]].groupby([\"year\", \"month\"]).size().reset_index(name=\"n_ruptures\")\n",
"\n",
"# 3. Merge pour obtenir total + ruptures\n",
"ratio = total.merge(ruptures, on=[\"year\",\"month\"], how=\"left\")\n",
"ratio[\"n_ruptures\"] = ratio[\"n_ruptures\"].fillna(0)\n",
"\n",
"# 4. Proportion (en %)\n",
"ratio[\"rupture_ratio\"] = ratio[\"n_ruptures\"] / ratio[\"total_lines\"]\n",
"\n",
"# 5. Pivot pour heatmap\n",
"heatmap_ratio = ratio.pivot(index=\"year\", columns=\"month\", values=\"rupture_ratio\").fillna(0)\n",
"\n",
"# 6. Plot\n",
"plt.figure(figsize=(14, 7))\n",
"sns.heatmap(\n",
" heatmap_ratio, \n",
" cmap=\"Reds\",\n",
" linewidths=.3,\n",
" linecolor=\"grey\",\n",
" annot=True,\n",
" fmt=\".2%\",\n",
" cbar_kws={'label': 'Proportion de ruptures'}\n",
")\n",
"\n",
"plt.title(\"Heatmap de la proportion de ruptures (par année et mois)\", fontsize=16)\n",
"plt.xlabel(\"Mois\")\n",
"plt.ylabel(\"Année\")\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "aa5862ab-ec8e-47f8-8cb0-cd51503efed8",
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"df = merged_isin.copy()\n",
"\n",
"# Ajouter year / month au cas où\n",
"df[\"year\"] = df[\"Centralisation Date\"].dt.year\n",
"df[\"month\"] = df[\"Centralisation Date\"].dt.month\n",
"\n",
"# Merge géographique\n",
"df = df.merge(\n",
" geo[[\"Registrar Account - ID\", \"country\"]],\n",
" on=\"Registrar Account - ID\",\n",
" how=\"left\"\n",
")\n",
"\n",
"df[\"country\"] = df[\"country\"].fillna(\"UNKNOWN\")\n",
"\n",
"# Total des lignes par pays\n",
"total_country = df.groupby(\"country\").size().reset_index(name=\"total_obs\")\n",
"\n",
"# Nombre de ruptures\n",
"rupt_country = (\n",
" df[df[\"rupture_flag\"]]\n",
" .groupby(\"country\")\n",
" .size()\n",
" .reset_index(name=\"ruptures\")\n",
")\n",
"\n",
"# Merge + ratios\n",
"country_stats = total_country.merge(rupt_country, on=\"country\", how=\"left\")\n",
"country_stats[\"ruptures\"] = country_stats[\"ruptures\"].fillna(0)\n",
"country_stats[\"rupture_ratio\"] = country_stats[\"ruptures\"] / country_stats[\"total_obs\"]\n",
"\n",
"# Tri (rupture ratio décroissant)\n",
"country_stats = country_stats.sort_values(\"rupture_ratio\", ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "86d2a91c-d8d8-416c-8dc4-dc3f4ae7ca90",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"customdata": {
"bdata": "xU7sxE5sWEAAAAAAAMBvQAAAAAAAQHBA198e2OhdR0AAAAAAABB9QAAAAAAAGI9AlH+9dRU2RUAAAAAAAJjBQAAAAACAvNRAHFtG3vbHQ0AAAAAAAICOQAAAAAAARqNA0F5CewntOUAAAAAAAAA8QAAAAAAAAFtANJzUfXlsOUAAAAAAAAAuQAAAAAAAgE1A4SpTwHwgOUAAAAAAAMBYQAAAAAAAoHhAZluhtRLZN0AAAAAAYMHrQAAAAACgGA1BXn0WV0YbNkAAAAAAuMQCQQAAAACYOSVBPxKbCsIRNkAAAAAAYM3zQAAAAACMbhZBzafGIeilM0AAAAAAQEn3QAAAAAAIoR1By3uP6ZWYMUAAAAAA4MPqQAAAAAB8AxNBNeF+QPqVLkAAAAAAwBLaQAAAAADATwVBb8JdUw+UKkAAAAAAwKHWQAAAAACwSQVBjhDbtOLeKUAAAAAAAOBxQAAAAAAARqFARQMgZFR6KEAAAAAAAMCMQAAAAAAAXb1ATK4gJleQJ0AAAAAAALCbQAAAAAAAYM1A05E9A1HpJkAAAAAAAKBmQAAAAAAAsJhAKTDp1DzIJUAAAAAAAAA2QAAAAAAAQGlAj8DhOzKPJEAAAAAA0FELQQAAAAA1nEBBIPw39b67I0AAAAAAALagQAAAAADAK9VA1sr0lOakIUAAAAAAALCAQAAAAAAApbdAL/vifHPjIEAAAAAAAPixQAAAAABgmepATufRh/2UHUAAAAAAAABXQAAAAAAAcJNAyEIWspCFHEAAAAAAAIBEQAAAAAAA+IFALDDozo3/EUAAAAAAAHCbQAAAAABADuNAYQb4iZOHDkAAAAAAAIBIQAAAAAAAEJRAawqzdyc0DkAAAAAAAFiiQAAAAADgXe5AfscrRvyXDUAAAAAAAIBOQAAAAAAAxJlAeeJY/KtzBEAAAAAAAIBFQAAAAAAASJpAyp2ihTMgBEAAAAAAAABcQAAAAAAAZLFAAAAAAADAAkAAAAAAAGBoQAAAAAAAQMBAC0fmBDyf/j8AAAAAAAAQQAAAAAAAIGpAmb5KGJsF9T8AAAAAAAAiQAAAAAAAaIVAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADZAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGRAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFRA",
"dtype": "f8",
"shape": "37, 3"
},
"hovertemplate": "Pays=%{x}<br>% de ruptures=%{customdata[0]:.2f}<br>Nb de ruptures=%{customdata[1]}<br>Nb d'observations=%{customdata[2]}<extra></extra>",
"legendgroup": "",
"marker": {
"color": "#636efa",
"pattern": {
"shape": ""
}
},
"name": "",
"orientation": "v",
"showlegend": false,
"textposition": "auto",
"type": "bar",
"x": [
"JAPAN",
"TAIWAN",
"SWEDEN",
"DENMARK",
"SOUTH AFRICA",
"LITHUANIA",
"FINLAND",
"GERMANY",
"ITALY",
"LUXEMBOURG",
"SWITZERLAND",
"SPAIN",
"BELGIUM",
"UNITED KINGDOM",
"MAURITIUS",
"PORTUGAL",
"AUSTRIA",
"UNITED ARAB EMIRATES",
"CZECH REPUBLIC",
"FRANCE",
"MONACO",
"ISRAEL",
"NETHERLANDS",
"GREECE",
"CANADA",
"LATAM",
"INTERNATIONAL",
"US OFFSHORE",
"MALTA",
"IRELAND",
"UNKNOWN",
"SINGAPORE",
"POLAND",
"HONG KONG",
"NORWAY",
"SOUTH KOREA",
"UNITED STATES"
],
"xaxis": "x",
"y": {
"bdata": "L/RCL/RC7z/W1t8e2OjdP6kyY96BJts/gO/U1N9R2T97Ce0ltJfQP5zUfXlsRdA/ryWHuMoU0D/+dCBZfobOP+ndyiffS8w/y1RqsbA/zD/yMlDZRybJP2DwmJvehcY/S+xvhS+Twz89SR0rmQLBP624d5KljsA/WGCP5uZUvz/S/YVPZSm+P3SHbeWaU70/uetni5Xhuz+iUrAj5FC6P83wKBs8Qrk/3w1YBqKVtj+OGOV2BJ61P2UjU7227rI/V8T5cQBBsj++60dGqwmnP8NLYQb4iaM/OsmlI5RUoz8T5pZ0ofCiP8ScFfGoLZo/AqHkTtHCmT8AAAAAAACYP2N/GCwcmZM/40X5mXToij8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
"dtype": "f8"
},
"yaxis": "y"
}
],
"layout": {
"bargap": 0.2,
"barmode": "relative",
"legend": {
"tracegroupgap": 0
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermap": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermap"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Proportion de ruptures par pays (avec volumes au survol)"
},
"xaxis": {
"anchor": "y",
"domain": [
0,
1
],
"tickangle": -45,
"title": {
"text": "Pays"
}
},
"yaxis": {
"anchor": "x",
"domain": [
0,
1
],
"tickformat": ".1%",
"title": {
"text": "Proportion de ruptures"
}
}
}
},
"image/png": "iVBORw0KGgoAAAANSUhEUgAABa0AAAFoCAYAAABUqteyAAAQAElEQVR4AeydCaAV0x/Hf1NJKyWy5E+WbNmyy1Yi2RNFERFKQouKSEWiKAkpa5SiZJe1RGQJ2ULWbCEiWlH6n8+peebd7n3denPvu++9b5w3c2fOnOUzZ86c8z3n/KbMCv0TAREQAREQAREQAREQAREQAREQAREo6QSUPxEQAREQAREoNgTKmP6JgAiIgAiIgAiIgAisIwFdJgIiIAIiIAIiIAIiIAIiIAIiEDcBidZxE1V4hSegEERABERABERABERABERABERABERABEo+AeVQBERABFIQkGidAowOi4AIiIAIiIAIiIAIiEBxJKA0i4AIiIAIiIAIiIAIiEBxJyDRurjfQaVfBEQgGwQUhwiIgAiIgAiIgAiIgAiIgAiIgAiIQMknoBzmCAGJ1jlyI5QMERABERABERABERABERABESiZBJQrERABERABERABEVg7AhKt146XfIuACIiACIhAbhBQKkRABERABERABERABERABERABESghBKQaB25sdoVAREQAREQAREQAREQAREQAREQAREo+QSUQxEQAREQgdwmINE6t++PUicCIiACIiACIiACxYWA0ikCIiACIiACIiACIiACIiACsRCQaB0LRgUiApkioHBFQAREQAREQAREQAREQAREQAREQARKPgHlUAREIEpAonWUhvZFQAREQAREQAREQAREQARKDgHlRAREQAREQAREQAREoFgSkGhdLG+bEi0CIiACRUdAMYuACIiACIiACIiACIiACIiACIiACJR8AkWZQ4nWRUlfcYuACIiACIiACIiACIiACIiACJQmAsqrCIiACIiACIhAGgQkWqcBSV5EQAREQAREQARymYDSJgIiIAIiIAIiIAIiIAIiIAIiUJIISLQuSXczzrwoLBEQAREQAREQAREQAREQAREQAREQgZJPQDkUAREQgRwkINE6B2+KkiQCIiACIiACIiACIlC8CSj1IiACIiACIiACIiACIiAC605AovW6s9OVIiAC2SWg2ERABERABERABERABERABERABERABEo+AeVQBEyitQqBCIiACIiACIiACIiACIiACJR4AsqgCIiACIiACIiACBQfAhKti8+9UkpFQAREQARyjYDSIwIiIAIiIAIiIAIiIAIiIAIiIAIiEDuBnBOtY8+hAhQBERABERABERABERABERABERABEcg5AkqQCIiACIiACKQiINE6FRkdFwEREAEREAEREIHiR0ApFgEREAEREAEREAEREAEREIFiT0CidbG/hcpA5gkoBhEQAREQAREQAREQAREQAREQAREQgZJPQDkUARHIFQISrXPlTigdIiACIiACIiACIiACIlASCShPIiACIiACIiACIiACIrCWBCRaryUweRcBERCBXCCgNIiACIiACIiACIiACIiACIiACIiACJR8AqU1hxKtS+udV75FQAREQAREQAREQAREQAREoHQSUK5FQAREQAREQARynIBE6xy/QUqeCIiACIiACBQPAkqlCIiACIiACIiACIiACIiACIiACMRDQKJ1PBwzE4pCFQEREAEREAEREAEREAEREAEREAERKPkElEMREAEREIF8BCRa58OhHyIgAiIgAiIgAiIgAiWFgPIhAiIgAiIgAiIgAiIgAiJQPAlItC6e902pFoGiIqB4RUAEREAEREAEREAEREAEREAEREAESj4B5VAEipSAROsixa/IRUAEREAEREAEREAEREAESg8B5VQEREAEREAEREAERCAdAhKt06EkPyIgAiIgArlLQCkTAREQAREQAREQAREQAREQAREQAREoUQSSitYlKofKjAiIgAiIgAiIgAiIgAiIgAiIgAiIQFICOigCIiACIiACuUhAonUu3hWlSQREQAREQAREoDgTUNpFQAREQAREQAREQAREQAREQAQKQUCidSHg6d
JsElBcIiACIiACIiACIiACIiACIiACIiACJZ+AcigCIiACZhKtVQpEQAREQAREQAREQAREoKQTUP5EQAREQAREQAREQAREoBgRkGhdjG6WkioCIpBbBJQaERABERABERABERABERABERABERCBkk9AOcw+AYnW2WeuGEVABERABERABERABERABESgtBNQ/kVABERABERABEQgJQGJ1inR6EQuE7h77ERr0qq7/TJvfi4nU2kTgbQJvDRthtVt0MbO6TzAFi9ZmvZ1Jd0jzzjPes/r7izpWY0pfyU3mGRlQc9Nyb3fypkIiIAIiIAIiIAIiIAIiEDpJiDRek33f9V5RFIEpURXUoQU8oEwhCiwKst+M3PWbDvwuA7GeX9Af2IjELKlbMUWqAIqlgR47gbcOtZOPOogu+emHlapYoVimQ8lWgSyTaBh/Xo2bkQf++Tzb6zfkFHZjl7xiYAIiIAIiIAIlDQCyo8IiIAIiEDOEJBovRa3YoMqlXzneOaUkYajo/zSazM0M3ItGMqrCIjA6gRuumO8P9j5/OZ+qz8iIALpE6i7Y2079/TjjPcxg4HpXymf2SKgeERABERABERABERABERABERgbQlItF5bYhH/dJT79zzP3pzxiXeRUyVmlzy+/tQw63/5eSUmTyUgI8pCCSKAyIbY1vz4BrZJjWolKGfKighkj8AJjevbhhtUsQceeSF7kSomERABERABERABERABEcg8AcUgAqWWgETrQt76mjWqGzOwv/rmRx8Sph4wp4EQhUmN0JwIdje9B/cnepzz/HaH8/7HVACmOjjOdfgJHeHneVy1E/oP/bDlulWn/Yb0kC6u5xx+cMSBe/y51+y7OXOtwcmdvF1d/HINjn2u8wGt+rO2cXI98YWONKwKao2bZHE9M/nNpNcl85tuXHCA++zvfvKz50kreYcB6ecc4UcjJuzQT3g8DOfjz2Z7u9uEg4vaKibMc7sOtD8XLrbBI8Z55vjhWsJZl/iSpZuwsI9M3IQfOsLnXNSRl/B8uOVY1E+yfcKCwVszPs3jxvVNEmyOJ0sH/kgb56Jhw4HrU+Up6jfcTzcd+McvcUcdeeC+cB6Hn8RjHMclniP95CMaHr85jv+C3BvvfuxPH7DXLn4b/UN5g0M0XPbhE/ojDuLCsR8eZ8tvjuPY5xiO9BNO6BLP4wfH/Q/9hFuu5VwyF6Y3mr7QX6pz+A3DZsvv8JpUW+4T94b0Rf2EcUTTyD4ME59HjuE/DIu4ceHxaLjsExfnQ5fMX2JY+CV+ri/IcW+4B/iPOo5xLnotfJIdJ55kaYpeG+4n5oU4ORaeD/MRPcY5eBEHcfEbxz73gmtIG2HhHnt2qq//OIa/qAvDSTzHb64NHb+j1xW0z4APAz/vfvi5vnVQECidEwERWEVAGxEQAREQAREQAREQgVwnINE6A3cIIbJFuz525GH7eDMimBLB7ibiA2LDT3N/s+nPDPfnpkwYYnSyOc75aHIQkl94+W3vjzAwR3LXA08ZIkHoD6HghLN6Gp11/OBu7X+Jdex5cz5/oX8E0miYzKDGYUv3f1vUNNJDGMyuZpZ1eF10uy5xcj3h4rq0a2E9+99phMPxghx+yN9eu9XJ40AYO2y75WqXIbAgukdZwIy4osxWuzByAOG+5QVXW9f2p/r4CuIQuWy1XcLp0meYjbqlpw+H+40n7gv3GbZ3DeruBzzgQZ5w3Av8ra0jvmTphl+j5l1ss5ob+XQQB/d4/JNTLCoIwY60UXbwgyNdsCOMNaWHMn9Jr6F53Liee9b6ov75BKQqVSrmM7GTyCUaT6o8Rf0k7qebDq4jf6QzdJgXYCAhzG+qmZvcv9fe+tAaHlTPuI/8hh1hkh/Cg/Gcn+et0cYu1xLWznW2tm222owgVnNbbFoj77kMw6bOCO8f9q9bN2/sV3uw6iMawNff/uRt/R60327eTjbxUddw/0kj4eEoH83aXpXvXhE+959nCD84rvly9g/JPx
TpIg6FQ2aOhxzdYf//R7O+9gNj1IscCNOSbn3INevqKEvR55F8EBb1Rde+w+yJ+/r75yM8HpprwQ8uGQvqGeqmMJ9
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# On ajoute une colonne en % pour laffichage\n",
"country_stats_plot = country_stats.copy()\n",
"country_stats_plot[\"rupture_pct\"] = country_stats_plot[\"rupture_ratio\"] * 100\n",
"\n",
"# Tri décroissant par proportion de ruptures\n",
"country_stats_plot = country_stats_plot.sort_values(\"rupture_ratio\", ascending=False)\n",
"\n",
"fig = px.bar(\n",
" country_stats_plot,\n",
" x=\"country\",\n",
" y=\"rupture_ratio\",\n",
" hover_data={\n",
" \"rupture_pct\": ':.2f',\n",
" \"ruptures\": True,\n",
" \"total_obs\": True,\n",
" \"rupture_ratio\": False, # on cache la version décimale\n",
" },\n",
" labels={\n",
" \"country\": \"Pays\",\n",
" \"rupture_ratio\": \"Proportion de ruptures\",\n",
" \"rupture_pct\": \"% de ruptures\",\n",
" \"ruptures\": \"Nb de ruptures\",\n",
" \"total_obs\": \"Nb d'observations\"\n",
" },\n",
" title=\"Proportion de ruptures par pays (avec volumes au survol)\"\n",
")\n",
"\n",
"# Format en %\n",
"fig.update_yaxes(tickformat=\".1%\")\n",
"\n",
"fig.update_layout(\n",
" xaxis_tickangle=-45,\n",
" bargap=0.2\n",
")\n",
"\n",
"fig.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "e1c114db-5fbd-4cd3-a897-b9d4c96053fd",
"metadata": {},
"outputs": [],
"source": [
"df[df['country']=='JAPAN'].to_csv('Japan.csv')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "95bc353d-e883-4989-aaca-1b3c9b51ee5a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"=== BASIC NUMERIC STATS ===\n",
"count 12501.000000\n",
"mean 0.069464\n",
"std 0.137360\n",
"min 0.000000\n",
"1% 0.000000\n",
"5% 0.000000\n",
"10% 0.000000\n",
"25% 0.000000\n",
"50% 0.013699\n",
"75% 0.069767\n",
"90% 0.213497\n",
"95% 0.333333\n",
"99% 0.720000\n",
"max 0.992308\n",
"Name: rupture_ratio, dtype: float64\n",
"rupture_bucket\n",
"00.1% 12\n",
"0.11% 619\n",
"15% 3078\n",
"510% 1189\n",
"1025% 1463\n",
"2550% 703\n",
"50100% 311\n",
"0% 5126\n",
"Name: count, dtype: int64\n",
"\n",
"=== DISTRIBUTION (PERCENT) ===\n",
"rupture_bucket\n",
"00.1% 0.10\n",
"0.11% 4.95\n",
"15% 24.62\n",
"510% 9.51\n",
"1025% 11.70\n",
"2550% 5.62\n",
"50100% 2.49\n",
"0% 41.00\n",
"Name: count, dtype: float64\n",
"\n",
"Comptes avec 0 rupture = 5126 (41.00%)\n",
"Comptes avec rupture_ratio > 75% = 99 (0.79%)\n",
"Comptes avec rupture_ratio > 10% = 2477 (19.81%)\n"
]
}
],
"source": [
"rs = rupture_summary.copy()\n",
"\n",
"# 1. Stats numériques classiques\n",
"print(\"\\n=== BASIC NUMERIC STATS ===\")\n",
"print(rs[\"rupture_ratio\"].describe(percentiles=[0.01, 0.05, 0.10, 0.25, 0.5, 0.75, 0.90, 0.95, 0.99]))\n",
"\n",
"\n",
"# 2. Distribution par classes (bins)\n",
"\n",
"rs[\"rupture_bucket\"] = pd.cut(\n",
" rs[\"rupture_ratio\"],\n",
" bins=[0, 0.001, 0.01, 0.05, 0.10, 0.25, 0.50, 1.01],\n",
" labels=[\n",
" \"00.1%\",\n",
" \"0.11%\",\n",
" \"15%\",\n",
" \"510%\",\n",
" \"1025%\",\n",
" \"2550%\",\n",
" \"50100%\"\n",
" ],\n",
" include_lowest=True\n",
")\n",
"\n",
"# Ajouter la catégorie \"0%\"\n",
"rs[\"rupture_bucket\"] = rs[\"rupture_bucket\"].cat.add_categories(\"0%\")\n",
"\n",
"# Remplacer les 0% exacts\n",
"rs.loc[rs[\"rupture_ratio\"] == 0, \"rupture_bucket\"] = \"0%\"\n",
"\n",
"bucket_counts = rs[\"rupture_bucket\"].value_counts().sort_index()\n",
"print(bucket_counts)\n",
"\n",
"\n",
"# 3. Pourcentages\n",
"bucket_percent = (bucket_counts / len(rs) * 100).round(2)\n",
"\n",
"print(\"\\n=== DISTRIBUTION (PERCENT) ===\")\n",
"print(bucket_percent)\n",
"\n",
"\n",
"# 4. Nombre de comptes totalement propres\n",
"no_rupture = (rs[\"n_ruptures\"] == 0).sum()\n",
"print(f\"\\nComptes avec 0 rupture = {no_rupture} ({no_rupture/len(rs)*100:.2f}%)\")\n",
"\n",
"# 5. Comptes extrêmement problématiques\n",
"severe = (rs[\"rupture_ratio\"] > 0.75).sum()\n",
"print(f\"Comptes avec rupture_ratio > 75% = {severe} ({severe/len(rs)*100:.2f}%)\")\n",
"\n",
"medium = (rs[\"rupture_ratio\"] > 0.10).sum()\n",
"print(f\"Comptes avec rupture_ratio > 10% = {medium} ({medium/len(rs)*100:.2f}%)\")\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "425b36d0-c92a-4405-be28-35b1fc292fec",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre total de ruptures en 2021 : 71090\n",
"\n",
"=== RUPTURES 2021 — POSITIVES vs NEGATIVES ===\n",
"gap_type\n",
"negative 38092\n",
"positive 32998\n",
"Name: count, dtype: int64\n",
"\n",
"(%)\n",
"gap_type\n",
"negative 53.58%\n",
"positive 46.42%\n",
"Name: proportion, dtype: object\n",
"\n",
"=== STATISTIQUES DES GAPS ===\n",
" count mean std min 25% \\\n",
"gap_type \n",
"negative 38092.0 -7150.538497 99000.868201 -5.257530e+06 -438.04975 \n",
"positive 32998.0 9865.849003 139488.218647 1.000000e-05 22.76075 \n",
"\n",
" 50% 75% max \n",
"gap_type \n",
"negative -57.9725 -2.74375 -6.000000e-06 \n",
"positive 159.3720 1007.14050 1.814960e+07 \n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2QAAAHWCAYAAAAYdUqfAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAeg1JREFUeJzt3Xl4Def///HXyb7IIogkBLGLfRdVSy2haLVatEooWoqW1FJdbC26KVotXdGWFm3RD7XG1iXWSi1FUZq2RGJJIkFCMr8/fHN+joTkxIkj6fNxXedqZuaemfeZJJXXue+5x2QYhiEAAAAAwB3nYO8CAAAAAOC/ikAGAAAAAHZCIAMAAAAAOyGQAQAAAICdEMgAAAAAwE4IZAAAAABgJwQyAAAAALATAhkAAAAA2AmBDAAAAADshEAGoEibOHGiTCbTHTlX69at1bp1a/Py5s2bZTKZ9M0339yR8/fr108VKlS4I+fKi6z3v3nzZnuXUijNnz9fJpNJJ06csHcpAIACRCADUGhk/YGa9XJzc1NQUJDCw8P17rvv6sKFCzY5z8mTJzVx4kTFxMTY5Hi2dDfXBlzvl19+0cSJE5WYmGjvUuxm586dGjZsmGrWrClPT0+VK1dOPXr00B9//JFj+4MHD6pjx44qVqyY/Pz81KdPHyUkJFi0OXTokMaMGaN69erJy8tLgYGB6ty5s3bt2pXteIcPH9bIkSPVvHlzubm5EfCBuxSBDEChM3nyZH3xxReaM2eOhg8fLkkaMWKEateurb1791q0ffnll3Xp0iWrjn/y5ElNmjTJ6tCzbt06rVu3zqp9rHWr2j7++GMdPny4QM8P5NUvv/yiSZMm/acD2RtvvKFvv/1Wbdu21axZs/TUU09p69atatCggfbv32/R9p9//lHLli119OhRTZ06VaNGjdKqVavUvn17paenm9t98skn+vjjj9WoUSNNnz5dkZGROnz4sJo1a6YNGzZYHDM6Otr8YVWNGjXuyHsGYD0nexcAANbq1KmTGjVqZF4eN26cNm7cqC5duuiBBx7QwYMH5e7uLklycnKSk1PB/q/u4sWL8vDwkIuLS4GeJzfOzs52PT8Kn9TUVHl6etq7DKsUppojIyO1aNEii/839OzZU7Vr19brr7+uL7/80rx+6tSpSk1N1e7du1WuXDlJUpMmTdS+fXvNnz9fTz31lCTpscce08SJE1WsWDHzvk8++aRq1KihiRMnql27dub1DzzwgBITE+Xl5aW3336bnnXgLkUPGYAi4b777tMrr7yiv/76y+KPnJzuIVu/fr1atGghX19fFStWTNWqVdOLL74o6dp9T40bN5Yk9e/f3zw8cv78+ZKu3SdWq1Yt7d69Wy1btpSHh4d53xvvIcuSkZGhF198UQEBAfL09NQDDzygv//+26JNhQoV1K9fv2z7Xn/M3GrL6R6y1NRUPf/88woODparq6uqVaumt99+W4ZhWLQzmUwaNmyYli9frlq1asnV1VU1a9bUmjVrcr7gN/jnn3/UrVs3eXp6yt/fXyNHjlRaWlqObbdv366OHTvKx8dHHh4eatWqlX7++WeLNhcuXNCIESNUoUIFubq6yt/fX+3bt9evv/6aay2bN29Wo0aN5ObmpkqVKunDDz/M8edg3rx5uu++++Tv7y9XV1eFhoZqzpw52Y5XoUIFdenSRevWrVO9evXk5uam0NBQfffddxbtrly5okmTJqlKlSpyc3NTiRIl1KJFC61fvz7Xmg8cOKD77rtP7u7uKlu2rF577TVlZmbm2Hb16tW699575enpKS8vL3Xu3FkHDhzI9RxZQ363bNmiZ555Rv7+/ipbtqykm99/mNN1y/pZWbhwoapVqyY3Nzc1bNhQW7dutdhv9OjRkqSQkBDzz+qJEyd04sQJi5/bG489ceLEbOf//fff9fjjj6t48eJq0aKFefuXX36phg0byt3dXX5+furVq1e2362b+ffff/
Xkk0+qdOnS5p/3zz77zKJN1n2QS5Ys0ZQpU1S2bFm5ubmpbdu2Onr0aK7naN68ebYPaqpUqaKaNWvq4MGDFuu//fZbdenSxRzGJKldu3aqWrWqlixZYl7XsGFDizAmSSVKlNC9996b7Zh+fn7y8vLKtU4A9kUPGYAio0+fPnrxxRe1bt06DRo0KMc2Bw4cUJcuXVSnTh1NnjxZrq6uOnr0qDkQ1KhRQ5MnT9b48eP11FNP6d5775V07Q+rLGfPnlWnTp3Uq1cvPfHEEypduvQt65oyZYpMJpPGjh2r+Ph4zZw5U+3atVNMTIy5Jy8v8lLb9QzD0AMPPKBNmzZpwIABqlevntauXavRo0fr33//1YwZMyza//TTT/ruu+/0zDPPyMvLS++++666d++u2NhYlShR4qZ1Xbp0SW3btlVsbKyeffZZBQUF6YsvvtDGjRuztd24caM6deqkhg0basKECXJwcDAHox9//FFNmjSRJA0ePFjffPONhg0bptDQUJ09e1Y//fSTDh48qAYNGty0lj179qhjx44KDAzUpEmTlJGRocmTJ6tUqVLZ2s6ZM0c1a9bUAw88ICcnJ/3vf//TM888o8zMTA0dOtSi7ZEjR9SzZ08NHjxYERERmjdvnh599FGtWbNG7du3l3QtPEybNk0DBw5UkyZNlJycrF27dunXX381t8lJXFyc2rRpo6tXr+qFF16Qp6enPvrooxx/Nr744gtFREQoPDxcb7zxhi5evKg5c+aoRYsW2rNnT54mdXnmmWdUqlQpjR8/Xqmpqbm2z8mWLVu0ePFiPfvss3J1ddUHH3ygjh07aseOHapVq5Yefvhh/fHHH/rqq680Y8YMlSxZUpJUqlSpbPdE5cWjjz6qKlWqaOrUqeYPE6ZMmaJXXnlFPXr00MCBA5WQkKD33ntPLVu21J49e+Tr63vT450+fVrNmjUzh8tSpUpp9erVGjBggJKTkzVixAiL9q+//rocHBw0atQoJSUl6c0331Tv3r21fft2q9+LYRg6ffq0atasaV7377//Kj4+3qLnP0uTJk30ww8/5HrcuLg483UGUMgYAFBIzJs3z5Bk7Ny586ZtfHx8jPr165uXJ0yYYFz/v7oZM2YYkoyEhISbHmPnzp2GJGPevHnZtrVq1cqQZMydOzfHba1atTIvb9q0yZBklClTxkhOTjavX7JkiSHJmDVrlnld+fLljYiIiFyPeavaIiIijPLly5uXly9fbkgyXnvtNYt2jzzyiGEymYyjR4+a10kyXFxcLNb99ttvhiTjvffey3au682cOdOQZCxZssS8LjU11ahcubIhydi0aZNhGIaRmZlpVKlSxQgPDzcyMzPNbS9evGiEhIQY7du3N6/z8fExhg4desvz5qRr166Gh4eH8e+//5rXHTlyxHBycjJu/Cfv4sWL2fYPDw83KlasaLGufPnyhiTj22+/Na9LSkoyAgMDLX7W6tata3Tu3NnqmkeMGGFIMrZv325eFx8fb/j4+BiSjOPHjxuGYRgXLlwwfH19jUGDBlnsHxcXZ/j4+GRbf6Os358WLVoYV69etdh2489Olht/fwzj2s+KJGPXrl3mdX/99Zfh5uZmPPTQQ+Z1b731lkX9WY4fP37Tn2FJxoQJE7Kd/7HHHrNod+LECcPR0dGYMmWKxfp9+/YZTk5O2dbfaMCAAUZgYKBx5swZi/W9evUyfHx8zD8bWb/DNWrUMNLS0sztZs2aZUgy9u3bd8vz5OSLL74wJBmffvqpeV3W7/Xnn3+erf3o0aMNScbly5dvesytW7caJpPJeOWVV27a5mbfDwD2x5BFAEVKsWLFbjnbYtan5itWrLjpkLDcuLq6qn///nlu37dvX4thQ4888ogCAwPz9Kn37fjhhx/k6OioZ5991mL9888/L8MwtHr1aov17dq1U6VKlczLderUkbe3t/78889czxMYGKhHHnnEvM7Dw8N8z0uWmJgYHTlyRI8//rjOnj2rM2
fO6MyZM0pNTVXbtm21detW8/fE19dX27dv18mTJ/P8fjMyMrRhwwZ169ZNQUFB5vWVK1dWp06dsrW/vgcqKSlJZ86
"text/plain": [
"<Figure size 1000x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# --- 1. Filtres de base ---\n",
"merged[\"year\"] = merged[\"Centralisation Date\"].dt.year\n",
"\n",
"# Filtrer uniquement l'année 2021\n",
"ruptures_2021 = merged[(merged[\"year\"] == 2021) & (merged[\"rupture_flag\"] == True)].copy()\n",
"\n",
"print(\"Nombre total de ruptures en 2021 :\", len(ruptures_2021))\n",
"\n",
"# --- 2. Classification du type de gap ---\n",
"ruptures_2021[\"gap_type\"] = np.where(ruptures_2021[\"gap\"] > 0, \"positive\", \"negative\")\n",
"\n",
"# --- 3. Statistiques globales ---\n",
"gap_counts = ruptures_2021[\"gap_type\"].value_counts()\n",
"gap_percent = ruptures_2021[\"gap_type\"].value_counts(normalize=True) * 100\n",
"\n",
"print(\"\\n=== RUPTURES 2021 — POSITIVES vs NEGATIVES ===\")\n",
"print(gap_counts)\n",
"print(\"\\n(%)\")\n",
"print(gap_percent.map(lambda x: f\"{x:.2f}%\"))\n",
"\n",
"# --- 4. Intensité des écarts ---\n",
"intensity_stats = ruptures_2021.groupby(\"gap_type\")[\"gap\"].describe()\n",
"print(\"\\n=== STATISTIQUES DES GAPS ===\")\n",
"print(intensity_stats)\n",
"\n",
"# --- 5. Visualisation rapide ---\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"plt.figure(figsize=(10,5))\n",
"sns.histplot(data=ruptures_2021, x=\"gap\", hue=\"gap_type\", bins=80, kde=True)\n",
"plt.xlim(-merged[\"gap\"].abs().max(), merged[\"gap\"].abs().max())\n",
"plt.title(\"Distribution des gaps de rupture en 2021\")\n",
"plt.xlabel(\"Gap (AUM_{t} Expected AUM_{t})\")\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "df9e0005-93f2-4885-baef-2e54921a42f4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"=== RUPTURE SUMMARY (in %) ===\n",
" total_obs rupture_count rupture_ratio\n",
"period \n",
"after_2021 935390 242411 25.92\n",
"before_2021 3414943 428083 12.54\n",
"during_2021 529964 71090 13.41\n",
"\n",
"=== GAP POSITIVE / NEGATIVE DISTRIBUTION (in %) ===\n",
"period period gap_type\n",
"after_2021 after_2021 negative 60.163524\n",
" positive 39.836476\n",
"before_2021 before_2021 negative 62.273905\n",
" positive 37.726095\n",
"during_2021 during_2021 negative 53.582782\n",
" positive 46.417218\n",
"dtype: float64\n"
]
}
],
"source": [
"# --- 1. ADD YEAR ---\n",
"merged[\"year\"] = merged[\"Centralisation Date\"].dt.year\n",
"\n",
"# --- 2. DEFINE PERIODS ---\n",
"conditions = [\n",
" merged[\"year\"] < 2021,\n",
" merged[\"year\"] == 2021,\n",
" merged[\"year\"] > 2021\n",
"]\n",
"\n",
"period_labels = [\"before_2021\", \"during_2021\", \"after_2021\"]\n",
"\n",
"merged[\"period\"] = np.select(\n",
" conditions,\n",
" period_labels,\n",
" default=\"unknown\"\n",
")\n",
"\n",
"# --- 3. CREATE GAP TYPE & FILTER ONLY RUPTURES ---\n",
"merged[\"gap_type\"] = np.where(\n",
" merged[\"gap\"] > 0, \"positive\",\n",
" np.where(merged[\"gap\"] < 0, \"negative\", \"zero\")\n",
")\n",
"\n",
"ruptures = merged[merged[\"rupture_flag\"] == True].copy()\n",
"\n",
"# --- 4. TOTAL OBS PER PERIOD ---\n",
"total_obs = merged.groupby(\"period\").size().rename(\"total_obs\")\n",
"\n",
"# --- 5. TOTAL RUPTURES PER PERIOD ---\n",
"rupture_counts = ruptures.groupby(\"period\").size().rename(\"rupture_count\")\n",
"\n",
"# --- 6. PROPORTION OF RUPTURES ---\n",
"rupture_ratio = (rupture_counts / total_obs).rename(\"rupture_ratio\")\n",
"\n",
"# --- 7. POSITIVE / NEGATIVE GAPS (% among ruptures) ---\n",
"gap_dist = (\n",
" ruptures.groupby([\"period\", \"gap_type\"])\n",
" .size()\n",
" .groupby(level=0)\n",
" .apply(lambda x: (x / x.sum()) * 100) # % par période\n",
")\n",
"\n",
"\n",
"# --- 8. MERGE AND DISPLAY ---\n",
"summary = pd.concat([total_obs, rupture_counts, rupture_ratio], axis=1)\n",
"summary[\"rupture_ratio\"] = (summary[\"rupture_ratio\"] * 100).round(2)\n",
"\n",
"print(\"\\n=== RUPTURE SUMMARY (in %) ===\")\n",
"print(summary)\n",
"\n",
"print(\"\\n=== GAP POSITIVE / NEGATIVE DISTRIBUTION (in %) ===\")\n",
"print(gap_dist)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}