diff --git a/carmignac_diagnostics.py b/carmignac_diagnostics.py
deleted file mode 100644
index 45afafe..0000000
--- a/carmignac_diagnostics.py
+++ /dev/null
@@ -1,962 +0,0 @@
-"""
-Carmignac Data Challenge — Broken Months Diagnostics
-=====================================================
-Detects months where the aggregate stock-flow equation is violated
-at the ISIN level (across all accounts):
-
- Σ_r Q_{r,s}(t) - Σ_r Q_{r,s}(t-1) ≠ Σ_r F_{r,s}(t-1→t)
-
-The residual is the "missing flow":
- missing_{s}(t) = [Q_agg(t) - Q_agg(t-1)] - F_agg(t)
-
-This is a market-level check, independent of individual account identity.
-It captures:
- - Genuinely missing flow records
- - End-of-month accounting lags (transactions dated at boundary)
- - Corporate actions (dividends, splits) not reflected in flows
-
-Outputs
--------
- carmignac_broken_months.csv — machine-readable, loaded by carmignac_repair.py
- carmignac_diagnostics.html — interactive HTML report
-
-Usage
------
- python carmignac_diagnostics.py
- python carmignac_diagnostics.py \\
- --aum raw_AUM.csv \\
- --flows raw_flows.csv \\
- --out carmignac_broken_months.csv \\
- --html carmignac_diagnostics.html \\
- --alpha 0.02
-"""
-
-import argparse
-import json
-import os
-import sys
-
-import numpy as np
-import pandas as pd
-
-
-# ─────────────────────────────────────────────────────────────
-# 1. LOAD
-# ─────────────────────────────────────────────────────────────
-
-def load_data(aum_path, flows_path):
- aum = pd.read_csv(aum_path, parse_dates=["Centralisation Date"])
- flows = pd.read_csv(flows_path, parse_dates=["Centralisation Date"])
- aum["Product - Isin"] = aum["Product - Isin"].astype(str)
- flows["Product - Isin"] = flows["Product - Isin"].astype(str)
- return aum, flows
-
-
-# ─────────────────────────────────────────────────────────────
-# 2. AGGREGATE AND DETECT BROKEN MONTHS
-# ─────────────────────────────────────────────────────────────
-
-def detect_broken_months(aum, flows, alpha=0.02, lag_days=3):
- """
- For each (isin, month-end t), compute:
- - Q_agg(t) : total shares held across all accounts
- - Q_agg(t-1) : idem previous month (forward-filled)
- - F_agg(t) : total net flows recorded in ]EOM(t-1), EOM(t)]
- - missing(t) : [Q_agg(t) - Q_agg(t-1)] - F_agg(t)
- - missing_pct : |missing| / max(Q_agg(t), Q_agg(t-1))
-
- A month is flagged as "broken" when missing_pct > alpha.
-
- Additionally, a month is flagged as a potential "lag" when:
- - It is broken with the standard window
- - But would NOT be broken if flows dated within lag_days of EOM
- are shifted to the adjacent month
-
- Parameters
- ----------
- alpha : tolerance threshold (same as ALPHA in carmignac_repair.py)
- lag_days : number of boundary days to test for accounting lag
-
- Returns
- -------
- df_broken : DataFrame with all (isin, date) pairs where missing_pct > alpha
- df_all : Full DataFrame including non-broken months (for plotting)
- """
- # Monthly calendar
- t_min = aum["Centralisation Date"].min()
- t_max = aum["Centralisation Date"].max()
- all_months = pd.date_range(t_min, t_max, freq="ME")
-
- # ── Aggregate AUM per (isin, month-end) ──────────────────────
- aum_agg = (
- aum.groupby(["Product - Isin", "Centralisation Date"])["Quantity - AUM"]
- .sum()
- .reset_index()
- .rename(columns={"Product - Isin": "isin",
- "Centralisation Date": "date",
- "Quantity - AUM": "qty_agg"})
- )
- # Forward-fill sparse panel
- aum_pivot = aum_agg.pivot(index="date", columns="isin", values="qty_agg")
- aum_pivot = aum_pivot.reindex(all_months).ffill()
-
- # ── Aggregate flows per (isin, month-end) — standard window ──
- def bucket_flows(flows_df, months, lower_offset=0, upper_offset=0):
- """Aggregate flows with optional boundary extension (in days)."""
- fc = flows_df.copy()
- def assign_month(d):
- # Extended window: ]EOM(t-1) - lower_offset, EOM(t) + upper_offset]
- for m in months:
- eom_prev = m - pd.offsets.MonthEnd(1)
- lo = eom_prev - pd.Timedelta(days=lower_offset)
- hi = m + pd.Timedelta(days=upper_offset)
- if lo < d <= hi:
- return m
- return pd.NaT
-
- fc["month_end"] = fc["Centralisation Date"].apply(assign_month)
- fc = fc.dropna(subset=["month_end"])
- agg = (fc.groupby(["Product - Isin", "month_end"])["Quantity - NetFlows"]
- .sum()
- .reset_index()
- .rename(columns={"Product - Isin": "isin",
- "month_end": "date",
- "Quantity - NetFlows": "flow_agg"}))
- return agg
-
- flows_std = bucket_flows(flows, all_months)
- flows_lag = bucket_flows(flows, all_months,
- lower_offset=lag_days,
- upper_offset=lag_days)
-
- def flows_to_pivot(df, months):
- piv = df.pivot(index="date", columns="isin", values="flow_agg")
- return piv.reindex(months).fillna(0.0)
-
- fpiv_std = flows_to_pivot(flows_std, all_months)
- fpiv_lag = flows_to_pivot(flows_lag, all_months)
-
- # ── Compute residuals ─────────────────────────────────────────
- rows = []
- isins = aum_pivot.columns.tolist()
-
- for i in range(1, len(all_months)):
- t_curr = all_months[i]
- t_prev = all_months[i - 1]
-
- for isin in isins:
- q_curr = aum_pivot[isin].get(t_curr, np.nan) if isin in aum_pivot.columns else np.nan
- q_prev = aum_pivot[isin].get(t_prev, np.nan) if isin in aum_pivot.columns else np.nan
-
- if pd.isna(q_curr) or pd.isna(q_prev):
- continue
-
- delta = q_curr - q_prev
-
- # Standard window
- f_std = fpiv_std[isin].get(t_curr, 0.0) if isin in fpiv_std.columns else 0.0
- missing_std = delta - f_std
-
- # Extended lag window
- f_lag = fpiv_lag[isin].get(t_curr, 0.0) if isin in fpiv_lag.columns else 0.0
- missing_lag = delta - f_lag
-
- # ── Denominator choice ────────────────────────────────
- # Normalise by the size of the *movement* (max of delta_AUM
- # and recorded flow), not by the stock level. This avoids
- # astronomically large percentages when a position is tiny
- # but the missing flow is a normal-sized number.
- #
- # Interpretation: "what fraction of the expected movement
- # is unaccounted for?" 100% = the entire movement is missing.
- #
- # A minimum absolute threshold (min_abs_shares) suppresses
- # noise from residual micro-positions (rounding artefacts).
- min_abs_shares = 1.0 # ignore positions smaller than 1 share
- movement = max(abs(delta), abs(f_std), min_abs_shares)
- denom_std = movement
-
- movement_lag = max(abs(delta), abs(f_lag), min_abs_shares)
- denom_lag = movement_lag
-
- pct_std = abs(missing_std) / denom_std
- pct_lag = abs(missing_lag) / denom_lag
-
- broken_std = pct_std > alpha
- broken_lag = pct_lag > alpha
-
- # A "lag" month: broken with standard, NOT broken with extended window
- is_lag = broken_std and (not broken_lag)
-
- rows.append({
- "date": t_curr,
- "isin": isin,
- "q_agg_prev": round(q_prev, 3),
- "q_agg_curr": round(q_curr, 3),
- "delta_aum": round(delta, 3),
- "flow_agg": round(f_std, 3),
- "missing_flow": round(missing_std, 3),
- "missing_pct": round(pct_std, 6),
- "broken": broken_std,
- "is_lag": is_lag,
- })
-
- df_all = pd.DataFrame(rows)
- df_broken = df_all[df_all["broken"]].sort_values("missing_pct", ascending=False)
- return df_broken, df_all
-
-
-
-# ─────────────────────────────────────────────────────────────
-# 2b. AGGREGATE (CROSS-ISIN) BROKEN MONTHS
-# ─────────────────────────────────────────────────────────────
-
-def detect_aggregate_broken_months(aum, flows, alpha=0.02, lag_days=3):
- """
- Same stock-flow check as detect_broken_months, but aggregated
- across ALL ISINs for each month:
-
- Q_total(t) - Q_total(t-1) != F_total(t)
-
- where Q_total(t) = sum over all (reg_id, isin) of Q_{r,s}(t).
-
- This catches months where the global portfolio is incoherent even
- if every individual ISIN is fine (e.g. cross-ISIN netting errors),
- and provides a cleaner high-level view.
-
- Returns
- -------
- df_agg : DataFrame indexed by month with columns:
- q_total_prev, q_total_curr, delta_aum, flow_total,
- missing_flow, missing_pct, broken, is_lag
- """
- t_min = aum["Centralisation Date"].min()
- t_max = aum["Centralisation Date"].max()
- all_months = pd.date_range(t_min, t_max, freq="ME")
-
- # ── Total AUM per month (all ISIN, all accounts) ─────────────
- aum_monthly = (
- aum.groupby("Centralisation Date")["Quantity - AUM"]
- .sum()
- .reindex(all_months)
- .ffill()
- .rename("q_total")
- )
-
- # ── Bucket flows helper (reuse same window logic) ─────────────
- def bucket_total_flows(flows_df, months, lower_offset=0, upper_offset=0):
- fc = flows_df.copy()
- def assign_month(d):
- for m in months:
- eom_prev = m - pd.offsets.MonthEnd(1)
- lo = eom_prev - pd.Timedelta(days=lower_offset)
- hi = m + pd.Timedelta(days=upper_offset)
- if lo < d <= hi:
- return m
- return pd.NaT
- fc["month_end"] = fc["Centralisation Date"].apply(assign_month)
- fc = fc.dropna(subset=["month_end"])
- return (fc.groupby("month_end")["Quantity - NetFlows"]
- .sum()
- .reindex(months)
- .fillna(0.0))
-
- flow_std = bucket_total_flows(flows, all_months)
- flow_lag = bucket_total_flows(flows, all_months,
- lower_offset=lag_days, upper_offset=lag_days)
-
- # ── Compute residuals ─────────────────────────────────────────
- rows = []
- min_abs_shares = 1.0
-
- for i in range(1, len(all_months)):
- t_curr = all_months[i]
- t_prev = all_months[i - 1]
-
- q_curr = aum_monthly.get(t_curr, np.nan)
- q_prev = aum_monthly.get(t_prev, np.nan)
- if pd.isna(q_curr) or pd.isna(q_prev):
- continue
-
- delta = q_curr - q_prev
-
- f_std = flow_std.get(t_curr, 0.0)
- f_lag = flow_lag.get(t_curr, 0.0)
- miss_std = delta - f_std
- miss_lag = delta - f_lag
-
- movement_std = max(abs(delta), abs(f_std), min_abs_shares)
- movement_lag = max(abs(delta), abs(f_lag), min_abs_shares)
- pct_std = abs(miss_std) / movement_std
- pct_lag = abs(miss_lag) / movement_lag
-
- broken_std = pct_std > alpha
- broken_lag = pct_lag > alpha
- is_lag = broken_std and (not broken_lag)
-
- rows.append({
- "date": t_curr,
- "q_total_prev": round(q_prev, 3),
- "q_total_curr": round(q_curr, 3),
- "delta_aum": round(delta, 3),
- "flow_total": round(f_std, 3),
- "missing_flow": round(miss_std, 3),
- "missing_pct": round(pct_std, 6),
- "broken": broken_std,
- "is_lag": is_lag,
- })
-
- df_agg = pd.DataFrame(rows)
- return df_agg
-
-# ─────────────────────────────────────────────────────────────
-# 3. PRINT SUMMARY
-# ─────────────────────────────────────────────────────────────
-
-def print_summary(df_broken, df_all, alpha):
- total = len(df_all)
- n_broken = len(df_broken)
- n_lag = df_broken["is_lag"].sum()
-
- print("\n" + "=" * 60)
- print(" CARMIGNAC — Broken Months Diagnostics")
- print("=" * 60)
- print(f" (isin, month) pairs examined : {total}")
- print(f" Broken (missing_pct > {alpha:.0%}) : {n_broken} "
- f"({n_broken/total*100:.1f}%)")
- print(f" Of which likely lag : {n_lag}")
- print(f" Of which genuine gap : {n_broken - n_lag}")
-
- if n_broken:
- print("\n Top 10 by missing_pct:")
- cols = ["date", "isin", "missing_flow", "missing_pct", "is_lag"]
- print(df_broken[cols].head(10).to_string(index=False))
-
- # Monthly breakdown
- by_month = (df_broken.groupby("date")
- .agg(n_broken=("isin", "count"),
- total_missing=("missing_flow", lambda x: x.abs().sum()))
- .sort_values("n_broken", ascending=False)
- .head(5))
- if len(by_month):
- print("\n Most affected months:")
- print(by_month.to_string())
- print()
-
-
-# ─────────────────────────────────────────────────────────────
-# 4. BUILD HTML REPORT
-# ─────────────────────────────────────────────────────────────
-
-def build_html(df_broken, df_all, df_agg, alpha):
- # ── JS-ready data ────────────────────────────────────────────
- # Timeline: n_broken and total_missing per month
- tl = (df_all[df_all["broken"]]
- .groupby("date")
- .agg(n_broken=("isin", "count"),
- total_missing=("missing_flow", lambda x: x.abs().sum()),
- n_lag=("is_lag", "sum"))
- .reindex(df_all["date"].sort_values().unique())
- .fillna(0))
- tl.index = pd.to_datetime(tl.index)
- dates_str = json.dumps([d.strftime("%Y-%m-%d") for d in tl.index])
-
- def jf(arr, dec=4):
- return json.dumps([round(float(v), dec) if not np.isnan(v) else None for v in arr])
-
- n_broken_js = jf(tl["n_broken"].values, 0)
- total_miss_js = jf(tl["total_missing"].values)
- n_lag_js = jf(tl["n_lag"].values, 0)
-
- # Aggregate (cross-ISIN) JS data
- agg_dates_str = json.dumps([d.strftime("%Y-%m-%d") for d in pd.to_datetime(df_agg["date"])])
- agg_delta_js = jf(df_agg["delta_aum"].values)
- agg_flow_js = jf(df_agg["flow_total"].values)
- agg_missing_js = jf(df_agg["missing_flow"].values)
- agg_pct_js = jf((df_agg["missing_pct"] * 100).values)
-
- # Aggregate KPIs
- n_agg_broken = int(df_agg["broken"].sum())
- n_agg_lag = int(df_agg["is_lag"].sum())
- n_agg_genuine = n_agg_broken - n_agg_lag
- max_agg_pct = float(df_agg["missing_pct"].max() * 100) if len(df_agg) else 0
-
- # Aggregate detail table rows
- agg_rows = []
- for _, r in df_agg[df_agg["broken"]].iterrows():
- lb = 'lag' if r["is_lag"] else ""
- pc = "pct-high" if r["missing_pct"] > 0.1 else "pct-med"
- ds = r["date"].strftime("%Y-%m-%d") if hasattr(r["date"], "strftime") else str(r["date"])[:10]
- mc = "miss-neg" if r["missing_flow"] < 0 else "miss-pos"
- agg_rows.append(
- f'
| {ds} | '
- f'{r["q_total_prev"]:,.1f} | '
- f'{r["q_total_curr"]:,.1f} | '
- f'{r["flow_total"]:,.1f} | '
- f'{r["missing_flow"]:+,.1f} | '
- f'{r["missing_pct"]*100:.2f}% | '
- f'{lb} |
'
- )
- agg_detail_rows = "".join(agg_rows) if agg_rows else (
- '| ✓ No broken months at aggregate level |
'
- )
-
- # Per-ISIN summary
- isin_sum = (df_broken.groupby("isin")
- .agg(n_months=("date", "count"),
- avg_pct=("missing_pct", "mean"),
- total_abs=("missing_flow", lambda x: x.abs().sum()))
- .sort_values("total_abs", ascending=False))
-
- ISIN_COLORS = [
- "#2563eb","#16a34a","#dc2626","#d97706","#7c3aed",
- "#0891b2","#db2777","#65a30d","#ea580c","#6366f1",
- ]
-
- # Per-ISIN missing_pct timeseries for the top 5 ISINs
- top_isins = isin_sum.head(5).index.tolist()
- all_dates = sorted(df_all["date"].unique())
- isin_ts_datasets = []
- for idx, isin in enumerate(top_isins):
- sub = df_all[df_all["isin"] == isin].set_index("date")["missing_pct"].reindex(all_dates).fillna(0)
- isin_ts_datasets.append({
- "label": isin,
- "data": [round(float(v) * 100, 3) for v in sub.values],
- "borderColor": ISIN_COLORS[idx % len(ISIN_COLORS)],
- "backgroundColor": ISIN_COLORS[idx % len(ISIN_COLORS)] + "22",
- "borderWidth": 2,
- "pointRadius": 0,
- "tension": 0.3,
- "fill": False,
- })
- isin_ts_json = json.dumps(isin_ts_datasets)
- all_dates_str = json.dumps([d.strftime("%Y-%m-%d") if hasattr(d, 'strftime')
- else str(d)[:10] for d in all_dates])
-
- # Detail table rows
- detail_rows = ""
- for _, r in df_broken.head(200).iterrows():
- lag_badge = 'lag' if r["is_lag"] else ""
- pct_class = "pct-high" if r["missing_pct"] > 0.1 else "pct-med"
- detail_rows += f"""
-
- | {r['date'].strftime('%Y-%m-%d') if hasattr(r['date'], 'strftime') else str(r['date'])[:10]} |
- {r['isin']} |
- {r['q_agg_prev']:,.1f} |
- {r['q_agg_curr']:,.1f} |
- {r['flow_agg']:,.1f} |
- {r['missing_flow']:+,.1f} |
- {r['missing_pct']*100:.2f}% |
- {lag_badge} |
-
"""
-
- # ISIN summary table
- isin_rows = ""
- for isin, row in isin_sum.iterrows():
- isin_rows += f"""
-
- | {isin} |
- {int(row['n_months'])} |
- {row['avg_pct']*100:.2f}% |
- {row['total_abs']:,.1f} |
-
"""
-
- # KPIs
- total = len(df_all)
- n_broken_kpi = len(df_broken)
- n_lag_kpi = int(df_broken["is_lag"].sum())
- n_genuine = n_broken_kpi - n_lag_kpi
- max_pct = df_broken["missing_pct"].max() * 100 if len(df_broken) else 0
- n_isins = df_broken["isin"].nunique()
-
- no_broken_msg = ""
- if n_broken_kpi == 0:
- no_broken_msg = '✓ No broken months detected at this threshold.
'
-
- html = f"""
-
-
-
-
-Carmignac — Broken Months Diagnostics
-
-
-
-
-
-
-
-
-
- (ISIN, month) pairs
- {total:,}
- examined
-
-
- Broken months
- {n_broken_kpi:,}
- {n_broken_kpi/total*100:.1f}% of pairs
-
-
- Likely lags
- {n_lag_kpi}
- resolved by ±{3}d window
-
-
- Genuine gaps
- {n_genuine}
- unresolved by lag fix
-
-
- ISINs affected
- {n_isins}
- distinct ISINs
-
-
- Max missing %
- {max_pct:.1f}%
- worst single (isin, month)
-
-
-
-
-
-
00 · Aggregate view — all ISINs combined
-
-
-
-
-
-
-
-
-
-
- | Date |
- Σ Q(t−1) | Σ Q(t) |
- Σ Flow | Missing |
- Missing % | |
-
- {agg_detail_rows}
-
-
-
-
-
01 · Timeline — per ISIN
-
-
-
-
-
-
02 · By ISIN
-
-
-
-
- {'
No broken months detected.
' if n_broken_kpi == 0 else f"""
-
-
- | ISIN | Broken months |
- Avg missing % | Total |missing| (shares) |
-
- {isin_rows}
-
"""}
-
-
-
-
03 · Detail log
-
-
-
-
Threshold α = {alpha:.1%} · showing up to 200 rows
-
- {'
✓ No broken months detected at this threshold.
' if n_broken_kpi == 0 else f"""
-
-
- | Date | ISIN |
- Q(t-1) | Q(t) |
- Net flow | Missing |
- Missing % of movement | |
-
- {detail_rows}
-
"""}
-
-
-
-
-
-
-
-
-"""
- return html
-
-
-# ─────────────────────────────────────────────────────────────
-# 5. MAIN
-# ─────────────────────────────────────────────────────────────
-
-def main():
- parser = argparse.ArgumentParser(
- description="Detect broken months in Carmignac AUM/Flows data"
- )
- parser.add_argument("--aum", default="AUM_head.csv")
- parser.add_argument("--flows", default="flows_head.csv")
- parser.add_argument("--out", default="carmignac_broken_months.csv",
- help="Machine-readable output (loaded by carmignac_repair.py)")
- parser.add_argument("--html", default="carmignac_diagnostics.html")
- parser.add_argument("--alpha", type=float, default=0.02,
- help="Tolerance threshold (default 0.02 = 2%%)")
- parser.add_argument("--lag", type=int, default=3,
- help="Boundary days to test for accounting lag (default 3)")
- args = parser.parse_args()
-
- def resolve(p):
- if os.path.exists(p): return p
- alt = os.path.join(os.path.dirname(os.path.abspath(__file__)), p)
- if os.path.exists(alt): return alt
- sys.exit(f"[ERROR] File not found: {p}")
-
- print(f"[Load] AUM : {args.aum}")
- print(f"[Load] Flows : {args.flows}")
- aum, flows = load_data(resolve(args.aum), resolve(args.flows))
-
- print(f"\n[Detect] Running broken-month detection (α={args.alpha:.1%}, lag=±{args.lag}d)...")
- df_broken, df_all = detect_broken_months(aum, flows, alpha=args.alpha, lag_days=args.lag)
- df_agg = detect_aggregate_broken_months(aum, flows, alpha=args.alpha, lag_days=args.lag)
-
- print_summary(df_broken, df_all, args.alpha)
-
- n_agg_broken = int(df_agg["broken"].sum())
- print(f" Aggregate broken months : {n_agg_broken} "
- f"(of which lags: {int(df_agg['is_lag'].sum())})")
-
- # CSV output — this is what carmignac_repair.py will load
- if len(df_broken):
- df_broken.to_csv(args.out, index=False)
- print(f"[Export] Broken months CSV → {args.out}")
- else:
- pd.DataFrame(columns=["date","isin","missing_pct","is_lag"]).to_csv(args.out, index=False)
- print(f"[Export] No broken months — empty CSV → {args.out}")
-
- html = build_html(df_broken, df_all, df_agg, args.alpha)
- with open(args.html, "w", encoding="utf-8") as f:
- f.write(html)
- print(f"[Export] HTML report → {args.html}")
-
-
-if __name__ == "__main__":
- main()
diff --git a/data/explore.ipynb b/data/explore.ipynb
deleted file mode 100644
index fc2b7b1..0000000
--- a/data/explore.ipynb
+++ /dev/null
@@ -1,1346 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "bd938e6e",
- "metadata": {},
- "source": [
- "**Short notebook to test connectivity with S3 services and explore the data**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "127753ac",
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "ae3c64fe",
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "import s3fs\n",
- "fs = s3fs.S3FileSystem(\n",
- " client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'},\n",
- " key = os.environ[\"AWS_ACCESS_KEY_ID\"], \n",
- " secret = os.environ[\"AWS_SECRET_ACCESS_KEY\"], \n",
- " token = os.environ[\"AWS_SESSION_TOKEN\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "84b9ac42",
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "\n",
- "def sample_by_blocks(df, block_size=10, num_blocks=10, random_state=None):\n",
- " \"\"\"Sample num_blocks blocks of block_size consecutive rows (no overlapping blocks).\"\"\"\n",
- " n = len(df)\n",
- " max_start = n - block_size\n",
- " if max_start < 0:\n",
- " raise ValueError(f\"DataFrame has {n} rows, need at least {block_size}\")\n",
- " if max_start + 1 < num_blocks:\n",
- " raise ValueError(f\"Not enough room for {num_blocks} non-overlapping blocks (need at least {num_blocks * block_size} rows)\")\n",
- " rng = np.random.default_rng(random_state)\n",
- " chosen_starts = rng.choice(max_start + 1, size=num_blocks, replace=False)\n",
- " chosen_starts.sort() # blocks in order of position in original df\n",
- " indices = np.concatenate([np.arange(s, s + block_size) for s in chosen_starts])\n",
- " return df.iloc[indices].reset_index(drop=True)\n",
- "\n",
- "# sample_df = sample_by_blocks(df, block_size=10, num_blocks=10, random_state=42)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7f7d45bb",
- "metadata": {},
- "source": [
- "### OG AUM"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "83472648",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_175445/2279824029.py:2: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
- " aum = pd.read_csv(f, sep =\";\")\n"
- ]
- }
- ],
- "source": [
- "with fs.open('s3://projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv', 'rb') as f:\n",
- " aum = pd.read_csv(f, sep =\";\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "0b84ede5",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Agreement - Code | \n",
- " Company - Id | \n",
- " Company - Ultimate Parent Id | \n",
- " Registrar Account - ID | \n",
- " Registrar Account - Region | \n",
- " RegistrarAccount - Country | \n",
- " Product - Asset Type | \n",
- " Product - Strategy | \n",
- " Product - Legal Status | \n",
- " Product - Is Dedie ? | \n",
- " Product - Fund | \n",
- " Product - Shareclass Type | \n",
- " Product - Shareclass Currency | \n",
- " Product - Isin | \n",
- " Centralisation Date | \n",
- " Quantity - AUM | \n",
- " Value - AUM CCY | \n",
- " Value - AUM € | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 2716081 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " Luxembourg | \n",
- " Luxembourg | \n",
- " NaN | \n",
- " Infotech | \n",
- " SICAV | \n",
- " NO | \n",
- " Carmignac Portfolio Infotech | \n",
- " A | \n",
- " EUR | \n",
- " LU0109929157 | \n",
- " 2015-06-30 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 2716082 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " Luxembourg | \n",
- " Luxembourg | \n",
- " NaN | \n",
- " Infotech | \n",
- " SICAV | \n",
- " NO | \n",
- " Carmignac Portfolio Infotech | \n",
- " A | \n",
- " EUR | \n",
- " LU0109929157 | \n",
- " 2016-03-31 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 2716092 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " Luxembourg | \n",
- " Luxembourg | \n",
- " NaN | \n",
- " Infotech | \n",
- " SICAV | \n",
- " NO | \n",
- " Carmignac Portfolio Infotech | \n",
- " A | \n",
- " EUR | \n",
- " LU0109929157 | \n",
- " 2017-05-31 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 2716093 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " Luxembourg | \n",
- " Luxembourg | \n",
- " NaN | \n",
- " Infotech | \n",
- " SICAV | \n",
- " NO | \n",
- " Carmignac Portfolio Infotech | \n",
- " A | \n",
- " EUR | \n",
- " LU0109929157 | \n",
- " 2019-02-28 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 2716094 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " Luxembourg | \n",
- " Luxembourg | \n",
- " NaN | \n",
- " Infotech | \n",
- " SICAV | \n",
- " NO | \n",
- " Carmignac Portfolio Infotech | \n",
- " A | \n",
- " EUR | \n",
- " LU0109929157 | \n",
- " 2019-03-31 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 3652177 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " Luxembourg | \n",
- " Luxembourg | \n",
- " Equity | \n",
- " Investissement Latitude | \n",
- " FCP | \n",
- " NO | \n",
- " Carmignac Investissement Latitude | \n",
- " A | \n",
- " EUR | \n",
- " FR0010147603 | \n",
- " 2018-05-31 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 3652178 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " Luxembourg | \n",
- " Luxembourg | \n",
- " Equity | \n",
- " Investissement Latitude | \n",
- " FCP | \n",
- " NO | \n",
- " Carmignac Investissement Latitude | \n",
- " A | \n",
- " EUR | \n",
- " FR0010147603 | \n",
- " 2018-06-30 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 3652179 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " Luxembourg | \n",
- " Luxembourg | \n",
- " Equity | \n",
- " Investissement Latitude | \n",
- " FCP | \n",
- " NO | \n",
- " Carmignac Investissement Latitude | \n",
- " A | \n",
- " EUR | \n",
- " FR0010147603 | \n",
- " 2018-12-31 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 3652180 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " Luxembourg | \n",
- " Luxembourg | \n",
- " Equity | \n",
- " Investissement Latitude | \n",
- " FCP | \n",
- " NO | \n",
- " Carmignac Investissement Latitude | \n",
- " A | \n",
- " EUR | \n",
- " FR0010147603 | \n",
- " 2019-08-31 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | 3652181 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " Luxembourg | \n",
- " Luxembourg | \n",
- " Equity | \n",
- " Investissement Latitude | \n",
- " FCP | \n",
- " NO | \n",
- " Carmignac Investissement Latitude | \n",
- " A | \n",
- " EUR | \n",
- " FR0010147603 | \n",
- " 2020-01-31 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
640 rows × 18 columns
\n",
- "
"
- ],
- "text/plain": [
- " Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
- "2716081 L134 15292.0 15292.0 \n",
- "2716082 L134 15292.0 15292.0 \n",
- "2716092 L134 15292.0 15292.0 \n",
- "2716093 L134 15292.0 15292.0 \n",
- "2716094 L134 15292.0 15292.0 \n",
- "... ... ... ... \n",
- "3652177 L134 15292.0 15292.0 \n",
- "3652178 L134 15292.0 15292.0 \n",
- "3652179 L134 15292.0 15292.0 \n",
- "3652180 L134 15292.0 15292.0 \n",
- "3652181 L134 15292.0 15292.0 \n",
- "\n",
- " Registrar Account - ID Registrar Account - Region \\\n",
- "2716081 11215 Luxembourg \n",
- "2716082 11215 Luxembourg \n",
- "2716092 11215 Luxembourg \n",
- "2716093 11215 Luxembourg \n",
- "2716094 11215 Luxembourg \n",
- "... ... ... \n",
- "3652177 11215 Luxembourg \n",
- "3652178 11215 Luxembourg \n",
- "3652179 11215 Luxembourg \n",
- "3652180 11215 Luxembourg \n",
- "3652181 11215 Luxembourg \n",
- "\n",
- " RegistrarAccount - Country Product - Asset Type \\\n",
- "2716081 Luxembourg NaN \n",
- "2716082 Luxembourg NaN \n",
- "2716092 Luxembourg NaN \n",
- "2716093 Luxembourg NaN \n",
- "2716094 Luxembourg NaN \n",
- "... ... ... \n",
- "3652177 Luxembourg Equity \n",
- "3652178 Luxembourg Equity \n",
- "3652179 Luxembourg Equity \n",
- "3652180 Luxembourg Equity \n",
- "3652181 Luxembourg Equity \n",
- "\n",
- " Product - Strategy Product - Legal Status Product - Is Dedie ? \\\n",
- "2716081 Infotech SICAV NO \n",
- "2716082 Infotech SICAV NO \n",
- "2716092 Infotech SICAV NO \n",
- "2716093 Infotech SICAV NO \n",
- "2716094 Infotech SICAV NO \n",
- "... ... ... ... \n",
- "3652177 Investissement Latitude FCP NO \n",
- "3652178 Investissement Latitude FCP NO \n",
- "3652179 Investissement Latitude FCP NO \n",
- "3652180 Investissement Latitude FCP NO \n",
- "3652181 Investissement Latitude FCP NO \n",
- "\n",
- " Product - Fund Product - Shareclass Type \\\n",
- "2716081 Carmignac Portfolio Infotech A \n",
- "2716082 Carmignac Portfolio Infotech A \n",
- "2716092 Carmignac Portfolio Infotech A \n",
- "2716093 Carmignac Portfolio Infotech A \n",
- "2716094 Carmignac Portfolio Infotech A \n",
- "... ... ... \n",
- "3652177 Carmignac Investissement Latitude A \n",
- "3652178 Carmignac Investissement Latitude A \n",
- "3652179 Carmignac Investissement Latitude A \n",
- "3652180 Carmignac Investissement Latitude A \n",
- "3652181 Carmignac Investissement Latitude A \n",
- "\n",
- " Product - Shareclass Currency Product - Isin Centralisation Date \\\n",
- "2716081 EUR LU0109929157 2015-06-30 \n",
- "2716082 EUR LU0109929157 2016-03-31 \n",
- "2716092 EUR LU0109929157 2017-05-31 \n",
- "2716093 EUR LU0109929157 2019-02-28 \n",
- "2716094 EUR LU0109929157 2019-03-31 \n",
- "... ... ... ... \n",
- "3652177 EUR FR0010147603 2018-05-31 \n",
- "3652178 EUR FR0010147603 2018-06-30 \n",
- "3652179 EUR FR0010147603 2018-12-31 \n",
- "3652180 EUR FR0010147603 2019-08-31 \n",
- "3652181 EUR FR0010147603 2020-01-31 \n",
- "\n",
- " Quantity - AUM Value - AUM CCY Value - AUM € \n",
- "2716081 0.0 0.0 0.0 \n",
- "2716082 0.0 0.0 0.0 \n",
- "2716092 0.0 0.0 0.0 \n",
- "2716093 0.0 0.0 0.0 \n",
- "2716094 0.0 0.0 0.0 \n",
- "... ... ... ... \n",
- "3652177 0.0 0.0 0.0 \n",
- "3652178 0.0 0.0 0.0 \n",
- "3652179 0.0 0.0 0.0 \n",
- "3652180 0.0 0.0 0.0 \n",
- "3652181 0.0 0.0 0.0 \n",
- "\n",
- "[640 rows x 18 columns]"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "aum[aum[\"Registrar Account - ID\"] == 11215]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "6f40c922",
- "metadata": {},
- "source": [
- "### Repaired AUM"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "b6edd4fd",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_175445/204804706.py:2: DtypeWarning: Columns (2,3,4,5) have mixed types. Specify dtype option on import or set low_memory=False.\n",
- " df_repaired = pd.read_csv(f, sep =\",\")\n"
- ]
- }
- ],
- "source": [
- "with fs.open('s3://projet-bdc-carmignac-g3/AUM_repaired.csv', 'rb') as f:\n",
- " df_repaired = pd.read_csv(f, sep =\",\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "2521a2a6",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Unnamed: 0.1 | \n",
- " Unnamed: 0 | \n",
- " Agreement - Code | \n",
- " Company - Id | \n",
- " Company - Ultimate Parent Id | \n",
- " Registrar Account - ID | \n",
- " Registrar Account - Region | \n",
- " RegistrarAccount - Country | \n",
- " Product - Asset Type | \n",
- " Product - Strategy | \n",
- " ... | \n",
- " Product - Is Dedie ? | \n",
- " Product - Fund | \n",
- " Product - Shareclass Type | \n",
- " Product - Shareclass Currency | \n",
- " Product - Isin | \n",
- " Centralisation Date | \n",
- " Quantity - AUM | \n",
- " Value - AUM CCY | \n",
- " Value - AUM € | \n",
- " repair_flag | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 2995412 | \n",
- " 2995412 | \n",
- " 2716081 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " LUXEMBOURG | \n",
- " LUXEMBOURG | \n",
- " NAN | \n",
- " INFOTECH | \n",
- " ... | \n",
- " NO | \n",
- " CARMIGNAC PORTFOLIO INFOTECH | \n",
- " A | \n",
- " EUR | \n",
- " LU0109929157 | \n",
- " 2015-06-30 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 2995413 | \n",
- " 2995413 | \n",
- " 2716082 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " LUXEMBOURG | \n",
- " LUXEMBOURG | \n",
- " NAN | \n",
- " INFOTECH | \n",
- " ... | \n",
- " NO | \n",
- " CARMIGNAC PORTFOLIO INFOTECH | \n",
- " A | \n",
- " EUR | \n",
- " LU0109929157 | \n",
- " 2016-03-31 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 2995423 | \n",
- " 2995423 | \n",
- " 2716092 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " LUXEMBOURG | \n",
- " LUXEMBOURG | \n",
- " NAN | \n",
- " INFOTECH | \n",
- " ... | \n",
- " NO | \n",
- " CARMIGNAC PORTFOLIO INFOTECH | \n",
- " A | \n",
- " EUR | \n",
- " LU0109929157 | \n",
- " 2017-05-31 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 2995424 | \n",
- " 2995424 | \n",
- " 2716093 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " LUXEMBOURG | \n",
- " LUXEMBOURG | \n",
- " NAN | \n",
- " INFOTECH | \n",
- " ... | \n",
- " NO | \n",
- " CARMIGNAC PORTFOLIO INFOTECH | \n",
- " A | \n",
- " EUR | \n",
- " LU0109929157 | \n",
- " 2019-02-28 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 2995425 | \n",
- " 2995425 | \n",
- " 2716094 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " LUXEMBOURG | \n",
- " LUXEMBOURG | \n",
- " NAN | \n",
- " INFOTECH | \n",
- " ... | \n",
- " NO | \n",
- " CARMIGNAC PORTFOLIO INFOTECH | \n",
- " A | \n",
- " EUR | \n",
- " LU0109929157 | \n",
- " 2019-03-31 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " False | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 4073314 | \n",
- " 4073314 | \n",
- " 3652177 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " LUXEMBOURG | \n",
- " LUXEMBOURG | \n",
- " EQUITY | \n",
- " INVESTISSEMENT LATITUDE | \n",
- " ... | \n",
- " NO | \n",
- " CARMIGNAC INVESTISSEMENT LATITUDE | \n",
- " A | \n",
- " EUR | \n",
- " FR0010147603 | \n",
- " 2018-05-31 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 4073315 | \n",
- " 4073315 | \n",
- " 3652178 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " LUXEMBOURG | \n",
- " LUXEMBOURG | \n",
- " EQUITY | \n",
- " INVESTISSEMENT LATITUDE | \n",
- " ... | \n",
- " NO | \n",
- " CARMIGNAC INVESTISSEMENT LATITUDE | \n",
- " A | \n",
- " EUR | \n",
- " FR0010147603 | \n",
- " 2018-06-30 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 4073316 | \n",
- " 4073316 | \n",
- " 3652179 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " LUXEMBOURG | \n",
- " LUXEMBOURG | \n",
- " EQUITY | \n",
- " INVESTISSEMENT LATITUDE | \n",
- " ... | \n",
- " NO | \n",
- " CARMIGNAC INVESTISSEMENT LATITUDE | \n",
- " A | \n",
- " EUR | \n",
- " FR0010147603 | \n",
- " 2018-12-31 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 4073317 | \n",
- " 4073317 | \n",
- " 3652180 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " LUXEMBOURG | \n",
- " LUXEMBOURG | \n",
- " EQUITY | \n",
- " INVESTISSEMENT LATITUDE | \n",
- " ... | \n",
- " NO | \n",
- " CARMIGNAC INVESTISSEMENT LATITUDE | \n",
- " A | \n",
- " EUR | \n",
- " FR0010147603 | \n",
- " 2019-08-31 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 4073318 | \n",
- " 4073318 | \n",
- " 3652181 | \n",
- " L134 | \n",
- " 15292.0 | \n",
- " 15292.0 | \n",
- " 11215 | \n",
- " LUXEMBOURG | \n",
- " LUXEMBOURG | \n",
- " EQUITY | \n",
- " INVESTISSEMENT LATITUDE | \n",
- " ... | \n",
- " NO | \n",
- " CARMIGNAC INVESTISSEMENT LATITUDE | \n",
- " A | \n",
- " EUR | \n",
- " FR0010147603 | \n",
- " 2020-01-31 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " False | \n",
- "
\n",
- " \n",
- "
\n",
- "
640 rows × 21 columns
\n",
- "
"
- ],
- "text/plain": [
- " Unnamed: 0.1 Unnamed: 0 Agreement - Code Company - Id \\\n",
- "2995412 2995412 2716081 L134 15292.0 \n",
- "2995413 2995413 2716082 L134 15292.0 \n",
- "2995423 2995423 2716092 L134 15292.0 \n",
- "2995424 2995424 2716093 L134 15292.0 \n",
- "2995425 2995425 2716094 L134 15292.0 \n",
- "... ... ... ... ... \n",
- "4073314 4073314 3652177 L134 15292.0 \n",
- "4073315 4073315 3652178 L134 15292.0 \n",
- "4073316 4073316 3652179 L134 15292.0 \n",
- "4073317 4073317 3652180 L134 15292.0 \n",
- "4073318 4073318 3652181 L134 15292.0 \n",
- "\n",
- " Company - Ultimate Parent Id Registrar Account - ID \\\n",
- "2995412 15292.0 11215 \n",
- "2995413 15292.0 11215 \n",
- "2995423 15292.0 11215 \n",
- "2995424 15292.0 11215 \n",
- "2995425 15292.0 11215 \n",
- "... ... ... \n",
- "4073314 15292.0 11215 \n",
- "4073315 15292.0 11215 \n",
- "4073316 15292.0 11215 \n",
- "4073317 15292.0 11215 \n",
- "4073318 15292.0 11215 \n",
- "\n",
- " Registrar Account - Region RegistrarAccount - Country \\\n",
- "2995412 LUXEMBOURG LUXEMBOURG \n",
- "2995413 LUXEMBOURG LUXEMBOURG \n",
- "2995423 LUXEMBOURG LUXEMBOURG \n",
- "2995424 LUXEMBOURG LUXEMBOURG \n",
- "2995425 LUXEMBOURG LUXEMBOURG \n",
- "... ... ... \n",
- "4073314 LUXEMBOURG LUXEMBOURG \n",
- "4073315 LUXEMBOURG LUXEMBOURG \n",
- "4073316 LUXEMBOURG LUXEMBOURG \n",
- "4073317 LUXEMBOURG LUXEMBOURG \n",
- "4073318 LUXEMBOURG LUXEMBOURG \n",
- "\n",
- " Product - Asset Type Product - Strategy ... \\\n",
- "2995412 NAN INFOTECH ... \n",
- "2995413 NAN INFOTECH ... \n",
- "2995423 NAN INFOTECH ... \n",
- "2995424 NAN INFOTECH ... \n",
- "2995425 NAN INFOTECH ... \n",
- "... ... ... ... \n",
- "4073314 EQUITY INVESTISSEMENT LATITUDE ... \n",
- "4073315 EQUITY INVESTISSEMENT LATITUDE ... \n",
- "4073316 EQUITY INVESTISSEMENT LATITUDE ... \n",
- "4073317 EQUITY INVESTISSEMENT LATITUDE ... \n",
- "4073318 EQUITY INVESTISSEMENT LATITUDE ... \n",
- "\n",
- " Product - Is Dedie ? Product - Fund \\\n",
- "2995412 NO CARMIGNAC PORTFOLIO INFOTECH \n",
- "2995413 NO CARMIGNAC PORTFOLIO INFOTECH \n",
- "2995423 NO CARMIGNAC PORTFOLIO INFOTECH \n",
- "2995424 NO CARMIGNAC PORTFOLIO INFOTECH \n",
- "2995425 NO CARMIGNAC PORTFOLIO INFOTECH \n",
- "... ... ... \n",
- "4073314 NO CARMIGNAC INVESTISSEMENT LATITUDE \n",
- "4073315 NO CARMIGNAC INVESTISSEMENT LATITUDE \n",
- "4073316 NO CARMIGNAC INVESTISSEMENT LATITUDE \n",
- "4073317 NO CARMIGNAC INVESTISSEMENT LATITUDE \n",
- "4073318 NO CARMIGNAC INVESTISSEMENT LATITUDE \n",
- "\n",
- " Product - Shareclass Type Product - Shareclass Currency \\\n",
- "2995412 A EUR \n",
- "2995413 A EUR \n",
- "2995423 A EUR \n",
- "2995424 A EUR \n",
- "2995425 A EUR \n",
- "... ... ... \n",
- "4073314 A EUR \n",
- "4073315 A EUR \n",
- "4073316 A EUR \n",
- "4073317 A EUR \n",
- "4073318 A EUR \n",
- "\n",
- " Product - Isin Centralisation Date Quantity - AUM Value - AUM CCY \\\n",
- "2995412 LU0109929157 2015-06-30 0.0 NaN \n",
- "2995413 LU0109929157 2016-03-31 0.0 NaN \n",
- "2995423 LU0109929157 2017-05-31 0.0 NaN \n",
- "2995424 LU0109929157 2019-02-28 0.0 NaN \n",
- "2995425 LU0109929157 2019-03-31 0.0 NaN \n",
- "... ... ... ... ... \n",
- "4073314 FR0010147603 2018-05-31 0.0 NaN \n",
- "4073315 FR0010147603 2018-06-30 0.0 NaN \n",
- "4073316 FR0010147603 2018-12-31 0.0 NaN \n",
- "4073317 FR0010147603 2019-08-31 0.0 NaN \n",
- "4073318 FR0010147603 2020-01-31 0.0 NaN \n",
- "\n",
- " Value - AUM € repair_flag \n",
- "2995412 NaN False \n",
- "2995413 NaN False \n",
- "2995423 NaN False \n",
- "2995424 NaN False \n",
- "2995425 NaN False \n",
- "... ... ... \n",
- "4073314 NaN False \n",
- "4073315 NaN False \n",
- "4073316 NaN False \n",
- "4073317 NaN False \n",
- "4073318 NaN False \n",
- "\n",
- "[640 rows x 21 columns]"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_repaired[df_repaired[\"Registrar Account - ID\"] == 11215]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "74ab7fb4",
- "metadata": {},
- "source": [
- "### Flows"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3347dc39",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_159596/3878087020.py:2: DtypeWarning: Columns (1,2,3,4) have mixed types. Specify dtype option on import or set low_memory=False.\n",
- " flows = pd.read_csv(f, sep =\",\")\n"
- ]
- }
- ],
- "source": [
- "with fs.open('s3://projet-bdc-carmignac-g3/flows.csv', 'rb') as f:\n",
- " flows = pd.read_csv(f, sep =\",\")\n",
- "\n",
- "sample_flows = sample_by_blocks(flows, block_size=10, num_blocks=10, random_state=42)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "4bb4f9c7",
- "metadata": {},
- "source": [
- "## Clustering"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "5957c389",
- "metadata": {},
- "outputs": [],
- "source": [
- "def load_and_clean_data(flows_path, aum_path, rates_path, gov_path):\n",
- " \"\"\"\n",
- " Loads raw CSVs and parses dates for consistent time-series analysis.\n",
- " \"\"\"\n",
- "\n",
- " flows = pd.read_csv(flows_path)\n",
- " flows['Centralisation Date'] = pd.to_datetime(flows['Centralisation Date'])\n",
- " \n",
- " aum = pd.read_csv(aum_path)\n",
- " aum['Centralisation Date'] = pd.to_datetime(aum['Centralisation Date'])\n",
- " \n",
- " rates = pd.read_csv(rates_path)\n",
- " try:\n",
- " rates['Date'] = pd.to_datetime(rates['Date'], dayfirst=True)\n",
- " except:\n",
- " rates['Date'] = pd.to_datetime(rates['Date'])\n",
- " \n",
- " gov = pd.read_csv(gov_path)\n",
- " gov['Date'] = pd.to_datetime(gov['Date'])\n",
- " \n",
- " return flows, aum, rates, gov"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "479a50b8",
- "metadata": {},
- "outputs": [],
- "source": [
- "flows_path = \"flows_sample.csv\"\n",
- "aum_path = \"aum_sample.csv\"\n",
- "rates_path = \"str_rates.csv\"\n",
- "gov_path = \"eur_gov_indices.csv\"\n",
- "\n",
- "flows, aum, rates, gov = load_and_clean_data(flows_path, aum_path, rates_path, gov_path)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "a6228231",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Bond/Index | \n",
- " Description | \n",
- " Date | \n",
- " Total Return % 1-wk-LOC | \n",
- " Yield to Maturity (s.a.) | \n",
- " Yield to Maturity (conv.) | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-01-07 | \n",
- " 0.484 | \n",
- " 3.06 | \n",
- " 3.08 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-01-14 | \n",
- " 0.414 | \n",
- " 3.00 | \n",
- " 3.03 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-01-21 | \n",
- " 0.006 | \n",
- " 3.02 | \n",
- " 3.04 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-01-28 | \n",
- " 0.208 | \n",
- " 3.00 | \n",
- " 3.03 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-02-04 | \n",
- " 0.435 | \n",
- " 3.01 | \n",
- " 3.03 | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-02-11 | \n",
- " 0.221 | \n",
- " 2.98 | \n",
- " 3.00 | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-02-18 | \n",
- " -1.020 | \n",
- " 3.13 | \n",
- " 3.16 | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-02-25 | \n",
- " -0.198 | \n",
- " 3.16 | \n",
- " 3.19 | \n",
- "
\n",
- " \n",
- " | 8 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-03-04 | \n",
- " 0.228 | \n",
- " 3.15 | \n",
- " 3.18 | \n",
- "
\n",
- " \n",
- " | 9 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-03-11 | \n",
- " -0.380 | \n",
- " 3.20 | \n",
- " 3.23 | \n",
- "
\n",
- " \n",
- " | 10 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-03-18 | \n",
- " 0.343 | \n",
- " 3.19 | \n",
- " 3.21 | \n",
- "
\n",
- " \n",
- " | 11 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-03-25 | \n",
- " 0.528 | \n",
- " 3.15 | \n",
- " 3.17 | \n",
- "
\n",
- " \n",
- " | 12 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-04-01 | \n",
- " 0.713 | \n",
- " 3.11 | \n",
- " 3.13 | \n",
- "
\n",
- " \n",
- " | 13 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-04-08 | \n",
- " 0.014 | \n",
- " 3.09 | \n",
- " 3.12 | \n",
- "
\n",
- " \n",
- " | 14 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-04-15 | \n",
- " 0.515 | \n",
- " 3.02 | \n",
- " 3.04 | \n",
- "
\n",
- " \n",
- " | 15 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-04-22 | \n",
- " 0.322 | \n",
- " 2.98 | \n",
- " 3.00 | \n",
- "
\n",
- " \n",
- " | 16 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-04-29 | \n",
- " 0.444 | \n",
- " 2.91 | \n",
- " 2.94 | \n",
- "
\n",
- " \n",
- " | 17 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-05-06 | \n",
- " -0.182 | \n",
- " 2.95 | \n",
- " 2.97 | \n",
- "
\n",
- " \n",
- " | 18 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-05-13 | \n",
- " 0.808 | \n",
- " 2.85 | \n",
- " 2.88 | \n",
- "
\n",
- " \n",
- " | 19 | \n",
- " G0D0 | \n",
- " ICE BofA German Government Index | \n",
- " 2005-05-20 | \n",
- " -0.090 | \n",
- " 2.89 | \n",
- " 2.91 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Bond/Index Description Date \\\n",
- "0 G0D0 ICE BofA German Government Index 2005-01-07 \n",
- "1 G0D0 ICE BofA German Government Index 2005-01-14 \n",
- "2 G0D0 ICE BofA German Government Index 2005-01-21 \n",
- "3 G0D0 ICE BofA German Government Index 2005-01-28 \n",
- "4 G0D0 ICE BofA German Government Index 2005-02-04 \n",
- "5 G0D0 ICE BofA German Government Index 2005-02-11 \n",
- "6 G0D0 ICE BofA German Government Index 2005-02-18 \n",
- "7 G0D0 ICE BofA German Government Index 2005-02-25 \n",
- "8 G0D0 ICE BofA German Government Index 2005-03-04 \n",
- "9 G0D0 ICE BofA German Government Index 2005-03-11 \n",
- "10 G0D0 ICE BofA German Government Index 2005-03-18 \n",
- "11 G0D0 ICE BofA German Government Index 2005-03-25 \n",
- "12 G0D0 ICE BofA German Government Index 2005-04-01 \n",
- "13 G0D0 ICE BofA German Government Index 2005-04-08 \n",
- "14 G0D0 ICE BofA German Government Index 2005-04-15 \n",
- "15 G0D0 ICE BofA German Government Index 2005-04-22 \n",
- "16 G0D0 ICE BofA German Government Index 2005-04-29 \n",
- "17 G0D0 ICE BofA German Government Index 2005-05-06 \n",
- "18 G0D0 ICE BofA German Government Index 2005-05-13 \n",
- "19 G0D0 ICE BofA German Government Index 2005-05-20 \n",
- "\n",
- " Total Return % 1-wk-LOC Yield to Maturity (s.a.) \\\n",
- "0 0.484 3.06 \n",
- "1 0.414 3.00 \n",
- "2 0.006 3.02 \n",
- "3 0.208 3.00 \n",
- "4 0.435 3.01 \n",
- "5 0.221 2.98 \n",
- "6 -1.020 3.13 \n",
- "7 -0.198 3.16 \n",
- "8 0.228 3.15 \n",
- "9 -0.380 3.20 \n",
- "10 0.343 3.19 \n",
- "11 0.528 3.15 \n",
- "12 0.713 3.11 \n",
- "13 0.014 3.09 \n",
- "14 0.515 3.02 \n",
- "15 0.322 2.98 \n",
- "16 0.444 2.91 \n",
- "17 -0.182 2.95 \n",
- "18 0.808 2.85 \n",
- "19 -0.090 2.89 \n",
- "\n",
- " Yield to Maturity (conv.) \n",
- "0 3.08 \n",
- "1 3.03 \n",
- "2 3.04 \n",
- "3 3.03 \n",
- "4 3.03 \n",
- "5 3.00 \n",
- "6 3.16 \n",
- "7 3.19 \n",
- "8 3.18 \n",
- "9 3.23 \n",
- "10 3.21 \n",
- "11 3.17 \n",
- "12 3.13 \n",
- "13 3.12 \n",
- "14 3.04 \n",
- "15 3.00 \n",
- "16 2.94 \n",
- "17 2.97 \n",
- "18 2.88 \n",
- "19 2.91 "
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "gov.head(20)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.13.11"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/repair_challenge/carmignac_analysis.py b/repair_challenge/carmignac_analysis.py
index b9b9151..4a2be1c 100644
--- a/repair_challenge/carmignac_analysis.py
+++ b/repair_challenge/carmignac_analysis.py
@@ -27,7 +27,8 @@ import pandas as pd
# 1. LOAD & VALIDATE
# ─────────────────────────────────────────────────────────────
-def load_outputs(scores_path, mapping_path, surgery_path):
+def load_outputs(scores_path, mapping_path, surgery_path,
+ err_isin_path=None, err_agg_path=None):
scores = pd.read_csv(scores_path, parse_dates=["date"])
mapping = pd.read_csv(mapping_path, parse_dates=["date"])
surgery = pd.read_csv(surgery_path, parse_dates=["date"])
@@ -40,9 +41,44 @@ def load_outputs(scores_path, mapping_path, surgery_path):
surgery["reg_orig"] = surgery["reg_orig"].astype(str)
surgery["reg_from"] = surgery["reg_from"].astype(str)
surgery["reg_to"] = surgery["reg_to"].astype(str)
+ if "lookback_months" not in surgery.columns:
+ surgery["lookback_months"] = 1 # backwards compat
- return scores, mapping, surgery
+ # Error account (optional)
+ err_isin = None
+ err_agg = None
+ if err_isin_path and os.path.exists(err_isin_path):
+ err_isin = pd.read_csv(err_isin_path, parse_dates=["date"])
+ err_isin["isin"] = err_isin["isin"].astype(str)
+ if err_agg_path and os.path.exists(err_agg_path):
+ err_agg = pd.read_csv(err_agg_path, parse_dates=["date"])
+ return scores, mapping, surgery, err_isin, err_agg
+
+
+
+# ─────────────────────────────────────────────────────────────
+# 1b. LOAD ERROR ACCOUNT (optional)
+# ─────────────────────────────────────────────────────────────
+
+def load_error_account(isin_path, agg_path):
+ """
+ Load the error-account CSVs produced by carmignac_diagnostics.py ("date" parsed, "isin" cast to str).
+ Returns (ei, ea), or (None, None) if either path is empty or any exception is raised while loading.
+ """
+ if not isin_path or not agg_path:  # both paths are required; a single missing one disables the feature
+ return None, None
+ try:
+ ei = pd.read_csv(isin_path, parse_dates=["date"])
+ ea = pd.read_csv(agg_path, parse_dates=["date"])
+ ei["isin"] = ei["isin"].astype(str)
+ print(f"[Load] error account (ISIN) : {len(ei)} rows, "
+ f"{ei['isin'].nunique()} ISINs")
+ print(f"[Load] error account (agg) : {len(ea)} rows")
+ return ei, ea
+ except Exception as e:  # best-effort: any load/parse failure is reported and treated as "no error account"
+ print(f"[WARN] Could not load error account: {e}")
+ return None, None
# ─────────────────────────────────────────────────────────────
# 2. COMPUTE ANALYTICS
@@ -195,7 +231,7 @@ def print_summary(analytics, surgery):
# 4. BUILD HTML REPORT
# ─────────────────────────────────────────────────────────────
-def build_html(analytics, surgery, scores, mapping):
+def build_html(analytics, surgery, scores, mapping, df_err_isin=None, df_err_agg=None):
tl = analytics["timeline"]
ss = analytics["surgery_stats"]
piv = analytics["pivot"]
@@ -257,14 +293,212 @@ def build_html(analytics, surgery, scores, mapping):
traj_json = json.dumps(traj_datasets)
+ # ── 4.2b Error account data (optional) ────────────────────
+ has_error = df_err_isin is not None and df_err_agg is not None
+
+ if has_error:
+ err_dates = [d.strftime("%Y-%m-%d") for d in pd.to_datetime(df_err_agg["date"])]
+ err_agg_stock = [round(float(v), 3) if not pd.isna(v) else None
+ for v in df_err_agg["stock_error_agg"].values]
+ err_agg_res = [round(float(v), 3) if not pd.isna(v) else None
+ for v in df_err_agg["residual_agg"].values]
+ err_agg_pct = [round(float(v), 4) if not pd.isna(v) else None
+ for v in df_err_agg["stock_error_agg_pct"].values]
+
+ # Top 5 ISINs by max |stock error|
+ top_err = (df_err_isin.groupby("isin")["stock_error"]
+ .apply(lambda x: x.abs().max())
+ .nlargest(5).index.tolist())
+ all_err_dates = sorted(df_err_isin["date"].unique())
+ ERR_COLORS = ["#ef4444","#f59e0b","#8b5cf6","#06b6d4","#10b981"]
+ err_isin_ds = []
+ for idx, isin in enumerate(top_err):
+ sub = (df_err_isin[df_err_isin["isin"] == isin]
+ .set_index("date")["stock_error"]
+ .reindex(all_err_dates))
+ err_isin_ds.append({
+ "label": isin,
+ "data": [round(float(v), 3) if not pd.isna(v) else None for v in sub.values],
+ "borderColor": ERR_COLORS[idx % len(ERR_COLORS)],
+ "backgroundColor": ERR_COLORS[idx % len(ERR_COLORS)] + "22",
+ "borderWidth": 1.5, "pointRadius": 0, "tension": 0.3, "fill": False,
+ })
+
+ max_err_stock = float(df_err_agg["stock_error_agg"].abs().max())
+ max_err_pct = float(df_err_agg["stock_error_agg_pct"].max())
+ agg_std = float(df_err_agg["stock_error_agg"].std())
+ agg_mean = float(df_err_agg["stock_error_agg"].abs().mean())
+ stationarity = round(agg_std / max(agg_mean, 1e-9), 3)
+
+ err_dates_js = json.dumps(err_dates)
+ err_agg_stock_js = json.dumps(err_agg_stock)
+ err_agg_res_js = json.dumps(err_agg_res)
+ err_agg_pct_js = json.dumps(err_agg_pct)
+ err_isin_ds_js = json.dumps(err_isin_ds)
+ err_isin_dates_js = json.dumps([d.strftime("%Y-%m-%d") if hasattr(d, "strftime")
+ else str(d)[:10] for d in all_err_dates])
+
+ # ISIN detail table (top 100 worst)
+ err_rows = []
+ for _, r in (df_err_isin.assign(abs_s=df_err_isin["stock_error"].abs())
+ .sort_values("abs_s", ascending=False)
+ .head(100).iterrows()):
+ ds = r["date"].strftime("%Y-%m-%d") if hasattr(r["date"], "strftime") else str(r["date"])[:10]
+ sc = "color:var(--danger)" if r["stock_error"] < 0 else "color:var(--warn)"
+ rc = "color:var(--danger)" if r["residual"] < 0 else "color:var(--warn)"
+ pch = "color:var(--danger);font-weight:600" if r["stock_error_pct"] > 5 else ("color:var(--warn)" if r["stock_error_pct"] > 1 else "")
+ err_rows.append(
+ f'| {ds} | '
+ f'{r["isin"]} | '
+ f'{r["residual"]:+,.2f} | '
+ f'{r["stock_error"]:+,.2f} | '
+ f'{r["stock_error_pct"]:.3f}% | '
+ f'
'
+ )
+ err_isin_detail = "".join(err_rows) if err_rows else (
+ '| ✓ Error account is flat |
'
+ )
+
+ # HTML block for error account section
+ err_section_html = f"""
+ 06 · Error Account
+
+
+
+
+
+
+
Max |error stock|
+
{max_err_stock:,.1f} shares
+
+
+
Max % of total AUM
+
{max_err_pct:.3f}%
+
+
+
Stationarity σ/μ
+
{stationarity:.3f}
+
lower = more stationary
+
+
+
+
+
+
+
+
+
+
+
+
+
+ | Date | ISIN |
+ Monthly residual |
+ Cumul. stock |
+ % of max AUM |
+
+ {err_isin_detail}
+
+
+
"""
+
+ # JS block for error account charts
+ err_js_block = f"""
+// ── 8. Error account charts ──────────────────────────────────
+const ERR_DATES = {err_dates_js};
+const ERR_AGG_STOCK = {err_agg_stock_js};
+const ERR_AGG_RES = {err_agg_res_js};
+const ERR_ISIN_TS = {err_isin_ds_js};
+const ERR_ISIN_DATES = {err_isin_dates_js};
+
+new Chart(document.getElementById('chartErrStock'), {{
+ type: 'line',
+ data: {{ labels: ERR_DATES, datasets: [{{
+ label: 'Aggregate error stock', data: ERR_AGG_STOCK,
+ borderColor: '#ef4444', backgroundColor: '#ef444415',
+ borderWidth: 2, pointRadius: 0, tension: 0.3, fill: true
+ }}] }},
+ options: {{
+ responsive: true, maintainAspectRatio: false,
+ interaction: {{mode:'index', intersect:false}},
+ plugins: {{ legend: {{display:false}}, tooltip: tooltip() }},
+ scales: {{ x: timeAxis(), y: {{
+ ...yAxis('Shares'),
+ grid: {{ color: ctx => ctx.tick.value === 0 ? '#ffffff55' : '#1a2030',
+ lineWidth: ctx => ctx.tick.value === 0 ? 1.5 : 1 }}
+ }} }}
+ }}
+}});
+
+new Chart(document.getElementById('chartErrRes'), {{
+ type: 'bar',
+ data: {{ labels: ERR_DATES, datasets: [{{
+ label: 'Monthly residual', data: ERR_AGG_RES,
+ backgroundColor: ERR_AGG_RES.map(v => v != null && v < 0 ? '#ef444488' : '#f59e0b88'),
+ borderColor: ERR_AGG_RES.map(v => v != null && v < 0 ? '#ef4444' : '#f59e0b'),
+ borderWidth: 1, borderRadius: 2
+ }}] }},
+ options: {{
+ responsive: true, maintainAspectRatio: false,
+ plugins: {{ legend: {{display:false}}, tooltip: tooltip() }},
+ scales: {{ x: timeAxis(), y: yAxis('Shares') }}
+ }}
+}});
+
+new Chart(document.getElementById('chartErrIsin'), {{
+ type: 'line',
+ data: {{ labels: ERR_ISIN_DATES, datasets: ERR_ISIN_TS }},
+ options: {{
+ responsive: true, maintainAspectRatio: false,
+ interaction: {{mode:'index', intersect:false}},
+ plugins: {{
+ legend: {{position:'right', labels:{{boxWidth:10, padding:8, font:{{size:10}}}}}},
+ tooltip: tooltip()
+ }},
+ scales: {{ x: timeAxis(), y: yAxis('Error stock (shares)') }}
+ }}
+}});"""
+
+ else:
+ err_section_html = ""
+ err_js_block = ""
+
# ── 4.3 Surgery detail table rows ──────────────────────────
sd = analytics["surgery_detail"].sort_values("date")
surg_rows_html = ""
if len(sd) == 0:
- surg_rows_html = "| No surgeries performed |
"
+ surg_rows_html = "| No surgeries performed |
"
else:
for _, r in sd.iterrows():
gain_class = "gain-high" if r["gain_vs_no_surgery"] > 0.05 else "gain-low"
+ lb = int(r.get("lookback_months", 1))
+ lb_cell = (f'{lb}m' if lb > 1 else "—")
surg_rows_html += f"""
| {r['date'].date()} |
@@ -275,6 +509,7 @@ def build_html(analytics, surgery, scores, mapping):
{r['jaccard_composite']:.4f} |
+{r['gain_vs_no_surgery']:.6f} |
{r['gain_pct_of_score']:.1f}% |
+ {lb_cell} |
"""
# ── 4.4 Top accounts table ──────────────────────────────────
@@ -857,6 +1092,7 @@ def build_html(analytics, surgery, scores, mapping):
Jaccard |
Score gain |
% of score |
+ Lookback |
{surg_rows_html}
@@ -887,6 +1123,9 @@ def build_html(analytics, surgery, scores, mapping):
+ {err_section_html}
+
+
@@ -1297,6 +1536,7 @@ new Chart(document.getElementById('chartJaccard'), {{
}},
}},
}});
+{err_js_block}