"""
Helper methods that are used in the repair challenge
"""
import json
import pandas as pd
import numpy as np
import s3fs
import os
def load_data_diagnostics():
    """Download the AUM and flows CSV extracts used by the diagnostics step.

    The S3 credentials are read from the ``AWS_ACCESS_KEY_ID``,
    ``AWS_SECRET_ACCESS_KEY`` and ``AWS_SESSION_TOKEN`` environment
    variables; a missing variable raises ``KeyError``. Both files are parsed
    with ``;`` as the separator and their ``"Centralisation Date"`` column is
    converted with ``pd.to_datetime``. Column names are left untouched.

    Returns:
        tuple: ``(aum, flows)`` as two pandas DataFrames.
    """
    endpoint = "https://" + "minio-simple.lab.groupe-genes.fr"
    credentials = {
        "key": os.environ["AWS_ACCESS_KEY_ID"],
        "secret": os.environ["AWS_SECRET_ACCESS_KEY"],
        "token": os.environ["AWS_SESSION_TOKEN"],
    }
    remote = s3fs.S3FileSystem(
        client_kwargs={"endpoint_url": endpoint}, **credentials
    )

    # Flows are fetched before AUM, as in the original read order.
    remote_files = {
        "flows": "projet-bdc-data//carmignac/Flows ENSAE V2 -20251105.csv",
        "aum": "projet-bdc-data//carmignac/AUM ENSAE V2 -20251105.csv",
    }
    frames = {}
    for label, remote_path in remote_files.items():
        with remote.open(remote_path, "rb") as handle:
            frames[label] = pd.read_csv(handle, sep=";")

    date_col = "Centralisation Date"
    for label in ("aum", "flows"):
        frames[label][date_col] = pd.to_datetime(frames[label][date_col])

    return frames["aum"], frames["flows"]
def load_data_repair():
    """Load the AUM and flows extracts with short column names for the repair step.

    The download, CSV parsing and date conversion are delegated to
    ``load_data_diagnostics`` instead of being duplicated here. On top of
    that, the long source column names are replaced by short ones and the
    registrar account ID is cast to ``str`` in both frames.

    Renamed AUM columns: ``reg_id``, ``isin``, ``date``, ``qty_aum``,
    ``val_eur``, ``region``. Renamed flows columns: ``reg_id``, ``isin``,
    ``date``, ``qty_net``, ``val_net_eur``.

    Returns:
        tuple: ``(aum, flows)`` as two pandas DataFrames.
    """
    aum, flows = load_data_diagnostics()

    # Short names
    aum = aum.rename(
        columns={
            "Registrar Account - ID": "reg_id",
            "Product - Isin": "isin",
            "Centralisation Date": "date",
            "Quantity - AUM": "qty_aum",
            "Value - AUM €": "val_eur",
            "Registrar Account - Region": "region",
        }
    )
    flows = flows.rename(
        columns={
            "Registrar Account - ID": "reg_id",
            "Product - Isin": "isin",
            "Centralisation Date": "date",
            "Quantity - NetFlows": "qty_net",
            "Value € - NetFlows": "val_net_eur",
        }
    )

    # IDs are compared as strings downstream, so normalise both frames.
    aum["reg_id"] = aum["reg_id"].astype(str)
    flows["reg_id"] = flows["reg_id"].astype(str)
    return aum, flows
def load_inputs_branch(mapping_path, surgery_path):
    """Load the AUM extract from S3 plus the mapping and surgery CSV files.

    Args:
        mapping_path: path passed to ``pd.read_csv``; the file must contain
            a ``date`` column and the ``reg_orig`` / ``reg_used`` columns.
        surgery_path: path of the surgery CSV. When falsy, an empty
            DataFrame is returned in its place and no file is read.

    Returns:
        tuple: ``(aum, mapping, surgery)``. The ID columns
        (``"Registrar Account - ID"`` in ``aum``, ``reg_orig`` / ``reg_used``
        in ``mapping``, ``reg_orig`` / ``reg_from`` / ``reg_to`` in a
        non-empty ``surgery``) are cast to ``str``.
    """
    endpoint = "https://" + "minio-simple.lab.groupe-genes.fr"
    remote = s3fs.S3FileSystem(
        client_kwargs={"endpoint_url": endpoint},
        key=os.environ["AWS_ACCESS_KEY_ID"],
        secret=os.environ["AWS_SECRET_ACCESS_KEY"],
        token=os.environ["AWS_SESSION_TOKEN"],
    )
    aum_location = "s3://projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv"
    with remote.open(aum_location, "rb") as handle:
        aum = pd.read_csv(handle, sep=";")

    mapping = pd.read_csv(mapping_path, parse_dates=["date"])
    if surgery_path:
        surgery = pd.read_csv(surgery_path, parse_dates=["date"])
    else:
        surgery = pd.DataFrame()

    # Normalise ID columns to string
    aum["Registrar Account - ID"] = aum["Registrar Account - ID"].astype(str)
    for column in ("reg_orig", "reg_used"):
        mapping[column] = mapping[column].astype(str)
    if not surgery.empty:
        for column in ("reg_orig", "reg_from", "reg_to"):
            surgery[column] = surgery[column].astype(str)

    return aum, mapping, surgery
# ─────────────────────────────────────────────────────────────
# BUILD HTML REPORT
# ─────────────────────────────────────────────────────────────
def build_html_diagnostics(df_broken, df_all, df_agg, df_err_isin, df_err_agg, alpha):
# Build the "Broken Months Diagnostics" report and return it as one string.
#
# Inputs (only the columns this function actually reads are listed):
#   df_broken   -- "date", "isin", "is_lag", "missing_pct", "missing_flow",
#                  "q_agg_prev", "q_agg_curr", "flow_agg"
#   df_all      -- "date", "isin", "broken", "is_lag", "missing_flow",
#                  "missing_pct"
#   df_agg      -- "date", "broken", "is_lag", "delta_aum", "flow_total",
#                  "missing_flow", "missing_pct", "q_total_prev",
#                  "q_total_curr"
#   df_err_isin -- "date", "isin", "residual", "stock_error",
#                  "stock_error_pct"
#   df_err_agg  -- "date", "stock_error_agg", "residual_agg",
#                  "stock_error_agg_pct"
#   alpha       -- number rendered with the "{alpha:.1%}" format in the
#                  detail-log caption
#
# Returns: the string built by the final f-string template (`html`).
#
# NOTE(review): the string literals in this function look as if their HTML
# markup was stripped (several f-string fragments are cut mid-expression).
# As a consequence many locals computed below (the *_js / *_str / *_json
# values, the CSS-class helpers, `no_broken_msg`, ...) are not referenced by
# the template text visible here -- confirm against the original file.
# ── JS-ready data ────────────────────────────────────────────
# Timeline: n_broken and total_missing per month
# Only rows where df_all["broken"] is truthy are aggregated; the result is
# re-indexed on every distinct date of df_all and gaps are filled with 0.
tl = (
df_all[df_all["broken"]]
.groupby("date")
.agg(
n_broken=("isin", "count"),
total_missing=("missing_flow", lambda x: x.abs().sum()),
n_lag=("is_lag", "sum"),
)
.reindex(df_all["date"].sort_values().unique())
.fillna(0)
)
tl.index = pd.to_datetime(tl.index)
dates_str = json.dumps([d.strftime("%Y-%m-%d") for d in tl.index])
# Serialise a numeric array to a JSON list: values are rounded to `dec`
# decimals and NaN entries become None (JSON null).
def jf(arr, dec=4):
return json.dumps(
[round(float(v), dec) if not np.isnan(v) else None for v in arr]
)
# Colour palette, cycled with a modulo when assigning one colour per ISIN.
ISIN_COLORS = [
"#2563eb",
"#16a34a",
"#dc2626",
"#d97706",
"#7c3aed",
"#0891b2",
"#db2777",
"#65a30d",
"#ea580c",
"#6366f1",
]
n_broken_js = jf(tl["n_broken"].values, 0)
total_miss_js = jf(tl["total_missing"].values)
n_lag_js = jf(tl["n_lag"].values, 0)
# Aggregate (cross-ISIN) JS data
agg_dates_str = json.dumps(
[d.strftime("%Y-%m-%d") for d in pd.to_datetime(df_agg["date"])]
)
agg_delta_js = jf(df_agg["delta_aum"].values)
agg_flow_js = jf(df_agg["flow_total"].values)
agg_missing_js = jf(df_agg["missing_flow"].values)
agg_pct_js = jf((df_agg["missing_pct"] * 100).values)
# Aggregate KPIs
n_agg_broken = int(df_agg["broken"].sum())
n_agg_lag = int(df_agg["is_lag"].sum())
n_agg_genuine = n_agg_broken - n_agg_lag
# Falls back to 0 when df_agg has no rows.
max_agg_pct = float(df_agg["missing_pct"].max() * 100) if len(df_agg) else 0
# Aggregate detail table rows
# One row per df_agg entry whose "broken" flag is truthy.
# NOTE(review): `pc` and `mc` are not used in the row text visible below.
agg_rows = []
for _, r in df_agg[df_agg["broken"]].iterrows():
lb = 'lag' if r["is_lag"] else ""
pc = "pct-high" if r["missing_pct"] > 0.1 else "pct-med"
ds = (
r["date"].strftime("%Y-%m-%d")
if hasattr(r["date"], "strftime")
else str(r["date"])[:10]
)
mc = "miss-neg" if r["missing_flow"] < 0 else "miss-pos"
agg_rows.append(
f"
| {ds} | "
f'{r["q_total_prev"]:,.1f} | '
f'{r["q_total_curr"]:,.1f} | '
f'{r["flow_total"]:,.1f} | '
f'{r["missing_flow"]:+,.1f} | '
f'{r["missing_pct"] * 100:.2f}% | '
f"{lb} |
"
)
# Placeholder row when no aggregate month is flagged.
agg_detail_rows = (
"".join(agg_rows)
if agg_rows
else (
'| ✓ No broken months at aggregate level |
'
)
)
# ── Error account JS data ────────────────────────────────────
err_dates_str = json.dumps(
[d.strftime("%Y-%m-%d") for d in pd.to_datetime(df_err_agg["date"])]
)
err_agg_stock_js = jf(df_err_agg["stock_error_agg"].values)
err_agg_res_js = jf(df_err_agg["residual_agg"].values)
err_agg_pct_js = jf(df_err_agg["stock_error_agg_pct"].values)
# Top 5 ISINs by max absolute stock error
top_err_isins = (
df_err_isin.groupby("isin")["stock_error"]
.apply(lambda x: x.abs().max())
.nlargest(5)
.index.tolist()
)
all_err_dates = sorted(df_err_isin["date"].unique())
# One chart dataset per top ISIN, aligned on all_err_dates; dates missing
# for an ISIN stay NaN after the reindex and are emitted as None.
err_isin_datasets = []
for idx, isin in enumerate(top_err_isins):
sub = (
df_err_isin[df_err_isin["isin"] == isin]
.set_index("date")["stock_error"]
.reindex(all_err_dates)
)
err_isin_datasets.append(
{
"label": isin,
"data": [
round(float(v), 3) if not pd.isna(v) else None for v in sub.values
],
"borderColor": ISIN_COLORS[idx % len(ISIN_COLORS)],
"backgroundColor": ISIN_COLORS[idx % len(ISIN_COLORS)] + "22",
"borderWidth": 1.5,
"pointRadius": 0,
"tension": 0.3,
"fill": False,
}
)
err_isin_ts_json = json.dumps(err_isin_datasets)
err_isin_dates_str = json.dumps(
[
d.strftime("%Y-%m-%d") if hasattr(d, "strftime") else str(d)[:10]
for d in all_err_dates
]
)
# Error account KPIs
max_agg_stock_err = float(df_err_agg["stock_error_agg"].abs().max())
max_agg_stock_pct = float(df_err_agg["stock_error_agg_pct"].max())
# Stationarity proxy: std / mean_abs (lower = more stationary)
# The 1e-9 floor on the denominator avoids a division by zero.
agg_std = float(df_err_agg["stock_error_agg"].std())
agg_mean = float(df_err_agg["stock_error_agg"].abs().mean())
stationarity = round(agg_std / max(agg_mean, 1e-9), 3)
# Error account detail table: the 200 rows of df_err_isin with the largest
# absolute stock error (ranked across all ISINs, not per ISIN).
err_worst = (
df_err_isin.assign(abs_stock=df_err_isin["stock_error"].abs())
.sort_values("abs_stock", ascending=False)
.head(200)
)
# NOTE(review): `sc`, `rc` and `pch` are not used in the row text visible below.
err_isin_rows = []
for _, r in err_worst.iterrows():
ds = (
r["date"].strftime("%Y-%m-%d")
if hasattr(r["date"], "strftime")
else str(r["date"])[:10]
)
sc = "miss-neg" if r["stock_error"] < 0 else "miss-pos"
rc = "miss-neg" if r["residual"] < 0 else "miss-pos"
pch = (
"pct-high"
if r["stock_error_pct"] > 5
else ("pct-med" if r["stock_error_pct"] > 1 else "")
)
err_isin_rows.append(
f"| {ds} | "
f'{r["isin"]} | '
f'{r["residual"]:+,.2f} | '
f'{r["stock_error"]:+,.2f} | '
f'{r["stock_error_pct"]:.3f}% |
'
)
err_isin_detail = (
"".join(err_isin_rows)
if err_isin_rows
else (
'| ✓ Error account is flat (no residuals) |
'
)
)
# Per-ISIN summary
# Sorted by total absolute missing flow, largest first.
isin_sum = (
df_broken.groupby("isin")
.agg(
n_months=("date", "count"),
avg_pct=("missing_pct", "mean"),
total_abs=("missing_flow", lambda x: x.abs().sum()),
)
.sort_values("total_abs", ascending=False)
)
# Per-ISIN missing_pct timeseries for the top 5 ISINs
# Series are aligned on every date of df_all, gaps filled with 0, and the
# values are multiplied by 100 before being stored.
top_isins = isin_sum.head(5).index.tolist()
all_dates = sorted(df_all["date"].unique())
isin_ts_datasets = []
for idx, isin in enumerate(top_isins):
sub = (
df_all[df_all["isin"] == isin]
.set_index("date")["missing_pct"]
.reindex(all_dates)
.fillna(0)
)
isin_ts_datasets.append(
{
"label": isin,
"data": [round(float(v) * 100, 3) for v in sub.values],
"borderColor": ISIN_COLORS[idx % len(ISIN_COLORS)],
"backgroundColor": ISIN_COLORS[idx % len(ISIN_COLORS)] + "22",
"borderWidth": 2,
"pointRadius": 0,
"tension": 0.3,
"fill": False,
}
)
isin_ts_json = json.dumps(isin_ts_datasets)
all_dates_str = json.dumps(
[
d.strftime("%Y-%m-%d") if hasattr(d, "strftime") else str(d)[:10]
for d in all_dates
]
)
# Detail table rows
# Limited to the first 200 rows of df_broken, in their existing order.
# NOTE(review): `pct_class` is not used in the row text visible below.
detail_rows = ""
for _, r in df_broken.head(200).iterrows():
lag_badge = 'lag' if r["is_lag"] else ""
pct_class = "pct-high" if r["missing_pct"] > 0.1 else "pct-med"
detail_rows += f"""
| {r["date"].strftime("%Y-%m-%d") if hasattr(r["date"], "strftime") else str(r["date"])[:10]} |
{r["isin"]} |
{r["q_agg_prev"]:,.1f} |
{r["q_agg_curr"]:,.1f} |
{r["flow_agg"]:,.1f} |
{r["missing_flow"]:+,.1f} |
{r["missing_pct"] * 100:.2f}% |
{lag_badge} |
"""
# ISIN summary table
isin_rows = ""
for isin, row in isin_sum.iterrows():
isin_rows += f"""
| {isin} |
{int(row["n_months"])} |
{row["avg_pct"] * 100:.2f}% |
{row["total_abs"]:,.1f} |
"""
# KPIs
# NOTE(review): the template below evaluates `n_broken_kpi / total`, which
# raises ZeroDivisionError when df_all has no rows -- confirm callers never
# pass an empty df_all.
total = len(df_all)
n_broken_kpi = len(df_broken)
n_lag_kpi = int(df_broken["is_lag"].sum())
n_genuine = n_broken_kpi - n_lag_kpi
max_pct = df_broken["missing_pct"].max() * 100 if len(df_broken) else 0
n_isins = df_broken["isin"].nunique()
no_broken_msg = ""
if n_broken_kpi == 0:
no_broken_msg = '✓ No broken months detected at this threshold.
'
# Final report template. The "{3}" in the lag caption is a literal 3, not a
# variable.
html = f"""
Carmignac — Broken Months Diagnostics
(ISIN, month) pairs
{total:,}
examined
Broken months
0 else "success"}">{
n_broken_kpi:,}
{n_broken_kpi / total * 100:.1f}% of pairs
Likely lags
{n_lag_kpi}
resolved by ±{3}d window
Genuine gaps
0 else "success"}">{
n_genuine
}
unresolved by lag fix
ISINs affected
{n_isins}
distinct ISINs
Max missing %
10 else "warn"}">{max_pct:.1f}%
worst single (isin, month)
00 · Error account — cumulative residuals
Max |stock error|
{
max_agg_stock_err:,.1f} shares
Max % of total AUM
5 else "var(--warn)"
}">{max_agg_stock_pct:.3f}%
Stationarity (σ/μ)
{stationarity:.3f}
lower = more stationary
| Date | ISIN |
Monthly residual |
Cumulative stock |
% of max AUM |
{err_isin_detail}
01 · Aggregate view — all ISINs combined
| Date |
Σ Q(t−1) | Σ Q(t) |
Σ Flow | Missing |
Missing % | |
{agg_detail_rows}
01 · Timeline — per ISIN
02 · By ISIN
{
'
No broken months detected.
'
if n_broken_kpi == 0
else f'''
| ISIN | Broken months |
Avg missing % | Total |missing| (shares) |
{isin_rows}
'''
}
03 · Detail log
Threshold α = {alpha:.1%} · showing up to 200 rows
{
'
✓ No broken months detected at this threshold.
'
if n_broken_kpi == 0
else f'''
| Date | ISIN |
Q(t-1) | Q(t) |
Net flow | Missing |
Missing % of movement | |
{detail_rows}
'''
}
"""
return html
def build_html_repair(analytics, surgery, scores, mapping, df_err_isin=None, df_err_agg=None):
# Build the "Carmignac Pipeline — Analysis Report" and return it as one string.
#
# Inputs (only what this function actually reads is listed):
#   analytics   -- mapping with the keys "timeline", "surgery_stats",
#                  "pivot", "churn", "dates", "ever_remapped" and
#                  "surgery_detail"
#   surgery     -- DataFrame iterated row by row; reads "reg_orig", "date",
#                  "reg_from", "reg_to", "jaccard_composite" and
#                  "gain_vs_no_surgery"; its length is shown as a KPI
#   scores, mapping -- not referenced anywhere in the body visible here
#   df_err_isin, df_err_agg -- optional; the error-account section is built
#                  only when BOTH are not None
#
# Returns: the string built by the final f-string template (`html`).
#
# NOTE(review): the string literals in this function look as if their HTML
# markup was stripped. As a consequence several locals (`ch`, `dates_str`,
# `js`, the surgery sparkline values, `traj_json`, `err_js_block`, ...) are
# not referenced by the template text visible here -- confirm against the
# original file.
tl = analytics["timeline"]
ss = analytics["surgery_stats"]
piv = analytics["pivot"]
ch = analytics["churn"]
dates_str = analytics["dates"]
# ── helpers to serialise for JS ─────────────────────────────
# jf: numeric array -> JSON list, rounded to `decimals`, NaN -> None (null).
def jf(arr, decimals=6):
return json.dumps(
[round(float(v), decimals) if not np.isnan(v) else None for v in arr]
)
# js: any iterable -> JSON list, values passed through unchanged.
def js(arr):
return json.dumps(list(arr))
# ── colour palette ───────────────────────────────────────────
# Cycled with a modulo when assigning one colour per registrar ID.
REG_COLORS = [
"#2563eb",
"#16a34a",
"#dc2626",
"#d97706",
"#7c3aed",
"#0891b2",
"#db2777",
"#65a30d",
"#ea580c",
"#6366f1",
"#059669",
"#b45309",
"#9333ea",
"#0284c7",
"#e11d48",
]
# ── 4.1 Surgery sparkline data ──────────────────────────────
# The index entries of `ss` must support strftime.
surg_dates = [d.strftime("%Y-%m-%d") for d in ss.index]
n_surg = jf(ss["n_surgeries"].values, 0)
total_gain = jf(ss["total_gain"].values)
avg_gain = jf(ss["avg_gain"].values)
avg_jaccard = jf(ss["avg_jaccard"].values)
# ── 4.2 Individual trajectories ────────────────────────────
# One chart dataset per column of the pivot table.
reg_ids = list(piv.columns)
traj_datasets = []
# Surgery lookup: reg_orig -> list of {date, from, to, composite}
surg_by_reg = {}
for _, row in surgery.iterrows():
surg_by_reg.setdefault(row["reg_orig"], []).append(
{
"date": row["date"].strftime("%Y-%m-%d"),
"reg_from": str(row["reg_from"]),
"reg_to": str(row["reg_to"]),
"composite": round(float(row["jaccard_composite"]), 4),
"gain": round(float(row["gain_vs_no_surgery"]), 6),
}
)
# IDs listed in analytics["ever_remapped"] get a dashed line ([6, 3]).
for idx, rid in enumerate(reg_ids):
remapped = rid in analytics["ever_remapped"]
traj_datasets.append(
{
"label": rid,
"data": [
round(float(v), 6) if not np.isnan(v) else None
for v in piv[rid].values
],
"borderColor": REG_COLORS[idx % len(REG_COLORS)],
"backgroundColor": REG_COLORS[idx % len(REG_COLORS)] + "22",
"borderWidth": 2,
"borderDash": [6, 3] if remapped else [],
"pointRadius": 0,
"tension": 0.3,
"fill": False,
"remapped": remapped,
"surgeries": surg_by_reg.get(rid, []),
}
)
traj_json = json.dumps(traj_datasets)
# ── 4.2b Error account data (optional) ────────────────────
# Both frames are required; otherwise the section and its JS are empty strings.
has_error = df_err_isin is not None and df_err_agg is not None
if has_error:
err_dates = [d.strftime("%Y-%m-%d") for d in pd.to_datetime(df_err_agg["date"])]
err_agg_stock = [
round(float(v), 3) if not pd.isna(v) else None
for v in df_err_agg["stock_error_agg"].values
]
err_agg_res = [
round(float(v), 3) if not pd.isna(v) else None
for v in df_err_agg["residual_agg"].values
]
err_agg_pct = [
round(float(v), 4) if not pd.isna(v) else None
for v in df_err_agg["stock_error_agg_pct"].values
]
# Top 5 ISINs by max |stock error|
top_err = (
df_err_isin.groupby("isin")["stock_error"]
.apply(lambda x: x.abs().max())
.nlargest(5)
.index.tolist()
)
all_err_dates = sorted(df_err_isin["date"].unique())
ERR_COLORS = ["#ef4444", "#f59e0b", "#8b5cf6", "#06b6d4", "#10b981"]
# One dataset per top ISIN aligned on all_err_dates; missing dates -> None.
err_isin_ds = []
for idx, isin in enumerate(top_err):
sub = (
df_err_isin[df_err_isin["isin"] == isin]
.set_index("date")["stock_error"]
.reindex(all_err_dates)
)
err_isin_ds.append(
{
"label": isin,
"data": [
round(float(v), 3) if not pd.isna(v) else None
for v in sub.values
],
"borderColor": ERR_COLORS[idx % len(ERR_COLORS)],
"backgroundColor": ERR_COLORS[idx % len(ERR_COLORS)] + "22",
"borderWidth": 1.5,
"pointRadius": 0,
"tension": 0.3,
"fill": False,
}
)
max_err_stock = float(df_err_agg["stock_error_agg"].abs().max())
max_err_pct = float(df_err_agg["stock_error_agg_pct"].max())
# Stationarity proxy: std / mean of absolute values, with a 1e-9 floor on
# the denominator to avoid a division by zero.
agg_std = float(df_err_agg["stock_error_agg"].std())
agg_mean = float(df_err_agg["stock_error_agg"].abs().mean())
stationarity = round(agg_std / max(agg_mean, 1e-9), 3)
err_dates_js = json.dumps(err_dates)
err_agg_stock_js = json.dumps(err_agg_stock)
err_agg_res_js = json.dumps(err_agg_res)
err_agg_pct_js = json.dumps(err_agg_pct)
err_isin_ds_js = json.dumps(err_isin_ds)
err_isin_dates_js = json.dumps(
[
d.strftime("%Y-%m-%d") if hasattr(d, "strftime") else str(d)[:10]
for d in all_err_dates
]
)
# ISIN detail table (top 100 worst)
# NOTE(review): `sc`, `rc` and `pch` are not used in the row text visible below.
err_rows = []
for _, r in (
df_err_isin.assign(abs_s=df_err_isin["stock_error"].abs())
.sort_values("abs_s", ascending=False)
.head(100)
.iterrows()
):
ds = (
r["date"].strftime("%Y-%m-%d")
if hasattr(r["date"], "strftime")
else str(r["date"])[:10]
)
sc = "color:var(--danger)" if r["stock_error"] < 0 else "color:var(--warn)"
rc = "color:var(--danger)" if r["residual"] < 0 else "color:var(--warn)"
pch = (
"color:var(--danger);font-weight:600"
if r["stock_error_pct"] > 5
else ("color:var(--warn)" if r["stock_error_pct"] > 1 else "")
)
err_rows.append(
f"| {ds} | "
f'{r["isin"]} | '
f'{r["residual"]:+,.2f} | '
f'{r["stock_error"]:+,.2f} | '
f'{r["stock_error_pct"]:.3f}% | '
f"
"
)
err_isin_detail = (
"".join(err_rows)
if err_rows
else (
'| ✓ Error account is flat |
'
)
)
# HTML block for error account section
err_section_html = f"""
06 · Error Account
Max |error stock|
{max_err_stock:,.1f} shares
Max % of total AUM
5 else "var(--warn)"}">{max_err_pct:.3f}%
Stationarity σ/μ
{stationarity:.3f}
lower = more stationary
| Date | ISIN |
Monthly residual |
Cumul. stock |
% of max AUM |
{err_isin_detail}
"""
# JS block for error account charts
# Doubled braces ({{ }}) are literal braces in the emitted JavaScript; the
# snippet calls tooltip(), timeAxis() and yAxis(), which are not defined in
# the text visible here.
err_js_block = f"""
// ── 8. Error account charts ──────────────────────────────────
const ERR_DATES = {err_dates_js};
const ERR_AGG_STOCK = {err_agg_stock_js};
const ERR_AGG_RES = {err_agg_res_js};
const ERR_ISIN_TS = {err_isin_ds_js};
const ERR_ISIN_DATES = {err_isin_dates_js};
new Chart(document.getElementById('chartErrStock'), {{
type: 'line',
data: {{ labels: ERR_DATES, datasets: [{{
label: 'Aggregate error stock', data: ERR_AGG_STOCK,
borderColor: '#ef4444', backgroundColor: '#ef444415',
borderWidth: 2, pointRadius: 0, tension: 0.3, fill: true
}}] }},
options: {{
responsive: true, maintainAspectRatio: false,
interaction: {{mode:'index', intersect:false}},
plugins: {{ legend: {{display:false}}, tooltip: tooltip() }},
scales: {{ x: timeAxis(), y: {{
...yAxis('Shares'),
grid: {{ color: ctx => ctx.tick.value === 0 ? '#ffffff55' : '#1a2030',
lineWidth: ctx => ctx.tick.value === 0 ? 1.5 : 1 }}
}} }}
}}
}});
new Chart(document.getElementById('chartErrRes'), {{
type: 'bar',
data: {{ labels: ERR_DATES, datasets: [{{
label: 'Monthly residual', data: ERR_AGG_RES,
backgroundColor: ERR_AGG_RES.map(v => v != null && v < 0 ? '#ef444488' : '#f59e0b88'),
borderColor: ERR_AGG_RES.map(v => v != null && v < 0 ? '#ef4444' : '#f59e0b'),
borderWidth: 1, borderRadius: 2
}}] }},
options: {{
responsive: true, maintainAspectRatio: false,
plugins: {{ legend: {{display:false}}, tooltip: tooltip() }},
scales: {{ x: timeAxis(), y: yAxis('Shares') }}
}}
}});
new Chart(document.getElementById('chartErrIsin'), {{
type: 'line',
data: {{ labels: ERR_ISIN_DATES, datasets: ERR_ISIN_TS }},
options: {{
responsive: true, maintainAspectRatio: false,
interaction: {{mode:'index', intersect:false}},
plugins: {{
legend: {{position:'right', labels:{{boxWidth:10, padding:8, font:{{size:10}}}}}},
tooltip: tooltip()
}},
scales: {{ x: timeAxis(), y: yAxis('Error stock (shares)') }}
}}
}});"""
else:
err_section_html = ""
err_js_block = ""
# ── 4.3 Surgery detail table rows ──────────────────────────
# Rows come from analytics["surgery_detail"], sorted by date.
# "lookback_months" defaults to 1 when absent from a row.
# NOTE(review): `gain_class` is not used in the row text visible below.
sd = analytics["surgery_detail"].sort_values("date")
surg_rows_html = ""
if len(sd) == 0:
surg_rows_html = "| No surgeries performed |
"
else:
for _, r in sd.iterrows():
gain_class = "gain-high" if r["gain_vs_no_surgery"] > 0.05 else "gain-low"
lb = int(r.get("lookback_months", 1))
lb_cell = (
f'{lb}m'
if lb > 1
else "—"
)
surg_rows_html += f"""
| {r["date"].date()} |
{r["reg_orig"]} |
{r["reg_from"]} |
→ |
{r["reg_to"]} |
{r["jaccard_composite"]:.4f} |
+{r["gain_vs_no_surgery"]:.6f} |
{r["gain_pct_of_score"]:.1f}% |
{lb_cell} |
"""
# ── 4.4 Top accounts table ──────────────────────────────────
# Ranking uses the pivot row at the largest index value, NaN entries
# dropped, sorted from highest to lowest score.
# NOTE(review): `bar_w` divides by top_accounts.max(); a maximum of 0 or an
# empty ranking would not yield a usable width -- confirm inputs. `bar_w`
# and `color` are not used in the row text visible below.
last_date = piv.index.max()
top_accounts = piv.loc[last_date].dropna().sort_values(ascending=False)
top_rows_html = ""
for rank, (rid, sc) in enumerate(top_accounts.items(), 1):
remapped = "✓" if rid in analytics["ever_remapped"] else ""
bar_w = int(sc / top_accounts.max() * 100)
color = REG_COLORS[(rank - 1) % len(REG_COLORS)]
top_rows_html += f"""
| #{rank} |
{rid} |
{sc:.6f} |
|
{remapped} |
"""
# ─────────────────────────────────────────────────────────────
# HTML TEMPLATE
# ─────────────────────────────────────────────────────────────
# KPIs read the last and first "sum_post" values and the maximum
# "recovery_pct" of the timeline frame.
html = f"""
Carmignac Pipeline — Analysis Report
Σ score at t_ref
{tl["sum_post"].iloc[-1]:.4f}
post-surgery
Σ score at t_min
{tl["sum_post"].iloc[0]:.4f}
post-surgery
Max recovery
{tl["recovery_pct"].max():.1f}%
score rescued by surgery
Total surgeries
{len(surgery)}
operations performed
Reg IDs universe
{piv.shape[1]}
at reference date
Ever remapped
{len(analytics["ever_remapped"])}
reg IDs w/ code change
01 · Score Integrity Over Time
02 · Individual Score Trajectories
03 · Surgery Operations
04 · Surgery Detail Log
{
'
No surgeries were performed on this dataset.
'
if len(surgery) == 0
else f'''
| Date |
Reg orig |
Code from |
|
Code to |
Jaccard |
Score gain |
% of score |
Lookback |
{surg_rows_html}
'''
}
05 · Score Ranking at t_ref
| Rank |
Registrar ID |
Score (weight) |
Relative size |
Remapped |
{top_rows_html}
{err_section_html}
"""
return html