# Source: Project_Carmignac/repair_challenge/carmignac repair.py
# Repository web-view header (not part of the program):
#   1038 lines · 36 KiB · Python · Raw / Normal View / History
#   2026-03-20 00:08:01 +01:00
"""
Carmignac Data Challenge — Pipeline Results Analysis
=====================================================
Analyses the CSV outputs produced by carmignac_repair.py:

  - carmignac_scores.csv       (post-surgery score history)
  - carmignac_mapping.csv      (reg_id mapping history)
  - carmignac_surgery_log.csv  (surgery operations)

Produces a self-contained HTML report with interactive charts.

Usage:
    python carmignac_analysis.py
    python carmignac_analysis.py --scores path/to/scores.csv \\
        --mapping path/to/mapping.csv \\
        --surgery path/to/surgery_log.csv \\
        --out report.html
"""
import argparse
import json
import os
import sys
import numpy as np
import pandas as pd
# ─────────────────────────────────────────────────────────────
# 1. LOAD & VALIDATE
# ─────────────────────────────────────────────────────────────
def load_outputs(scores_path, mapping_path, surgery_path):
    """Load the three pipeline CSV outputs and normalise their dtypes.

    Identifier columns are read as text directly from the CSV. Letting
    pandas infer a numeric dtype first and converting to ``str`` afterwards
    would silently rewrite codes such as ``"007"`` to ``"7"`` (or to
    ``"12.0"`` when the column contains missing values), which breaks any
    later comparison between identifiers coming from different files.

    Args:
        scores_path: Path to the scores CSV (columns used: ``date``,
            ``reg_id``).
        mapping_path: Path to the mapping CSV (columns used: ``date``,
            ``reg_orig``, ``reg_used``, ``changed``).
        surgery_path: Path to the surgery log CSV (columns used: ``date``,
            ``reg_orig``, ``reg_from``, ``reg_to``).

    Returns:
        Tuple ``(scores, mapping, surgery)`` of DataFrames with ``date``
        parsed as datetimes, identifier columns as strings and
        ``mapping["changed"]`` as booleans.
    """
    scores = pd.read_csv(
        scores_path,
        parse_dates=["date"],
        dtype={"reg_id": str},
    )
    mapping = pd.read_csv(
        mapping_path,
        parse_dates=["date"],
        dtype={"reg_orig": str, "reg_used": str},
    )
    surgery = pd.read_csv(
        surgery_path,
        parse_dates=["date"],
        dtype={"reg_orig": str, "reg_from": str, "reg_to": str},
    )

    # Normalise dtypes. The astype(str) calls are kept so that missing
    # identifiers are still rendered as the string "nan", as before.
    scores["reg_id"] = scores["reg_id"].astype(str)
    for col in ("reg_orig", "reg_used"):
        mapping[col] = mapping[col].astype(str)
    mapping["changed"] = mapping["changed"].astype(bool)
    for col in ("reg_orig", "reg_from", "reg_to"):
        surgery[col] = surgery[col].astype(str)

    return scores, mapping, surgery
# ─────────────────────────────────────────────────────────────
# 2. COMPUTE ANALYTICS
# ─────────────────────────────────────────────────────────────
def compute_analytics(scores, mapping, surgery):
    """Derive every table and series needed by the summary and the report.

    Args:
        scores: DataFrame with columns ``date``, ``reg_id``, ``score``.
        mapping: DataFrame with columns ``date``, ``reg_orig``, ``changed``
            (boolean).
        surgery: DataFrame with columns ``date``, ``reg_orig``,
            ``gain_vs_no_surgery``, ``jaccard_composite``, ``score_before``,
            ``score_after``. May be empty.

    Returns:
        dict with keys:
            ``timeline``       DataFrame indexed by date with ``sum_post``,
                               ``sum_pre``, ``recovery_pct``, ``entropy``.
            ``surgery_stats``  per-date surgery aggregates (0 where no
                               surgery happened).
            ``pivot``          wide score table (rows=dates, cols=reg_ids).
            ``churn``          number of remapped reg_ids per date.
            ``ever_remapped``  set of ``reg_orig`` values remapped at least
                               once.
            ``surgery_detail`` copy of ``surgery`` plus ``gain_pct_of_score``.
            ``dates``          list of ``YYYY-MM-DD`` strings.
    """
    # A DatetimeIndex guarantees pandas Timestamps on every pandas version;
    # sorted(Series.unique()) can yield numpy.datetime64 values, which have
    # no .strftime(). NaT dates are dropped: they can neither be grouped on
    # nor formatted. The index is named "date" like the groupby results.
    dates = pd.DatetimeIndex(
        scores["date"].dropna().unique(), name="date"
    ).sort_values()
    has_surgery = not surgery.empty

    # ── 2.1 Sum of scores per date (post-surgery) ──────────────
    sum_post = (
        scores.groupby("date")["score"]
        .sum()
        .reindex(dates)
        .rename("sum_post")
    )

    # ── 2.2 Reconstruct pre-surgery (counterfactual) ───────────
    # Without surgery, every reg_id that had a hard break would score 0
    # from that date backwards. The surgery "gain" is therefore propagated
    # as a cumulative deficit going back in time:
    # deficit(t) = sum of gains of all surgeries dated at or after t.
    if has_surgery:
        gain_by_date = surgery.groupby("date")["gain_vs_no_surgery"].sum()
        cumulative_deficit = pd.Series(
            [gain_by_date[gain_by_date.index >= d].sum() for d in dates],
            index=dates,
            dtype=float,
        )
    else:
        # An empty log (possibly with object-dtyped columns) means there is
        # nothing to subtract.
        cumulative_deficit = pd.Series(0.0, index=dates)

    sum_pre = (sum_post - cumulative_deficit).clip(lower=0).rename("sum_pre")
    timeline = pd.DataFrame({"sum_post": sum_post, "sum_pre": sum_pre})
    timeline.index = pd.to_datetime(timeline.index)
    timeline["recovery_pct"] = np.where(
        sum_pre < sum_post,
        # clip avoids a division by zero when the post-surgery sum is 0
        (sum_post - sum_pre) / sum_post.clip(lower=1e-9) * 100,
        0.0,
    )

    # ── 2.3 Per-date surgery stats ─────────────────────────────
    if has_surgery:
        surgery_stats = (
            surgery.groupby("date")
            .agg(
                n_surgeries=("reg_orig", "count"),
                total_gain=("gain_vs_no_surgery", "sum"),
                avg_gain=("gain_vs_no_surgery", "mean"),
                avg_jaccard=("jaccard_composite", "mean"),
                avg_score_before=("score_before", "mean"),
                avg_score_after=("score_after", "mean"),
            )
            .reindex(dates, fill_value=0)
        )
    else:
        # Same shape as the non-empty case, all zeros.
        surgery_stats = pd.DataFrame(
            0,
            index=dates,
            columns=[
                "n_surgeries",
                "total_gain",
                "avg_gain",
                "avg_jaccard",
                "avg_score_before",
                "avg_score_after",
            ],
        )

    # ── 2.4 Score distribution over time ───────────────────────
    # Wide format: rows=dates, cols=reg_ids
    pivot = scores.pivot_table(
        index="date", columns="reg_id", values="score", aggfunc="last"
    )
    pivot = pivot.reindex(dates)
    pivot.index = pd.to_datetime(pivot.index)

    # ── 2.5 Mapping churn ──────────────────────────────────────
    # For each date, how many reg_ids are remapped (not using their
    # original code)?
    churn = (
        mapping.groupby("date")["changed"]
        .sum()
        .reindex(dates, fill_value=0)
        .rename("n_remapped")
    )

    # ── 2.6 Score entropy (distribution spread) ────────────────
    def entropy(row):
        """Shannon entropy (nats) of the strictly positive scores in *row*."""
        p = row.dropna()
        p = p[p > 0]
        if len(p) == 0:
            return np.nan
        p = p / p.sum()
        return -(p * np.log(p)).sum()

    timeline["entropy"] = pivot.apply(entropy, axis=1).values

    # ── 2.7 Individual score trajectories ──────────────────────
    # Identify which reg_ids were ever remapped
    ever_remapped = set(mapping.loc[mapping["changed"], "reg_orig"].unique())

    # ── 2.8 Surgery detail table ───────────────────────────────
    surgery_detail = surgery.copy()
    # astype(float) keeps the arithmetic valid for an empty, object-dtyped log
    gain = surgery_detail["gain_vs_no_surgery"].astype(float)
    score_before = surgery_detail["score_before"].astype(float)
    surgery_detail["gain_pct_of_score"] = (
        gain / score_before.clip(lower=1e-9) * 100
    ).round(2)

    return {
        "timeline": timeline,
        "surgery_stats": surgery_stats,
        "pivot": pivot,
        "churn": churn,
        "ever_remapped": ever_remapped,
        "surgery_detail": surgery_detail,
        "dates": dates.strftime("%Y-%m-%d").tolist(),
    }
# ─────────────────────────────────────────────────────────────
# 3. PRINT CONSOLE SUMMARY
# ─────────────────────────────────────────────────────────────
def print_summary(analytics, surgery):
    """Print a plain-text summary of the analytics to stdout.

    Args:
        analytics: dict as returned by ``compute_analytics``; the keys read
            here are ``timeline``, ``pivot``, ``churn`` and
            ``ever_remapped``.
        surgery: surgery log DataFrame (columns used: ``date``,
            ``reg_orig``, ``reg_from``, ``reg_to``, ``jaccard_composite``,
            ``gain_vs_no_surgery``). May be empty.
    """
    tl = analytics["timeline"]
    sum_post = tl["sum_post"]
    sum_pre = tl["sum_pre"]

    print("\n" + "=" * 65)
    print(" CARMIGNAC PIPELINE — RESULTS SUMMARY")
    print("=" * 65)
    # The two dates were printed back to back with no separator.
    print(f"\n Date range : {tl.index.min().date()} → {tl.index.max().date()}")
    print(f" Total months : {len(tl)}")
    print(f" Reg IDs : {analytics['pivot'].shape[1]}")

    print("\n ── Score (Σ) ──────────────────────────────────────────")
    print(f" At t_ref (latest) : {sum_post.iloc[-1]:.6f}")
    print(f" At t_min (earliest): {sum_post.iloc[0]:.6f}")
    print(f" Min (post-surgery) : {sum_post.min():.6f} "
          f"({sum_post.idxmin().date()})")
    print(f" Min (pre-surgery) : {sum_pre.min():.6f} "
          f"({sum_pre.idxmin().date()})")
    print(f" Max recovery (pct) : {tl['recovery_pct'].max():.2f}%")

    print("\n ── Surgeries ─────────────────────────────────────────")
    if len(surgery) == 0:
        print(" No surgeries performed.")
    else:
        gains = surgery["gain_vs_no_surgery"]
        print(f" Total operations : {len(surgery)}")
        print(f" Total score gained : {gains.sum():.6f}")
        print(f" Avg Jaccard : {surgery['jaccard_composite'].mean():.4f}")
        print(f" Avg gain / surgery : {gains.mean():.6f}")
        print()
        print(f" {'Date':12s} {'Reg orig':12s} {'From':15s} {'To':15s} "
              f"{'Jaccard':>8s} {'Gain':>10s}")
        print(" " + "-" * 78)
        # One line per operation, oldest first.
        for _, row in surgery.sort_values("date").iterrows():
            print(f" {str(row['date'].date()):12s} {row['reg_orig']:12s} "
                  f"{row['reg_from']:15s} {row['reg_to']:15s} "
                  f"{row['jaccard_composite']:8.4f} {row['gain_vs_no_surgery']:10.6f}")

    print("\n ── Mapping churn ─────────────────────────────────────")
    ch = analytics["churn"]
    peak = ch.max()
    # idxmax is only meaningful when at least one reg_id was remapped.
    peak_date = ch.idxmax().date() if peak > 0 else "N/A"
    print(f" Max remapped at one date : {int(peak)} ({peak_date})")
    print(f" Reg IDs ever remapped : {len(analytics['ever_remapped'])}")

    print("\n ── Score entropy (distribution spread) ───────────────")
    ent = tl["entropy"]
    print(f" Mean entropy : {ent.mean():.4f}")
    print(f" Std entropy : {ent.std():.4f}")
    print()
# ─────────────────────────────────────────────────────────────
# 4. BUILD HTML REPORT
# ─────────────────────────────────────────────────────────────
def build_html(analytics, surgery, scores, mapping):
    """Render the self-contained HTML report (Chart.js charts + tables).

    Args:
        analytics: dict as returned by ``compute_analytics``; the keys read
            here are ``timeline``, ``surgery_stats``, ``pivot``,
            ``ever_remapped``, ``surgery_detail`` and ``dates``.
        surgery: surgery log DataFrame; only its length is used (KPI count
            and the choice between the detail table and the "no surgery"
            notice).
        scores: unused; kept so existing callers keep working.
        mapping: unused; kept so existing callers keep working.

    Returns:
        The complete HTML document as a string.
    """
    # Function-scope import so the block does not depend on file-level
    # imports; only `escape` is bound, the local name `html` stays free.
    from html import escape

    tl = analytics["timeline"]
    ss = analytics["surgery_stats"]
    piv = analytics["pivot"]
    ever_remapped = analytics["ever_remapped"]
    dates_str = analytics["dates"]

    # ── helpers to serialise for JS ─────────────────────────────
    def to_js(obj):
        # "</" is rewritten to "<\/" (an equivalent JS/JSON string escape)
        # so that data containing "</script>" cannot end the <script> block.
        return json.dumps(obj).replace("</", "<\\/")

    def num(v, decimals=6):
        # NaN has no JSON representation; Chart.js receives null instead.
        return None if np.isnan(v) else round(float(v), decimals)

    def jf(arr, decimals=6):
        return to_js([num(v, decimals) for v in arr])

    def js(arr):
        return to_js(list(arr))

    # ── colour palette ───────────────────────────────────────────
    REG_COLORS = [
        "#2563eb", "#16a34a", "#dc2626", "#d97706", "#7c3aed",
        "#0891b2", "#db2777", "#65a30d", "#ea580c", "#6366f1",
        "#059669", "#b45309", "#9333ea", "#0284c7", "#e11d48",
    ]

    # ── 4.1 Surgery sparkline data ──────────────────────────────
    surg_dates = [d.strftime("%Y-%m-%d") for d in ss.index]
    n_surg = jf(ss["n_surgeries"].values, 0)
    total_gain = jf(ss["total_gain"].values)
    avg_gain = jf(ss["avg_gain"].values)
    avg_jaccard = jf(ss["avg_jaccard"].values)

    # ── 4.2 Individual trajectories ────────────────────────────
    traj_datasets = []
    for idx, rid in enumerate(piv.columns):
        color = REG_COLORS[idx % len(REG_COLORS)]
        traj_datasets.append({
            "label": rid,
            "data": [num(v) for v in piv[rid].values],
            "borderColor": color,
            "backgroundColor": color + "22",
            "borderWidth": 2,
            # dashed line = account remapped at some point
            "borderDash": [6, 3] if rid in ever_remapped else [],
            "pointRadius": 0,
            "tension": 0.3,
            "fill": False,
        })
    traj_json = to_js(traj_datasets)

    # ── 4.3 Surgery detail table rows ──────────────────────────
    sd = analytics["surgery_detail"].sort_values("date")
    if len(sd) == 0:
        surg_rows_html = "<tr><td colspan='8' style='text-align:center;color:#888'>No surgeries performed</td></tr>"
    else:
        surg_rows = []
        for _, r in sd.iterrows():
            gain = r["gain_vs_no_surgery"]
            gain_class = "gain-high" if gain > 0.05 else "gain-low"
            day = r["date"].date()
            # Identifiers come from CSV files: escape them before they are
            # placed in markup.
            reg_orig = escape(str(r["reg_orig"]))
            reg_from = escape(str(r["reg_from"]))
            reg_to = escape(str(r["reg_to"]))
            jaccard = r["jaccard_composite"]
            gain_pct = r["gain_pct_of_score"]
            # ":+.6f" prints an explicit sign, so a negative gain reads
            # "-0.2" instead of "+-0.2".
            surg_rows.append(f"""
<tr>
<td>{day}</td>
<td><span class="reg-badge">{reg_orig}</span></td>
<td class="code-cell">{reg_from}</td>
<td>→</td>
<td class="code-cell">{reg_to}</td>
<td>{jaccard:.4f}</td>
<td class="{gain_class}">{gain:+.6f}</td>
<td>{gain_pct:.1f}%</td>
</tr>""")
        surg_rows_html = "".join(surg_rows)

    # Built up front: nesting a triple-quoted f-string with the same quote
    # style inside the page template only parses on Python >= 3.12.
    if len(surgery) == 0:
        surgery_table_html = '<div class="no-surg">No surgeries were performed on this dataset.</div>'
    else:
        surgery_table_html = f"""
<table>
<thead>
<tr>
<th>Date</th>
<th>Reg orig</th>
<th>Code from</th>
<th></th>
<th>Code to</th>
<th>Jaccard</th>
<th>Score gain</th>
<th>% of score</th>
</tr>
</thead>
<tbody>{surg_rows_html}</tbody>
</table>"""

    # ── 4.4 Top accounts table ──────────────────────────────────
    last_date = piv.index.max()
    top_accounts = piv.loc[last_date].dropna().sort_values(ascending=False)
    top_score = top_accounts.max() if len(top_accounts) else 0.0
    top_rows = []
    for rank, (rid, sc) in enumerate(top_accounts.items(), 1):
        remapped = "✓" if rid in ever_remapped else ""
        # Guard: a zero top score would make the ratio NaN and int() raise.
        bar_w = int(sc / top_score * 100) if top_score > 0 else 0
        color = REG_COLORS[(rank - 1) % len(REG_COLORS)]
        rid_html = escape(str(rid))
        top_rows.append(f"""
<tr>
<td class="rank">#{rank}</td>
<td><span class="reg-badge" style="background:{color}22;border-color:{color}">{rid_html}</span></td>
<td class="score-val">{sc:.6f}</td>
<td class="bar-cell">
<div class="score-bar" style="width:{bar_w}%;background:{color}"></div>
</td>
<td class="center">{remapped}</td>
</tr>""")
    top_rows_html = "".join(top_rows)

    # ─────────────────────────────────────────────────────────────
    # HTML TEMPLATE
    # ─────────────────────────────────────────────────────────────
    html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>Carmignac Pipeline Analysis Report</title>
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
<style>
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=IBM+Plex+Sans:wght@300;400;600;700&display=swap');
:root {{
--bg: #0d0f12;
--surface: #151820;
--border: #252a35;
--accent: #3b82f6;
--accent2: #10b981;
--warn: #f59e0b;
--danger: #ef4444;
--text: #e2e8f0;
--muted: #64748b;
--mono: 'IBM Plex Mono', monospace;
--sans: 'IBM Plex Sans', sans-serif;
}}
*, *::before, *::after {{ box-sizing: border-box; margin: 0; padding: 0; }}
body {{
font-family: var(--sans);
background: var(--bg);
color: var(--text);
min-height: 100vh;
padding: 0 0 60px;
}}
/* Header */
.header {{
background: linear-gradient(135deg, #0d1117 0%, #111827 50%, #0d1f3c 100%);
border-bottom: 1px solid var(--border);
padding: 40px 48px 36px;
position: relative;
overflow: hidden;
}}
.header::before {{
content: '';
position: absolute;
inset: 0;
background: radial-gradient(ellipse 70% 80% at 80% 50%, #1e40af18, transparent);
pointer-events: none;
}}
.header-eyebrow {{
font-family: var(--mono);
font-size: 11px;
letter-spacing: 0.15em;
color: var(--accent);
text-transform: uppercase;
margin-bottom: 10px;
}}
.header h1 {{
font-size: 2rem;
font-weight: 700;
letter-spacing: -0.02em;
line-height: 1.1;
margin-bottom: 8px;
}}
.header-sub {{
font-size: 0.85rem;
color: var(--muted);
font-family: var(--mono);
}}
/* KPI strip */
.kpi-strip {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
gap: 1px;
background: var(--border);
border-bottom: 1px solid var(--border);
}}
.kpi {{
background: var(--surface);
padding: 22px 28px;
display: flex;
flex-direction: column;
gap: 4px;
}}
.kpi-label {{
font-size: 0.7rem;
letter-spacing: 0.1em;
text-transform: uppercase;
color: var(--muted);
font-family: var(--mono);
}}
.kpi-value {{
font-size: 1.6rem;
font-weight: 700;
font-family: var(--mono);
color: var(--text);
line-height: 1;
}}
.kpi-value.accent {{ color: var(--accent); }}
.kpi-value.success {{ color: var(--accent2); }}
.kpi-value.warn {{ color: var(--warn); }}
.kpi-sub {{
font-size: 0.7rem;
color: var(--muted);
font-family: var(--mono);
}}
/* Main layout */
.main {{
max-width: 1400px;
margin: 0 auto;
padding: 36px 48px;
display: flex;
flex-direction: column;
gap: 32px;
}}
/* Cards */
.card {{
background: var(--surface);
border: 1px solid var(--border);
border-radius: 8px;
overflow: hidden;
}}
.card-header {{
padding: 18px 24px 14px;
border-bottom: 1px solid var(--border);
display: flex;
align-items: baseline;
gap: 12px;
}}
.card-title {{
font-size: 0.8rem;
font-weight: 600;
letter-spacing: 0.1em;
text-transform: uppercase;
color: var(--muted);
font-family: var(--mono);
}}
.card-desc {{
font-size: 0.78rem;
color: #475569;
}}
.card-body {{
padding: 24px;
}}
.chart-wrap {{
position: relative;
height: 280px;
}}
.chart-wrap-tall {{
position: relative;
height: 340px;
}}
/* Two-column grid */
.grid-2 {{
display: grid;
grid-template-columns: 1fr 1fr;
gap: 24px;
}}
@media (max-width: 900px) {{
.grid-2 {{ grid-template-columns: 1fr; }}
.main {{ padding: 24px 20px; }}
}}
/* Section label */
.section-label {{
font-family: var(--mono);
font-size: 0.68rem;
letter-spacing: 0.15em;
text-transform: uppercase;
color: var(--muted);
padding: 0 4px;
border-left: 3px solid var(--accent);
padding-left: 10px;
margin-bottom: -8px;
}}
/* Tables */
table {{
width: 100%;
border-collapse: collapse;
font-size: 0.82rem;
}}
th {{
font-family: var(--mono);
font-size: 0.68rem;
letter-spacing: 0.08em;
text-transform: uppercase;
color: var(--muted);
padding: 10px 14px;
text-align: left;
border-bottom: 1px solid var(--border);
background: #0f1218;
}}
td {{
padding: 10px 14px;
border-bottom: 1px solid #1a1f2a;
vertical-align: middle;
}}
tr:last-child td {{ border-bottom: none; }}
tr:hover td {{ background: #181e2b; }}
.rank {{ color: var(--muted); font-family: var(--mono); font-size: 0.75rem; }}
.score-val {{ font-family: var(--mono); color: var(--accent2); }}
.code-cell {{ font-family: var(--mono); font-size: 0.78rem; color: #94a3b8; }}
.center {{ text-align: center; color: var(--accent2); }}
.gain-high {{ font-family: var(--mono); color: var(--accent2); font-weight: 600; }}
.gain-low {{ font-family: var(--mono); color: var(--warn); }}
.bar-cell {{ width: 120px; }}
.score-bar {{
height: 6px;
border-radius: 3px;
min-width: 2px;
transition: width 0.3s;
}}
.reg-badge {{
display: inline-block;
padding: 2px 8px;
border-radius: 4px;
background: #1e2a3a;
border: 1px solid #2d3f54;
font-family: var(--mono);
font-size: 0.75rem;
color: var(--accent);
white-space: nowrap;
}}
/* Legend patch */
.legend-patch {{
display: inline-block;
width: 12px; height: 12px;
border-radius: 2px;
margin-right: 4px;
vertical-align: middle;
}}
/* No-surgery notice */
.no-surg {{
padding: 32px;
text-align: center;
color: var(--muted);
font-family: var(--mono);
font-size: 0.82rem;
}}
/* Footer */
.footer {{
text-align: center;
font-family: var(--mono);
font-size: 0.68rem;
color: #334155;
margin-top: 16px;
letter-spacing: 0.05em;
}}
</style>
</head>
<body>
<!-- HEADER -->
<div class="header">
<div class="header-eyebrow">Carmignac × ENSAE · Data Challenge 2025</div>
<h1>Pipeline Results Analysis Report</h1>
<div class="header-sub">Registrar ID repair · Score propagation · Surgery audit</div>
</div>
<!-- KPI STRIP -->
<div class="kpi-strip">
<div class="kpi">
<span class="kpi-label">Σ score at t_ref</span>
<span class="kpi-value success">{tl['sum_post'].iloc[-1]:.4f}</span>
<span class="kpi-sub">post-surgery</span>
</div>
<div class="kpi">
<span class="kpi-label">Σ score at t_min</span>
<span class="kpi-value accent">{tl['sum_post'].iloc[0]:.4f}</span>
<span class="kpi-sub">post-surgery</span>
</div>
<div class="kpi">
<span class="kpi-label">Max recovery</span>
<span class="kpi-value warn">{tl['recovery_pct'].max():.1f}%</span>
<span class="kpi-sub">score rescued by surgery</span>
</div>
<div class="kpi">
<span class="kpi-label">Total surgeries</span>
<span class="kpi-value">{len(surgery)}</span>
<span class="kpi-sub">operations performed</span>
</div>
<div class="kpi">
<span class="kpi-label">Reg IDs universe</span>
<span class="kpi-value">{piv.shape[1]}</span>
<span class="kpi-sub">at reference date</span>
</div>
<div class="kpi">
<span class="kpi-label">Ever remapped</span>
<span class="kpi-value warn">{len(ever_remapped)}</span>
<span class="kpi-sub">reg IDs w/ code change</span>
</div>
</div>
<!-- MAIN -->
<div class="main">
<div class="section-label">01 · Score Integrity Over Time</div>
<!-- Chart 1: Σ score with vs without surgery -->
<div class="card">
<div class="card-header">
<span class="card-title">Sum of scores pre vs post surgery</span>
<span class="card-desc">
Post-surgery (solid) shows the corrected score after code repairs.
Pre-surgery (dashed) is the counterfactual without any remapping.
Gap = score rescued.
</span>
</div>
<div class="card-body">
<div class="chart-wrap-tall">
<canvas id="chartSigma"></canvas>
</div>
</div>
</div>
<!-- Chart 2: Score drop (pre) -->
<div class="grid-2">
<div class="card">
<div class="card-header">
<span class="card-title">Score recovered by surgery</span>
<span class="card-desc">Difference post − pre at each date</span>
</div>
<div class="card-body">
<div class="chart-wrap">
<canvas id="chartRecovery"></canvas>
</div>
</div>
</div>
<div class="card">
<div class="card-header">
<span class="card-title">Portfolio concentration (entropy)</span>
<span class="card-desc">Shannon entropy of score distribution — higher = more spread</span>
</div>
<div class="card-body">
<div class="chart-wrap">
<canvas id="chartEntropy"></canvas>
</div>
</div>
</div>
</div>
<div class="section-label">02 · Individual Score Trajectories</div>
<div class="card">
<div class="card-header">
<span class="card-title">Score per Registrar Account — full history</span>
<span class="card-desc">
Dashed lines = accounts that were remapped at some point (surgery applied).
Solid lines = stable codes throughout.
</span>
</div>
<div class="card-body">
<div class="chart-wrap-tall" style="height:360px">
<canvas id="chartTraj"></canvas>
</div>
</div>
</div>
<div class="section-label">03 · Surgery Operations</div>
<div class="grid-2">
<div class="card">
<div class="card-header">
<span class="card-title">Surgeries per time step</span>
<span class="card-desc">Number of code remappings performed at each month</span>
</div>
<div class="card-body">
<div class="chart-wrap">
<canvas id="chartNSurg"></canvas>
</div>
</div>
</div>
<div class="card">
<div class="card-header">
<span class="card-title">Score gain per surgery</span>
<span class="card-desc">Average gain in Σ score from surgery at each month</span>
</div>
<div class="card-body">
<div class="chart-wrap">
<canvas id="chartGain"></canvas>
</div>
</div>
</div>
</div>
<div class="card">
<div class="card-header">
<span class="card-title">Jaccard similarity of surgery matches</span>
<span class="card-desc">
Composite Jaccard score of the matched code pair — closer to 1.0 = stronger portfolio overlap.
Low values may indicate uncertain matches.
</span>
</div>
<div class="card-body">
<div class="chart-wrap">
<canvas id="chartJaccard"></canvas>
</div>
</div>
</div>
<div class="section-label">04 · Surgery Detail Log</div>
<div class="card">
<div class="card-header">
<span class="card-title">All surgery operations</span>
</div>
<div class="card-body" style="padding:0">
{surgery_table_html}
</div>
</div>
<div class="section-label">05 · Score Ranking at t_ref</div>
<div class="card">
<div class="card-header">
<span class="card-title">Accounts ranked by weight at reference date</span>
<span class="card-desc">✓ in last column = account was remapped at some point in history</span>
</div>
<div class="card-body" style="padding:0">
<table>
<thead>
<tr>
<th>Rank</th>
<th>Registrar ID</th>
<th>Score (weight)</th>
<th style="width:140px">Relative size</th>
<th>Remapped</th>
</tr>
</thead>
<tbody>{top_rows_html}</tbody>
</table>
</div>
</div>
</div><!-- /main -->
<div class="footer">Generated by carmignac_analysis.py · Carmignac × ENSAE Data Challenge 2025</div>
<!-- CHARTS JS -->
<script>
Chart.defaults.color = '#64748b';
Chart.defaults.borderColor = '#1e2535';
Chart.defaults.font.family = "'IBM Plex Mono', monospace";
Chart.defaults.font.size = 11;
const DATES = {js(dates_str)};
const SUM_POST = {jf(tl['sum_post'].values)};
const SUM_PRE = {jf(tl['sum_pre'].values)};
const RECOVERY = {jf(tl['recovery_pct'].values, 4)};
const ENTROPY = {jf(tl['entropy'].values, 4)};
const SURG_DATES = {js(surg_dates)};
const N_SURG = {n_surg};
const TOTAL_GAIN = {total_gain};
const AVG_GAIN = {avg_gain};
const AVG_JACCARD = {avg_jaccard};
const TRAJ = {traj_json};
// Shared options helpers
function timeAxis(label) {{
return {{
type: 'category',
ticks: {{ maxTicksLimit: 10, maxRotation: 0 }},
grid: {{ color: '#1a2030' }},
title: {{ display: !!label, text: label, color: '#475569' }},
}};
}}
function yAxis(label, opts={{}}) {{
return {{
grid: {{ color: '#1a2030' }},
title: {{ display: !!label, text: label, color: '#475569' }},
...opts,
}};
}}
function tooltip() {{
return {{
backgroundColor: '#0d1117',
borderColor: '#252a35',
borderWidth: 1,
titleFont: {{ family: "'IBM Plex Mono'" }},
bodyFont: {{ family: "'IBM Plex Mono'" }},
padding: 10,
}};
}}
// 1. Sigma pre/post
new Chart(document.getElementById('chartSigma'), {{
type: 'line',
data: {{
labels: DATES,
datasets: [
{{
label: 'Σ score (post-surgery)',
data: SUM_POST,
borderColor: '#10b981',
backgroundColor: '#10b98115',
borderWidth: 2.5,
pointRadius: 0,
fill: false,
tension: 0.2,
}},
{{
label: 'Σ score (pre-surgery / counterfactual)',
data: SUM_PRE,
borderColor: '#ef4444',
borderDash: [6, 4],
borderWidth: 1.5,
pointRadius: 0,
fill: false,
tension: 0.2,
backgroundColor: 'transparent',
}},
],
}},
options: {{
responsive: true, maintainAspectRatio: false,
interaction: {{ mode: 'index', intersect: false }},
plugins: {{
legend: {{ position: 'top', labels: {{ boxWidth: 12, padding: 16 }} }},
tooltip: tooltip(),
}},
scales: {{
x: timeAxis(),
y: yAxis('Σ scores', {{ min: 0, max: 1.05, ticks: {{ stepSize: 0.1 }} }}),
}},
}},
}});
// 2. Recovery
new Chart(document.getElementById('chartRecovery'), {{
type: 'bar',
data: {{
labels: DATES,
datasets: [{{
label: 'Score recovered (%)',
data: RECOVERY,
backgroundColor: '#3b82f6aa',
borderColor: '#3b82f6',
borderWidth: 1,
borderRadius: 2,
}}],
}},
options: {{
responsive: true, maintainAspectRatio: false,
plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
scales: {{
x: timeAxis(),
y: yAxis('Recovery (% of Σ)', {{ min: 0 }}),
}},
}},
}});
// 3. Entropy
new Chart(document.getElementById('chartEntropy'), {{
type: 'line',
data: {{
labels: DATES,
datasets: [{{
label: 'Shannon entropy',
data: ENTROPY,
borderColor: '#d97706',
backgroundColor: '#d9770622',
borderWidth: 2,
pointRadius: 0,
fill: true,
tension: 0.3,
}}],
}},
options: {{
responsive: true, maintainAspectRatio: false,
plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
scales: {{ x: timeAxis(), y: yAxis('Entropy (nats)') }},
}},
}});
// 4. Trajectories
new Chart(document.getElementById('chartTraj'), {{
type: 'line',
data: {{ labels: DATES, datasets: TRAJ }},
options: {{
responsive: true, maintainAspectRatio: false,
interaction: {{ mode: 'index', intersect: false }},
plugins: {{
legend: {{ position: 'right', labels: {{ boxWidth: 10, padding: 10, font: {{ size: 10 }} }} }},
tooltip: {{ ...tooltip(), itemSort: (a,b) => b.raw - a.raw }},
}},
scales: {{ x: timeAxis(), y: yAxis('Score (weight)') }},
}},
}});
// 5. N surgeries
new Chart(document.getElementById('chartNSurg'), {{
type: 'bar',
data: {{
labels: SURG_DATES,
datasets: [{{
label: 'Surgeries',
data: N_SURG,
backgroundColor: '#7c3aed99',
borderColor: '#7c3aed',
borderWidth: 1,
borderRadius: 3,
}}],
}},
options: {{
responsive: true, maintainAspectRatio: false,
plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
scales: {{
x: timeAxis('Month'),
y: yAxis('# operations', {{ min: 0, ticks: {{ stepSize: 1 }} }}),
}},
}},
}});
// 6. Avg gain
new Chart(document.getElementById('chartGain'), {{
type: 'bar',
data: {{
labels: SURG_DATES,
datasets: [
{{
label: 'Total gain',
data: TOTAL_GAIN,
backgroundColor: '#10b98199',
borderColor: '#10b981',
borderWidth: 1,
borderRadius: 3,
}},
{{
label: 'Avg gain / surgery',
data: AVG_GAIN,
backgroundColor: '#06b6d455',
borderColor: '#06b6d4',
borderWidth: 1,
borderRadius: 3,
}},
],
}},
options: {{
responsive: true, maintainAspectRatio: false,
interaction: {{ mode: 'index', intersect: false }},
plugins: {{ legend: {{ position: 'top', labels: {{ boxWidth: 10 }} }}, tooltip: tooltip() }},
scales: {{ x: timeAxis('Month'), y: yAxis('Score gain') }},
}},
}});
// 7. Jaccard
new Chart(document.getElementById('chartJaccard'), {{
type: 'bar',
data: {{
labels: SURG_DATES,
datasets: [{{
label: 'Avg Jaccard composite',
data: AVG_JACCARD,
backgroundColor: '#f59e0b88',
borderColor: '#f59e0b',
borderWidth: 1,
borderRadius: 3,
}}],
}},
options: {{
responsive: true, maintainAspectRatio: false,
plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
scales: {{
x: timeAxis('Month'),
y: yAxis('Jaccard composite', {{ min: 0, max: 1.05 }}),
}},
}},
}});
</script>
</body>
</html>"""
    return html
# ─────────────────────────────────────────────────────────────
# 5. MAIN
# ─────────────────────────────────────────────────────────────
def main(argv=None):
    """Command-line entry point: load the CSVs, analyse them, write the report.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). ``None`` makes argparse read ``sys.argv``, which is the
            behaviour of a plain ``main()`` call.

    Exits with an error message (via ``sys.exit``) when an input file can be
    found neither at the given path nor relative to this script's directory.
    """
    parser = argparse.ArgumentParser(description="Carmignac pipeline results analyser")
    parser.add_argument("--scores", default="repair_results/carmignac_scores.csv")
    parser.add_argument("--mapping", default="repair_results/carmignac_mapping.csv")
    parser.add_argument("--surgery", default="repair_results/carmignac_surgery_log.csv")
    parser.add_argument("--out", default="repair_results/carmignac_report.html")
    args = parser.parse_args(argv)

    # Resolve paths relative to this script's directory if files not found
    base = os.path.dirname(os.path.abspath(__file__))

    def resolve(p):
        """Return *p* as given, or relative to the script dir; exit if absent."""
        for candidate in (p, os.path.join(base, p)):
            if os.path.exists(candidate):
                return candidate
        sys.exit(f"[ERROR] File not found: {p}")

    scores_path = resolve(args.scores)
    mapping_path = resolve(args.mapping)
    surgery_path = resolve(args.surgery)
    print(f"[Load] scores : {scores_path}")
    print(f"[Load] mapping : {mapping_path}")
    print(f"[Load] surgery : {surgery_path}")

    scores, mapping, surgery = load_outputs(scores_path, mapping_path, surgery_path)
    analytics = compute_analytics(scores, mapping, surgery)
    print_summary(analytics, surgery)
    html = build_html(analytics, surgery, scores, mapping)

    out_path = args.out
    # The inputs may have been found next to the script while the output is
    # written relative to the working directory, where the target folder
    # need not exist yet.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(html)
    print(f"\n[Report] Written to → {out_path}")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()