# Source: Project_Carmignac/repair_challenge/carmignac_analysis.py

"""
Carmignac Data Challenge — Pipeline Results Analysis
=====================================================
Analyses the CSV outputs produced by carmignac_repair.py:
  - carmignac_scores.csv       (post-surgery score history)
  - carmignac_mapping.csv      (reg_id mapping history)
  - carmignac_surgery_log.csv  (surgery operations)

Produces a single-file HTML report with interactive charts
(Chart.js and the web fonts are loaded from public CDNs when the
report is opened).

Usage:
    python carmignac_analysis.py
    python carmignac_analysis.py --scores path/to/scores.csv \
                                 --mapping path/to/mapping.csv \
                                 --surgery path/to/surgery_log.csv \
                                 --out report.html
"""
import argparse
import json
import os
import sys
import numpy as np
import pandas as pd
# ─────────────────────────────────────────────────────────────
# 1. LOAD & VALIDATE
# ─────────────────────────────────────────────────────────────
_TRUE_TOKENS = frozenset({"true", "t", "yes", "y", "1", "1.0"})


def _coerce_flag(value):
    """Turn one raw CSV cell into a bool; missing values count as False."""
    if pd.isna(value):
        return False
    if isinstance(value, str):
        return value.strip().lower() in _TRUE_TOKENS
    return bool(value)


def load_outputs(scores_path, mapping_path, surgery_path):
    """Load the three pipeline CSVs and normalise their column dtypes.

    Parameters
    ----------
    scores_path, mapping_path, surgery_path : str or path-like
        Locations of the scores, mapping and surgery-log CSV files.
        Each file must contain a ``date`` column.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(scores, mapping, surgery)`` with ``date`` parsed as datetimes,
        identifier columns as ``str`` and ``mapping["changed"]`` as ``bool``.
    """
    # Identifiers are read as text: letting pandas infer a numeric dtype
    # drops leading zeros and turns an int column holding a missing value
    # into floats ("123" -> "123.0"), so the same id would stop matching
    # across the three files.
    scores = pd.read_csv(scores_path, parse_dates=["date"],
                         dtype={"reg_id": str})
    mapping = pd.read_csv(mapping_path, parse_dates=["date"],
                          dtype={"reg_orig": str, "reg_used": str})
    surgery = pd.read_csv(surgery_path, parse_dates=["date"],
                          dtype={"reg_orig": str, "reg_from": str,
                                 "reg_to": str})

    # Normalise dtypes (missing identifiers become the string "nan")
    scores["reg_id"] = scores["reg_id"].astype(str)
    for col in ("reg_orig", "reg_used"):
        mapping[col] = mapping[col].astype(str)
    for col in ("reg_orig", "reg_from", "reg_to"):
        surgery[col] = surgery[col].astype(str)

    # A plain astype(bool) would map NaN and the text "False" to True.
    mapping["changed"] = mapping["changed"].map(_coerce_flag).astype(bool)
    return scores, mapping, surgery
# ─────────────────────────────────────────────────────────────
# 2. COMPUTE ANALYTICS
# ─────────────────────────────────────────────────────────────
def compute_analytics(scores, mapping, surgery):
    """Derive every table and series used by the console summary and report.

    Parameters
    ----------
    scores : pandas.DataFrame
        Needs the columns ``date``, ``reg_id`` and ``score``.
    mapping : pandas.DataFrame
        Needs the columns ``date``, ``reg_orig`` and a boolean ``changed``.
    surgery : pandas.DataFrame
        Needs the columns ``date``, ``reg_orig``, ``gain_vs_no_surgery``,
        ``jaccard_composite``, ``score_before`` and ``score_after``.

    Returns
    -------
    dict
        ``timeline`` (sum_post, sum_pre, recovery_pct, entropy per date),
        ``surgery_stats`` (per-date aggregates), ``pivot`` (dates x reg_ids),
        ``churn`` (remapped count per date), ``ever_remapped`` (set of
        reg_orig), ``surgery_detail`` (surgery log plus gain_pct_of_score)
        and ``dates`` (list of ``YYYY-MM-DD`` strings).
    """
    # A DatetimeIndex (rather than a sorted list of whatever scalar type
    # ``unique()`` returns) guarantees ``strftime`` is available below on
    # every pandas version; missing dates are dropped.
    dates = pd.DatetimeIndex(scores["date"].dropna().unique()).sort_values()

    # ── 2.1 Sum of scores per date (post-surgery) ──────────────
    sum_post = (scores.groupby("date")["score"]
                .sum()
                .reindex(dates)
                .rename("sum_post"))

    # ── 2.2 Reconstruct pre-surgery (counterfactual) ───────────
    # Without surgery, every reg_id that had a hard break would score 0
    # from that date backwards. We propagate the surgery "gain" as a
    # cumulative deficit going back in time:
    # deficit(t) = sum of gains for all surgeries at or after date t.
    gain_by_date = surgery.groupby("date")["gain_vs_no_surgery"].sum()
    if gain_by_date.empty:
        cumulative_deficit = pd.Series(0.0, index=dates)
    else:
        gain_by_date.index = pd.DatetimeIndex(gain_by_date.index)
        # Surgery dates need not coincide with score dates, so accumulate
        # over the union and read the result back at the score dates.
        all_dates = dates.union(gain_by_date.index)
        padded = gain_by_date.reindex(all_dates, fill_value=0.0)
        suffix_sum = padded[::-1].cumsum()[::-1]
        cumulative_deficit = suffix_sum.reindex(dates).astype(float)

    sum_pre = (sum_post - cumulative_deficit).clip(lower=0).rename("sum_pre")
    timeline = pd.DataFrame({"sum_post": sum_post, "sum_pre": sum_pre})
    timeline.index = pd.to_datetime(timeline.index)
    timeline["recovery_pct"] = np.where(
        sum_pre < sum_post,
        (sum_post - sum_pre) / sum_post.clip(lower=1e-9) * 100,
        0.0,
    )

    # ── 2.3 Per-date surgery stats ─────────────────────────────
    # Dates without any surgery get 0 in every column.
    surgery_stats = (
        surgery.groupby("date")
        .agg(
            n_surgeries=("reg_orig", "count"),
            total_gain=("gain_vs_no_surgery", "sum"),
            avg_gain=("gain_vs_no_surgery", "mean"),
            avg_jaccard=("jaccard_composite", "mean"),
            avg_score_before=("score_before", "mean"),
            avg_score_after=("score_after", "mean"),
        )
        .reindex(dates, fill_value=0)
    )

    # ── 2.4 Score distribution over time ───────────────────────
    # Wide format: rows=dates, cols=reg_ids
    pivot = scores.pivot_table(index="date", columns="reg_id",
                               values="score", aggfunc="last")
    pivot = pivot.reindex(dates)
    pivot.index = pd.to_datetime(pivot.index)

    # ── 2.5 Mapping churn ──────────────────────────────────────
    # For each date, how many reg_ids are remapped
    # (not using their original code)?
    churn = (mapping.groupby("date")["changed"]
             .sum()
             .reindex(dates, fill_value=0)
             .rename("n_remapped"))

    # ── 2.6 Score entropy (distribution spread) ────────────────
    def _entropy(row):
        # Shannon entropy (nats) of the strictly positive scores of one date
        weights = row.dropna()
        weights = weights[weights > 0]
        if len(weights) == 0:
            return np.nan
        weights = weights / weights.sum()
        return -(weights * np.log(weights)).sum()

    timeline["entropy"] = pivot.apply(_entropy, axis=1).values

    # ── 2.7 Individual score trajectories ──────────────────────
    # Identify which reg_ids were ever remapped
    ever_remapped = set(mapping.loc[mapping["changed"], "reg_orig"].unique())

    # ── 2.8 Surgery detail table ───────────────────────────────
    surgery_detail = surgery.copy()
    surgery_detail["gain_pct_of_score"] = (
        surgery_detail["gain_vs_no_surgery"]
        / surgery_detail["score_before"].clip(lower=1e-9) * 100
    ).round(2)

    return {
        "timeline": timeline,
        "surgery_stats": surgery_stats,
        "pivot": pivot,
        "churn": churn,
        "ever_remapped": ever_remapped,
        "surgery_detail": surgery_detail,
        "dates": dates.strftime("%Y-%m-%d").tolist(),
    }
# ─────────────────────────────────────────────────────────────
# 3. PRINT CONSOLE SUMMARY
# ─────────────────────────────────────────────────────────────
def print_summary(analytics, surgery):
    """Print a plain-text digest of the analytics to stdout.

    Parameters
    ----------
    analytics : dict
        Result of ``compute_analytics``; the keys read here are
        ``timeline``, ``pivot``, ``churn`` and ``ever_remapped``.
    surgery : pandas.DataFrame
        Surgery log; one line is printed per row, ordered by date.
    """
    tl = analytics["timeline"]
    print("\n" + "=" * 65)
    print(" CARMIGNAC PIPELINE — RESULTS SUMMARY")
    print("=" * 65)
    # The two bounds need a visible separator, otherwise they run together.
    print(f"\n Date range : {tl.index.min().date()} → {tl.index.max().date()}")
    print(f" Total months : {len(tl)}")
    print(f" Reg IDs : {analytics['pivot'].shape[1]}")

    print("\n ── Score (Σ) ──────────────────────────────────────────")
    print(f" At t_ref (latest) : {tl['sum_post'].iloc[-1]:.6f}")
    print(f" At t_min (earliest): {tl['sum_post'].iloc[0]:.6f}")
    print(f" Min (post-surgery) : {tl['sum_post'].min():.6f} "
          f"({tl['sum_post'].idxmin().date()})")
    print(f" Min (pre-surgery) : {tl['sum_pre'].min():.6f} "
          f"({tl['sum_pre'].idxmin().date()})")
    print(f" Max recovery (pct) : {tl['recovery_pct'].max():.2f}%")

    print("\n ── Surgeries ─────────────────────────────────────────")
    if len(surgery) == 0:
        print(" No surgeries performed.")
    else:
        print(f" Total operations : {len(surgery)}")
        print(f" Total score gained : {surgery['gain_vs_no_surgery'].sum():.6f}")
        print(f" Avg Jaccard : {surgery['jaccard_composite'].mean():.4f}")
        print(f" Avg gain / surgery : {surgery['gain_vs_no_surgery'].mean():.6f}")
        print()
        print(f" {'Date':12s} {'Reg orig':12s} {'From':15s} {'To':15s} "
              f"{'Jaccard':>8s} {'Gain':>10s}")
        print(" " + "-" * 78)
        for _, row in surgery.sort_values("date").iterrows():
            print(f" {str(row['date'].date()):12s} {row['reg_orig']:12s} "
                  f"{row['reg_from']:15s} {row['reg_to']:15s} "
                  f"{row['jaccard_composite']:8.4f} {row['gain_vs_no_surgery']:10.6f}")

    print("\n ── Mapping churn ─────────────────────────────────────")
    ch = analytics["churn"]
    # An empty churn series has no maximum; report 0 instead of failing.
    max_remapped = int(ch.max()) if len(ch) else 0
    peak_date = ch.idxmax().date() if max_remapped > 0 else "N/A"
    print(f" Max remapped at one date : {max_remapped} ({peak_date})")
    print(f" Reg IDs ever remapped : {len(analytics['ever_remapped'])}")

    print("\n ── Score entropy (distribution spread) ───────────────")
    ent = tl["entropy"]
    print(f" Mean entropy : {ent.mean():.4f}")
    print(f" Std entropy : {ent.std():.4f}")
    print()
# ─────────────────────────────────────────────────────────────
# 4. BUILD HTML REPORT
# ─────────────────────────────────────────────────────────────
def build_html(analytics, surgery, scores, mapping):
    """Render the analytics as one HTML page and return it as a string.

    Parameters
    ----------
    analytics : dict
        Result of ``compute_analytics``; the keys read here are
        ``timeline``, ``surgery_stats``, ``pivot``, ``ever_remapped``,
        ``surgery_detail`` and ``dates``.
    surgery : pandas.DataFrame
        Surgery log; drives the per-account surgery markers, the
        "Total surgeries" KPI and whether the detail table is shown.
    scores, mapping : pandas.DataFrame
        Not read by this function; kept so existing callers keep working.

    Returns
    -------
    str
        The complete HTML document. Chart.js and the fonts are referenced
        from CDNs rather than embedded.
    """
    # Imported here (not as ``import html``) because the rendered page is
    # stored in a local variable named ``html`` further down.
    from html import escape

    tl = analytics["timeline"]
    ss = analytics["surgery_stats"]
    piv = analytics["pivot"]
    dates_str = analytics["dates"]
    ever_remapped = analytics["ever_remapped"]

    # ── helpers to serialise for JS ─────────────────────────────
    def script_safe(payload):
        # "</" inside a <script> element could close it prematurely;
        # "<\/" is the same character sequence to the JS parser.
        return payload.replace("</", "<\\/")

    def jf(arr, decimals=6):
        # NaN has no JSON representation; emit null so Chart.js shows a gap
        return json.dumps([round(float(v), decimals) if not np.isnan(v) else None
                           for v in arr])

    def js(arr):
        return script_safe(json.dumps(list(arr)))

    # ── colour palette ───────────────────────────────────────────
    REG_COLORS = [
        "#2563eb", "#16a34a", "#dc2626", "#d97706", "#7c3aed",
        "#0891b2", "#db2777", "#65a30d", "#ea580c", "#6366f1",
        "#059669", "#b45309", "#9333ea", "#0284c7", "#e11d48",
    ]

    # ── 4.1 Surgery sparkline data ──────────────────────────────
    surg_dates = [d.strftime("%Y-%m-%d") for d in ss.index]
    n_surg = jf(ss["n_surgeries"].values, 0)
    total_gain = jf(ss["total_gain"].values)
    avg_gain = jf(ss["avg_gain"].values)
    avg_jaccard = jf(ss["avg_jaccard"].values)

    # ── 4.2 Individual trajectories ────────────────────────────
    reg_ids = list(piv.columns)
    # Surgery lookup: reg_orig -> list of {date, from, to, composite, gain}
    surg_by_reg = {}
    for _, row in surgery.iterrows():
        surg_by_reg.setdefault(row["reg_orig"], []).append({
            "date": row["date"].strftime("%Y-%m-%d"),
            "reg_from": str(row["reg_from"]),
            "reg_to": str(row["reg_to"]),
            "composite": round(float(row["jaccard_composite"]), 4),
            "gain": round(float(row["gain_vs_no_surgery"]), 6),
        })

    traj_datasets = []
    for idx, rid in enumerate(reg_ids):
        remapped = rid in ever_remapped
        colour = REG_COLORS[idx % len(REG_COLORS)]
        traj_datasets.append({
            "label": rid,
            "data": [round(float(v), 6) if not np.isnan(v) else None
                     for v in piv[rid].values],
            "borderColor": colour,
            "backgroundColor": colour + "22",
            "borderWidth": 2,
            "borderDash": [6, 3] if remapped else [],
            "pointRadius": 0,
            "tension": 0.3,
            "fill": False,
            "remapped": remapped,
            "surgeries": surg_by_reg.get(rid, []),
        })
    traj_json = script_safe(json.dumps(traj_datasets))

    # ── 4.3 Surgery detail table rows ──────────────────────────
    # Identifiers come straight from the CSVs, so they are HTML-escaped.
    sd = analytics["surgery_detail"].sort_values("date")
    if len(sd) == 0:
        surg_rows_html = "<tr><td colspan='8' style='text-align:center;color:#888'>No surgeries performed</td></tr>"
    else:
        row_chunks = []
        for _, r in sd.iterrows():
            gain = r["gain_vs_no_surgery"]
            gain_class = "gain-high" if gain > 0.05 else "gain-low"
            reg_orig = escape(str(r["reg_orig"]))
            code_from = escape(str(r["reg_from"]))
            code_to = escape(str(r["reg_to"]))
            row_chunks.append(
                "\n<tr>\n"
                f"<td>{r['date'].date()}</td>\n"
                f'<td><span class="reg-badge">{reg_orig}</span></td>\n'
                f'<td class="code-cell">{code_from}</td>\n'
                "<td>→</td>\n"
                f'<td class="code-cell">{code_to}</td>\n'
                f"<td>{r['jaccard_composite']:.4f}</td>\n"
                # ``+`` format flag: a negative gain prints as "-x", not "+-x"
                f'<td class="{gain_class}">{gain:+.6f}</td>\n'
                f"<td>{r['gain_pct_of_score']:.1f}%</td>\n"
                "</tr>"
            )
        surg_rows_html = "".join(row_chunks)

    # Built outside the page template: nesting a triple-quoted f-string in
    # another one is only valid syntax from Python 3.12 onwards.
    if len(surgery) == 0:
        surgery_table_html = '<div class="no-surg">No surgeries were performed on this dataset.</div>'
    else:
        surgery_table_html = (
            "\n<table>\n"
            "<thead>\n"
            "<tr>\n"
            "<th>Date</th>\n"
            "<th>Reg orig</th>\n"
            "<th>Code from</th>\n"
            "<th></th>\n"
            "<th>Code to</th>\n"
            "<th>Jaccard</th>\n"
            "<th>Score gain</th>\n"
            "<th>% of score</th>\n"
            "</tr>\n"
            "</thead>\n"
            f"<tbody>{surg_rows_html}</tbody>\n"
            "</table>"
        )

    # ── 4.4 Top accounts table ──────────────────────────────────
    last_date = piv.index.max()
    top_accounts = piv.loc[last_date].dropna().sort_values(ascending=False)
    top_score = top_accounts.max() if len(top_accounts) else 0
    top_chunks = []
    for rank, (rid, sc) in enumerate(top_accounts.items(), 1):
        # The page legend announces a check mark for remapped accounts
        remapped = "✓" if rid in ever_remapped else ""
        # Guard the ratio: with an all-zero ranking it would be NaN
        if top_score > 0:
            bar_w = max(0, min(100, int(sc / top_score * 100)))
        else:
            bar_w = 0
        color = REG_COLORS[(rank - 1) % len(REG_COLORS)]
        top_chunks.append(
            "\n<tr>\n"
            f'<td class="rank">#{rank}</td>\n'
            f'<td><span class="reg-badge" style="background:{color}22;border-color:{color}">{escape(str(rid))}</span></td>\n'
            f'<td class="score-val">{sc:.6f}</td>\n'
            '<td class="bar-cell">\n'
            f'<div class="score-bar" style="width:{bar_w}%;background:{color}"></div>\n'
            "</td>\n"
            f'<td class="center">{remapped}</td>\n'
            "</tr>"
        )
    top_rows_html = "".join(top_chunks)

    # ─────────────────────────────────────────────────────────────
    # HTML TEMPLATE
    # ─────────────────────────────────────────────────────────────
    html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>Carmignac Pipeline — Analysis Report</title>
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
<style>
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=IBM+Plex+Sans:wght@300;400;600;700&display=swap');
:root {{
--bg: #0d0f12;
--surface: #151820;
--border: #252a35;
--accent: #3b82f6;
--accent2: #10b981;
--warn: #f59e0b;
--danger: #ef4444;
--text: #e2e8f0;
--muted: #64748b;
--mono: 'IBM Plex Mono', monospace;
--sans: 'IBM Plex Sans', sans-serif;
}}
*, *::before, *::after {{ box-sizing: border-box; margin: 0; padding: 0; }}
body {{
font-family: var(--sans);
background: var(--bg);
color: var(--text);
min-height: 100vh;
padding: 0 0 60px;
}}
/* ── Header ── */
.header {{
background: linear-gradient(135deg, #0d1117 0%, #111827 50%, #0d1f3c 100%);
border-bottom: 1px solid var(--border);
padding: 40px 48px 36px;
position: relative;
overflow: hidden;
}}
.header::before {{
content: '';
position: absolute;
inset: 0;
background: radial-gradient(ellipse 70% 80% at 80% 50%, #1e40af18, transparent);
pointer-events: none;
}}
.header-eyebrow {{
font-family: var(--mono);
font-size: 11px;
letter-spacing: 0.15em;
color: var(--accent);
text-transform: uppercase;
margin-bottom: 10px;
}}
.header h1 {{
font-size: 2rem;
font-weight: 700;
letter-spacing: -0.02em;
line-height: 1.1;
margin-bottom: 8px;
}}
.header-sub {{
font-size: 0.85rem;
color: var(--muted);
font-family: var(--mono);
}}
/* ── KPI strip ── */
.kpi-strip {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
gap: 1px;
background: var(--border);
border-bottom: 1px solid var(--border);
}}
.kpi {{
background: var(--surface);
padding: 22px 28px;
display: flex;
flex-direction: column;
gap: 4px;
}}
.kpi-label {{
font-size: 0.7rem;
letter-spacing: 0.1em;
text-transform: uppercase;
color: var(--muted);
font-family: var(--mono);
}}
.kpi-value {{
font-size: 1.6rem;
font-weight: 700;
font-family: var(--mono);
color: var(--text);
line-height: 1;
}}
.kpi-value.accent {{ color: var(--accent); }}
.kpi-value.success {{ color: var(--accent2); }}
.kpi-value.warn {{ color: var(--warn); }}
.kpi-sub {{
font-size: 0.7rem;
color: var(--muted);
font-family: var(--mono);
}}
/* ── Main layout ── */
.main {{
max-width: 1400px;
margin: 0 auto;
padding: 36px 48px;
display: flex;
flex-direction: column;
gap: 32px;
}}
/* ── Cards ── */
.card {{
background: var(--surface);
border: 1px solid var(--border);
border-radius: 8px;
overflow: hidden;
}}
.card-header {{
padding: 18px 24px 14px;
border-bottom: 1px solid var(--border);
display: flex;
align-items: baseline;
gap: 12px;
}}
.card-title {{
font-size: 0.8rem;
font-weight: 600;
letter-spacing: 0.1em;
text-transform: uppercase;
color: var(--muted);
font-family: var(--mono);
}}
.card-desc {{
font-size: 0.78rem;
color: #475569;
}}
.card-body {{
padding: 24px;
}}
.chart-wrap {{
position: relative;
height: 280px;
}}
.chart-wrap-tall {{
position: relative;
height: 340px;
}}
/* ── Two-column grid ── */
.grid-2 {{
display: grid;
grid-template-columns: 1fr 1fr;
gap: 24px;
}}
@media (max-width: 900px) {{
.grid-2 {{ grid-template-columns: 1fr; }}
.main {{ padding: 24px 20px; }}
}}
/* ── Section label ── */
.section-label {{
font-family: var(--mono);
font-size: 0.68rem;
letter-spacing: 0.15em;
text-transform: uppercase;
color: var(--muted);
padding: 0 4px;
border-left: 3px solid var(--accent);
padding-left: 10px;
margin-bottom: -8px;
}}
/* ── Tables ── */
table {{
width: 100%;
border-collapse: collapse;
font-size: 0.82rem;
}}
th {{
font-family: var(--mono);
font-size: 0.68rem;
letter-spacing: 0.08em;
text-transform: uppercase;
color: var(--muted);
padding: 10px 14px;
text-align: left;
border-bottom: 1px solid var(--border);
background: #0f1218;
}}
td {{
padding: 10px 14px;
border-bottom: 1px solid #1a1f2a;
vertical-align: middle;
}}
tr:last-child td {{ border-bottom: none; }}
tr:hover td {{ background: #181e2b; }}
.rank {{ color: var(--muted); font-family: var(--mono); font-size: 0.75rem; }}
.score-val {{ font-family: var(--mono); color: var(--accent2); }}
.code-cell {{ font-family: var(--mono); font-size: 0.78rem; color: #94a3b8; }}
.center {{ text-align: center; color: var(--accent2); }}
.gain-high {{ font-family: var(--mono); color: var(--accent2); font-weight: 600; }}
.gain-low {{ font-family: var(--mono); color: var(--warn); }}
.bar-cell {{ width: 120px; }}
.score-bar {{
height: 6px;
border-radius: 3px;
min-width: 2px;
transition: width 0.3s;
}}
.reg-badge {{
display: inline-block;
padding: 2px 8px;
border-radius: 4px;
background: #1e2a3a;
border: 1px solid #2d3f54;
font-family: var(--mono);
font-size: 0.75rem;
color: var(--accent);
white-space: nowrap;
}}
/* ── Legend patch ── */
.legend-patch {{
display: inline-block;
width: 12px; height: 12px;
border-radius: 2px;
margin-right: 4px;
vertical-align: middle;
}}
/* ── No-surgery notice ── */
.no-surg {{
padding: 32px;
text-align: center;
color: var(--muted);
font-family: var(--mono);
font-size: 0.82rem;
}}
/* ── Trajectory explorer ── */
.badge-surgery {{
font-family: var(--mono);
font-size: 0.68rem;
color: var(--warn);
background: #f59e0b18;
border: 1px solid #f59e0b44;
border-radius: 3px;
padding: 1px 6px;
margin-left: 4px;
}}
.traj-selector {{
display: flex;
flex-wrap: wrap;
gap: 6px;
padding: 16px 20px;
border-bottom: 1px solid var(--border);
background: #0f1218;
}}
.traj-btn {{
font-family: var(--mono);
font-size: 0.72rem;
padding: 5px 12px;
border-radius: 4px;
border: 1px solid var(--border);
background: var(--surface);
color: var(--muted);
cursor: pointer;
transition: all 0.15s;
}}
.traj-btn:hover {{ border-color: var(--acc, var(--accent)); color: var(--text); }}
.traj-btn.active {{
background: color-mix(in srgb, var(--acc, var(--accent)) 15%, transparent);
border-color: var(--acc, var(--accent));
color: var(--acc, var(--accent));
}}
.traj-btn-badge {{
font-size: 0.6rem;
background: #f59e0b33;
color: var(--warn);
border-radius: 3px;
padding: 0 4px;
margin-left: 4px;
}}
.traj-focus-wrap {{ padding: 16px 24px 8px; }}
.traj-account-meta {{
display: flex;
align-items: center;
flex-wrap: wrap;
gap: 8px;
margin-bottom: 12px;
min-height: 28px;
}}
.meta-id {{
font-family: var(--mono);
font-size: 0.85rem;
font-weight: 600;
padding: 3px 10px;
border-radius: 4px;
border: 1.5px solid;
}}
.meta-surgs {{ display: flex; flex-wrap: wrap; gap: 6px; }}
.surg-chip {{
font-family: var(--mono);
font-size: 0.7rem;
color: var(--warn);
background: #f59e0b18;
border: 1px solid #f59e0b44;
border-radius: 4px;
padding: 2px 8px;
}}
.spark-section-label {{
font-family: var(--mono);
font-size: 0.65rem;
letter-spacing: 0.12em;
text-transform: uppercase;
color: var(--muted);
padding: 14px 24px 6px;
border-top: 1px solid var(--border);
margin-top: 8px;
}}
.spark-grid {{
display: grid;
grid-template-columns: repeat(auto-fill, minmax(140px, 1fr));
gap: 1px;
background: var(--border);
border-top: 1px solid var(--border);
}}
.spark-cell {{
background: var(--surface);
padding: 10px 12px 8px;
cursor: pointer;
transition: background 0.12s;
}}
.spark-cell:hover {{ background: #1a2030; }}
.spark-cell.active {{ background: #131b2a; outline: 1px solid #3b82f644; }}
.spark-label {{
font-family: var(--mono);
font-size: 0.68rem;
color: var(--muted);
margin-bottom: 4px;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}}
.spark-badge {{
font-size: 0.58rem;
color: var(--warn);
background: #f59e0b22;
border-radius: 2px;
padding: 0 3px;
}}
/* ── Footer ── */
.footer {{
text-align: center;
font-family: var(--mono);
font-size: 0.68rem;
color: #334155;
margin-top: 16px;
letter-spacing: 0.05em;
}}
</style>
</head>
<body>
<!-- ═══════════════════════════════════════════ HEADER -->
<div class="header">
<div class="header-eyebrow">Carmignac × ENSAE · Data Challenge 2025</div>
<h1>Pipeline Results — Analysis Report</h1>
<div class="header-sub">Registrar ID repair · Score propagation · Surgery audit</div>
</div>
<!-- ═══════════════════════════════════════════ KPI STRIP -->
<div class="kpi-strip">
<div class="kpi">
<span class="kpi-label">Σ score at t_ref</span>
<span class="kpi-value success">{tl['sum_post'].iloc[-1]:.4f}</span>
<span class="kpi-sub">post-surgery</span>
</div>
<div class="kpi">
<span class="kpi-label">Σ score at t_min</span>
<span class="kpi-value accent">{tl['sum_post'].iloc[0]:.4f}</span>
<span class="kpi-sub">post-surgery</span>
</div>
<div class="kpi">
<span class="kpi-label">Max recovery</span>
<span class="kpi-value warn">{tl['recovery_pct'].max():.1f}%</span>
<span class="kpi-sub">score rescued by surgery</span>
</div>
<div class="kpi">
<span class="kpi-label">Total surgeries</span>
<span class="kpi-value">{len(surgery)}</span>
<span class="kpi-sub">operations performed</span>
</div>
<div class="kpi">
<span class="kpi-label">Reg IDs universe</span>
<span class="kpi-value">{piv.shape[1]}</span>
<span class="kpi-sub">at reference date</span>
</div>
<div class="kpi">
<span class="kpi-label">Ever remapped</span>
<span class="kpi-value warn">{len(ever_remapped)}</span>
<span class="kpi-sub">reg IDs w/ code change</span>
</div>
</div>
<!-- ═══════════════════════════════════════════ MAIN -->
<div class="main">
<div class="section-label">01 · Score Integrity Over Time</div>
<!-- Chart 1: Σ score with vs without surgery -->
<div class="card">
<div class="card-header">
<span class="card-title">Sum of scores — pre vs post surgery</span>
<span class="card-desc">
Post-surgery (solid) shows the corrected score after code repairs.
Pre-surgery (dashed) is the counterfactual without any remapping.
Gap = score rescued.
</span>
</div>
<div class="card-body">
<div class="chart-wrap-tall">
<canvas id="chartSigma"></canvas>
</div>
</div>
</div>
<!-- Chart 2: Score drop (pre) -->
<div class="grid-2">
<div class="card">
<div class="card-header">
<span class="card-title">Score recovered by surgery</span>
<span class="card-desc">Difference post − pre at each date</span>
</div>
<div class="card-body">
<div class="chart-wrap">
<canvas id="chartRecovery"></canvas>
</div>
</div>
</div>
<div class="card">
<div class="card-header">
<span class="card-title">Portfolio concentration (entropy)</span>
<span class="card-desc">Shannon entropy of score distribution — higher = more spread</span>
</div>
<div class="card-body">
<div class="chart-wrap">
<canvas id="chartEntropy"></canvas>
</div>
</div>
</div>
</div>
<div class="section-label">02 · Individual Score Trajectories</div>
<div class="card">
<div class="card-header">
<span class="card-title">Score explorer — per Registrar Account</span>
<span class="card-desc">
Click an account to inspect its full history.
<span class="badge-surgery">◆ remapped</span> = surgery was applied.
</span>
</div>
<div class="card-body" style="padding:0">
<!-- Selector pill bar -->
<div class="traj-selector" id="trajSelector"></div>
<!-- Focused chart + metadata -->
<div class="traj-focus-wrap">
<div class="traj-account-meta" id="trajMeta"></div>
<div style="position:relative;height:300px">
<canvas id="chartTrajFocus"></canvas>
</div>
</div>
<!-- Sparkline overview grid -->
<div class="spark-section-label">All accounts — overview</div>
<div class="spark-grid" id="sparkGrid"></div>
</div>
</div>
<div class="section-label">03 · Surgery Operations</div>
<div class="grid-2">
<div class="card">
<div class="card-header">
<span class="card-title">Surgeries per time step</span>
<span class="card-desc">Number of code remappings performed at each month</span>
</div>
<div class="card-body">
<div class="chart-wrap">
<canvas id="chartNSurg"></canvas>
</div>
</div>
</div>
<div class="card">
<div class="card-header">
<span class="card-title">Score gain per surgery</span>
<span class="card-desc">Average gain in Σ score from surgery at each month</span>
</div>
<div class="card-body">
<div class="chart-wrap">
<canvas id="chartGain"></canvas>
</div>
</div>
</div>
</div>
<div class="card">
<div class="card-header">
<span class="card-title">Jaccard similarity of surgery matches</span>
<span class="card-desc">
Composite Jaccard score of the matched code pair — closer to 1.0 = stronger portfolio overlap.
Low values may indicate uncertain matches.
</span>
</div>
<div class="card-body">
<div class="chart-wrap">
<canvas id="chartJaccard"></canvas>
</div>
</div>
</div>
<div class="section-label">04 · Surgery Detail Log</div>
<div class="card">
<div class="card-header">
<span class="card-title">All surgery operations</span>
</div>
<div class="card-body" style="padding:0">
{surgery_table_html}
</div>
</div>
<div class="section-label">05 · Score Ranking at t_ref</div>
<div class="card">
<div class="card-header">
<span class="card-title">Accounts ranked by weight at reference date</span>
<span class="card-desc">✓ in last column = account was remapped at some point in history</span>
</div>
<div class="card-body" style="padding:0">
<table>
<thead>
<tr>
<th>Rank</th>
<th>Registrar ID</th>
<th>Score (weight)</th>
<th style="width:140px">Relative size</th>
<th>Remapped</th>
</tr>
</thead>
<tbody>{top_rows_html}</tbody>
</table>
</div>
</div>
</div><!-- /main -->
<div class="footer">Generated by carmignac_analysis.py · Carmignac × ENSAE Data Challenge 2025</div>
<!-- ═══════════════════════════════════════════ CHARTS JS -->
<script>
Chart.defaults.color = '#64748b';
Chart.defaults.borderColor = '#1e2535';
Chart.defaults.font.family = "'IBM Plex Mono', monospace";
Chart.defaults.font.size = 11;
const DATES = {js(dates_str)};
const SUM_POST = {jf(tl['sum_post'].values)};
const SUM_PRE = {jf(tl['sum_pre'].values)};
const RECOVERY = {jf(tl['recovery_pct'].values, 4)};
const ENTROPY = {jf(tl['entropy'].values, 4)};
const SURG_DATES = {js(surg_dates)};
const N_SURG = {n_surg};
const TOTAL_GAIN = {total_gain};
const AVG_GAIN = {avg_gain};
const AVG_JACCARD = {avg_jaccard};
const TRAJ = {traj_json};
// ── Shared options helpers ────────────────────────────────────
function timeAxis(label) {{
return {{
type: 'category',
ticks: {{ maxTicksLimit: 10, maxRotation: 0 }},
grid: {{ color: '#1a2030' }},
title: {{ display: !!label, text: label, color: '#475569' }},
}};
}}
function yAxis(label, opts={{}}) {{
return {{
grid: {{ color: '#1a2030' }},
title: {{ display: !!label, text: label, color: '#475569' }},
...opts,
}};
}}
function tooltip() {{
return {{
backgroundColor: '#0d1117',
borderColor: '#252a35',
borderWidth: 1,
titleFont: {{ family: "'IBM Plex Mono'" }},
bodyFont: {{ family: "'IBM Plex Mono'" }},
padding: 10,
}};
}}
// ── 1. Sigma pre/post ─────────────────────────────────────────
new Chart(document.getElementById('chartSigma'), {{
type: 'line',
data: {{
labels: DATES,
datasets: [
{{
label: 'Σ score (post-surgery)',
data: SUM_POST,
borderColor: '#10b981',
backgroundColor: '#10b98115',
borderWidth: 2.5,
pointRadius: 0,
fill: false,
tension: 0.2,
}},
{{
label: 'Σ score (pre-surgery / counterfactual)',
data: SUM_PRE,
borderColor: '#ef4444',
borderDash: [6, 4],
borderWidth: 1.5,
pointRadius: 0,
fill: false,
tension: 0.2,
backgroundColor: 'transparent',
}},
],
}},
options: {{
responsive: true, maintainAspectRatio: false,
interaction: {{ mode: 'index', intersect: false }},
plugins: {{
legend: {{ position: 'top', labels: {{ boxWidth: 12, padding: 16 }} }},
tooltip: tooltip(),
}},
scales: {{
x: timeAxis(),
y: yAxis('Σ scores', {{ min: 0, max: 1.05, ticks: {{ stepSize: 0.1 }} }}),
}},
}},
}});
// ── 2. Recovery ───────────────────────────────────────────────
new Chart(document.getElementById('chartRecovery'), {{
type: 'bar',
data: {{
labels: DATES,
datasets: [{{
label: 'Score recovered (%)',
data: RECOVERY,
backgroundColor: '#3b82f6aa',
borderColor: '#3b82f6',
borderWidth: 1,
borderRadius: 2,
}}],
}},
options: {{
responsive: true, maintainAspectRatio: false,
plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
scales: {{
x: timeAxis(),
y: yAxis('Recovery (% of Σ)', {{ min: 0 }}),
}},
}},
}});
// ── 3. Entropy ────────────────────────────────────────────────
new Chart(document.getElementById('chartEntropy'), {{
type: 'line',
data: {{
labels: DATES,
datasets: [{{
label: 'Shannon entropy',
data: ENTROPY,
borderColor: '#d97706',
backgroundColor: '#d9770622',
borderWidth: 2,
pointRadius: 0,
fill: true,
tension: 0.3,
}}],
}},
options: {{
responsive: true, maintainAspectRatio: false,
plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
scales: {{ x: timeAxis(), y: yAxis('Entropy (nats)') }},
}},
}});
// ── 4. Trajectory explorer ───────────────────────────────────
(function() {{
let focusChart = null;
let sparkCharts = [];
let activeIdx = 0;
// Surgery annotation plugin (vertical line + label)
const surgeryPlugin = {{
id: 'surgeryLines',
afterDraw(chart) {{
const surgeries = chart._surgeries || [];
if (!surgeries.length) return;
const {{ctx, chartArea, scales}} = chart;
surgeries.forEach(op => {{
const xIdx = DATES.indexOf(op.date);
if (xIdx < 0) return;
const x = scales.x.getPixelForValue(xIdx);
ctx.save();
ctx.strokeStyle = '#f59e0bcc';
ctx.lineWidth = 1.5;
ctx.setLineDash([4, 3]);
ctx.beginPath();
ctx.moveTo(x, chartArea.top);
ctx.lineTo(x, chartArea.bottom);
ctx.stroke();
// Label
ctx.font = "10px 'IBM Plex Mono'";
ctx.fillStyle = '#f59e0b';
ctx.textAlign = 'center';
ctx.fillText('✦', x, chartArea.top + 10);
ctx.restore();
}});
}}
}};
function buildFocusChart(idx) {{
const d = TRAJ[idx];
if (focusChart) focusChart.destroy();
const ctx = document.getElementById('chartTrajFocus').getContext('2d');
focusChart = new Chart(ctx, {{
type: 'line',
data: {{
labels: DATES,
datasets: [{{
label: d.label,
data: d.data,
borderColor: d.borderColor,
backgroundColor: d.borderColor + '18',
borderWidth: 2.5,
borderDash: d.borderDash,
pointRadius: d.data.map((_, i) => {{
return d.surgeries.some(s => DATES[i] === s.date) ? 6 : 0;
}}),
pointBackgroundColor: '#f59e0b',
pointBorderColor: '#f59e0b',
tension: 0.3,
fill: true,
}}],
}},
options: {{
responsive: true, maintainAspectRatio: false,
interaction: {{ mode: 'index', intersect: false }},
plugins: {{
legend: {{ display: false }},
tooltip: {{
...tooltip(),
callbacks: {{
afterBody(items) {{
const i = items[0].dataIndex;
const s = d.surgeries.find(x => DATES[i] === x.date);
if (!s) return [];
return [
'',
'✦ Surgery applied',
'From : ' + s.reg_from,
'To : ' + s.reg_to,
'Jaccard: ' + s.composite,
'Gain: +' + s.gain,
];
}}
}}
}},
}},
scales: {{
x: timeAxis(),
y: yAxis('Score (weight)', {{ min: 0 }}),
}},
}},
plugins: [surgeryPlugin],
}});
focusChart._surgeries = d.surgeries;
// Update meta panel
const meta = document.getElementById('trajMeta');
const surgHTML = d.surgeries.length
? d.surgeries.map(s =>
`<span class="surg-chip">✦ ${{s.date}} &nbsp;${{s.reg_from}} → ${{s.reg_to}}</span>`
).join('')
: '<span style="color:#475569;font-size:0.75rem">No surgery applied</span>';
meta.innerHTML = `
<span class="meta-id" style="border-color:${{d.borderColor}};color:${{d.borderColor}}">${{d.label}}</span>
<div class="meta-surgs">${{surgHTML}}</div>
`;
}}
function buildSparkGrid() {{
const grid = document.getElementById('sparkGrid');
grid.innerHTML = '';
TRAJ.forEach((d, idx) => {{
const wrap = document.createElement('div');
wrap.className = 'spark-cell' + (idx === activeIdx ? ' active' : '');
wrap.dataset.idx = idx;
const label = document.createElement('div');
label.className = 'spark-label';
label.innerHTML = d.label + (d.remapped
? ' <span class="spark-badge">R</span>' : '');
const canvasWrap = document.createElement('div');
canvasWrap.style.cssText = 'position:relative;height:52px';
const cv = document.createElement('canvas');
canvasWrap.appendChild(cv);
wrap.appendChild(label);
wrap.appendChild(canvasWrap);
grid.appendChild(wrap);
const sc = new Chart(cv, {{
type: 'line',
data: {{
labels: DATES,
datasets: [{{
data: d.data,
borderColor: d.borderColor,
borderWidth: 1.5,
pointRadius: 0,
tension: 0.3,
fill: false,
}}],
}},
options: {{
responsive: true, maintainAspectRatio: false,
animation: false,
plugins: {{ legend: {{ display: false }}, tooltip: {{ enabled: false }} }},
scales: {{
x: {{ display: false }},
y: {{ display: false, min: 0 }},
}},
}},
}});
sparkCharts.push(sc);
wrap.addEventListener('click', () => {{
document.querySelectorAll('.spark-cell').forEach(c => c.classList.remove('active'));
document.querySelectorAll('.traj-btn').forEach(b => b.classList.remove('active'));
wrap.classList.add('active');
document.querySelector(`.traj-btn[data-idx="${{idx}}"]`).classList.add('active');
activeIdx = idx;
buildFocusChart(idx);
}});
}});
}}
function buildSelector() {{
const sel = document.getElementById('trajSelector');
sel.innerHTML = '';
TRAJ.forEach((d, idx) => {{
const btn = document.createElement('button');
btn.className = 'traj-btn' + (idx === 0 ? ' active' : '');
btn.dataset.idx = idx;
btn.style.setProperty('--acc', d.borderColor);
btn.innerHTML = d.label + (d.remapped ? ' <span class="traj-btn-badge">R</span>' : '');
btn.addEventListener('click', () => {{
document.querySelectorAll('.traj-btn').forEach(b => b.classList.remove('active'));
document.querySelectorAll('.spark-cell').forEach(c => c.classList.remove('active'));
btn.classList.add('active');
const cell = document.querySelector(`.spark-cell[data-idx="${{idx}}"]`);
if (cell) cell.classList.add('active');
activeIdx = idx;
buildFocusChart(idx);
}});
sel.appendChild(btn);
}});
}}
buildSelector();
buildFocusChart(0);
buildSparkGrid();
}})();
// ── 5. N surgeries ────────────────────────────────────────────
new Chart(document.getElementById('chartNSurg'), {{
type: 'bar',
data: {{
labels: SURG_DATES,
datasets: [{{
label: 'Surgeries',
data: N_SURG,
backgroundColor: '#7c3aed99',
borderColor: '#7c3aed',
borderWidth: 1,
borderRadius: 3,
}}],
}},
options: {{
responsive: true, maintainAspectRatio: false,
plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
scales: {{
x: timeAxis('Month'),
y: yAxis('# operations', {{ min: 0, ticks: {{ stepSize: 1 }} }}),
}},
}},
}});
// ── 6. Avg gain ───────────────────────────────────────────────
new Chart(document.getElementById('chartGain'), {{
type: 'bar',
data: {{
labels: SURG_DATES,
datasets: [
{{
label: 'Total gain',
data: TOTAL_GAIN,
backgroundColor: '#10b98199',
borderColor: '#10b981',
borderWidth: 1,
borderRadius: 3,
}},
{{
label: 'Avg gain / surgery',
data: AVG_GAIN,
backgroundColor: '#06b6d455',
borderColor: '#06b6d4',
borderWidth: 1,
borderRadius: 3,
}},
],
}},
options: {{
responsive: true, maintainAspectRatio: false,
interaction: {{ mode: 'index', intersect: false }},
plugins: {{ legend: {{ position: 'top', labels: {{ boxWidth: 10 }} }}, tooltip: tooltip() }},
scales: {{ x: timeAxis('Month'), y: yAxis('Score gain') }},
}},
}});
// ── 7. Jaccard ────────────────────────────────────────────────
new Chart(document.getElementById('chartJaccard'), {{
type: 'bar',
data: {{
labels: SURG_DATES,
datasets: [{{
label: 'Avg Jaccard composite',
data: AVG_JACCARD,
backgroundColor: '#f59e0b88',
borderColor: '#f59e0b',
borderWidth: 1,
borderRadius: 3,
}}],
}},
options: {{
responsive: true, maintainAspectRatio: false,
plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
scales: {{
x: timeAxis('Month'),
y: yAxis('Jaccard composite', {{ min: 0, max: 1.05 }}),
}},
}},
}});
</script>
</body>
</html>"""
    return html
# ─────────────────────────────────────────────────────────────
# 5. MAIN
# ─────────────────────────────────────────────────────────────
def main():
    """Command-line entry point: load the CSVs, print the summary, write the report.

    Input paths that do not exist relative to the current directory are
    retried relative to this script's directory; the process exits with an
    error message when neither exists.
    """
    parser = argparse.ArgumentParser(description="Carmignac pipeline results analyser")
    parser.add_argument("--scores", default="repair_results/carmignac_scores.csv")
    parser.add_argument("--mapping", default="repair_results/carmignac_mapping.csv")
    parser.add_argument("--surgery", default="repair_results/carmignac_surgery_log.csv")
    parser.add_argument("--out", default="repair_results/carmignac_report.html")
    args = parser.parse_args()

    # Resolve paths relative to this script's directory if files not found
    base = os.path.dirname(os.path.abspath(__file__))

    def resolve(p):
        if os.path.exists(p):
            return p
        alt = os.path.join(base, p)
        if os.path.exists(alt):
            return alt
        sys.exit(f"[ERROR] File not found: {p}")

    scores_path = resolve(args.scores)
    mapping_path = resolve(args.mapping)
    surgery_path = resolve(args.surgery)
    print(f"[Load] scores : {scores_path}")
    print(f"[Load] mapping : {mapping_path}")
    print(f"[Load] surgery : {surgery_path}")

    scores, mapping, surgery = load_outputs(scores_path, mapping_path, surgery_path)
    analytics = compute_analytics(scores, mapping, surgery)
    print_summary(analytics, surgery)
    html = build_html(analytics, surgery, scores, mapping)

    out_path = args.out
    # The output path is taken relative to the current directory, where the
    # target folder may not exist (e.g. when the inputs were found next to
    # the script instead); create it rather than failing in open().
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(html)
    print(f"\n[Report] Written to → {out_path}")


if __name__ == "__main__":
    main()