# Project_Carmignac/repair_challenge/carmignac repair.py

"""
Carmignac Data Challenge — Pipeline Results Analysis
=====================================================
Analyses the CSV outputs produced by carmignac_repair.py:
  - carmignac_scores.csv      (post-surgery score history)
  - carmignac_mapping.csv     (reg_id mapping history)
  - carmignac_surgery_log.csv (surgery operations)

Produces a self-contained HTML report with interactive charts.

Usage:
    python carmignac_analysis.py
    python carmignac_analysis.py --scores path/to/scores.csv \
                                  --mapping path/to/mapping.csv \
                                  --surgery path/to/surgery_log.csv \
                                  --out report.html
"""

import argparse
import json
import os
import sys
import numpy as np
import pandas as pd

# ─────────────────────────────────────────────────────────────
# 1. LOAD & VALIDATE
# ─────────────────────────────────────────────────────────────

def load_outputs(scores_path, mapping_path, surgery_path):
    """Read the three pipeline CSVs and normalise their column dtypes.

    Each file is read with its ``date`` column parsed as datetimes.
    Identifier columns are cast to ``str`` and ``mapping["changed"]`` to
    ``bool``.

    Returns:
        tuple: ``(scores, mapping, surgery)`` DataFrames.
    """
    scores, mapping, surgery = (
        pd.read_csv(source, parse_dates=["date"])
        for source in (scores_path, mapping_path, surgery_path)
    )

    # Identifier columns, per frame, that must be compared as strings.
    id_columns = (
        (scores,  ("reg_id",)),
        (mapping, ("reg_orig", "reg_used")),
        (surgery, ("reg_orig", "reg_from", "reg_to")),
    )
    for frame, columns in id_columns:
        for column in columns:
            frame[column] = frame[column].astype(str)

    mapping["changed"] = mapping["changed"].astype(bool)

    return scores, mapping, surgery


# ─────────────────────────────────────────────────────────────
# 2. COMPUTE ANALYTICS
# ─────────────────────────────────────────────────────────────

def compute_analytics(scores, mapping, surgery):
    """Derive every table and series used by the console summary and report.

    Args:
        scores: DataFrame with ``date``, ``reg_id`` and ``score`` columns.
        mapping: DataFrame with ``date``, ``reg_orig`` and boolean ``changed``.
        surgery: DataFrame with ``date``, ``reg_orig``, ``gain_vs_no_surgery``,
            ``jaccard_composite``, ``score_before`` and ``score_after``.

    Returns:
        dict with keys:
            ``timeline``       – per-date ``sum_post``, ``sum_pre``,
                                 ``recovery_pct`` and ``entropy``;
            ``surgery_stats``  – per-date surgery aggregates (0 where none);
            ``pivot``          – dates x reg_id score matrix;
            ``churn``          – per-date count of ``changed`` mapping rows;
            ``ever_remapped``  – set of ``reg_orig`` with any ``changed`` row;
            ``surgery_detail`` – copy of ``surgery`` plus ``gain_pct_of_score``;
            ``dates``          – the score dates as ``YYYY-MM-DD`` strings.
    """
    # Canonical sorted DatetimeIndex of the score dates.  Depending on the
    # pandas version, ``Series.unique()`` may hand back raw numpy datetime64
    # values (which have no ``strftime``), so normalise explicitly.  NaT is
    # dropped because it can be neither sorted nor formatted meaningfully.
    dates = pd.DatetimeIndex(scores["date"].dropna().unique()).sort_values()

    # ── 2.1  Sum of scores per date (post-surgery) ──────────────
    sum_post = (scores.groupby("date")["score"]
                      .sum()
                      .reindex(dates)
                      .rename("sum_post"))

    # ── 2.2  Reconstruct pre-surgery (counterfactual) ───────────
    # The deficit at date t is the sum of the gains of all surgeries dated
    # at or after t.  A reversed cumulative sum gives, for every surgery
    # date, the total gain from that date onwards; a backward-fill lookup
    # then picks, for each score date, the first surgery date >= it.
    gain_by_date = (surgery.groupby("date")["gain_vs_no_surgery"]
                           .sum()
                           .astype(float))
    gain_by_date.index = pd.DatetimeIndex(gain_by_date.index)
    gain_by_date = gain_by_date.sort_index()
    gain_from_date_onwards = gain_by_date.iloc[::-1].cumsum().iloc[::-1]
    cumulative_deficit = (gain_from_date_onwards
                          .reindex(dates, method="bfill")
                          .fillna(0.0))  # no surgery at/after this date
    sum_pre = (sum_post - cumulative_deficit).clip(lower=0).rename("sum_pre")

    timeline = pd.DataFrame({"sum_post": sum_post, "sum_pre": sum_pre})
    timeline["recovery_pct"] = np.where(
        sum_pre < sum_post,
        # clip guards the division when the post-surgery sum is ~0
        (sum_post - sum_pre) / sum_post.clip(lower=1e-9) * 100,
        0.0,
    )

    # ── 2.3  Per-date surgery stats ─────────────────────────────
    # Dates without any surgery are filled with 0 in every column.
    surgery_stats = (
        surgery.groupby("date")
               .agg(
                   n_surgeries        = ("reg_orig",            "count"),
                   total_gain         = ("gain_vs_no_surgery",  "sum"),
                   avg_gain           = ("gain_vs_no_surgery",  "mean"),
                   avg_jaccard        = ("jaccard_composite",   "mean"),
                   avg_score_before   = ("score_before",        "mean"),
                   avg_score_after    = ("score_after",         "mean"),
               )
               .reindex(dates, fill_value=0)
    )

    # ── 2.4  Score distribution over time ───────────────────────
    # Wide format: rows=dates, cols=reg_ids; the last value wins when a
    # (date, reg_id) pair appears more than once.
    pivot = scores.pivot_table(index="date", columns="reg_id",
                               values="score", aggfunc="last")
    pivot = pivot.reindex(dates)

    # ── 2.5  Mapping churn ──────────────────────────────────────
    # Number of mapping rows flagged ``changed`` at each date.
    churn = (mapping.groupby("date")["changed"]
                    .sum()
                    .reindex(dates, fill_value=0)
                    .rename("n_remapped"))

    # ── 2.6  Score entropy (distribution spread) ────────────────
    def entropy(row):
        """Shannon entropy (nats) of the strictly positive scores in a row."""
        p = row.dropna()
        p = p[p > 0]
        if len(p) == 0:
            return np.nan
        p = p / p.sum()
        return -(p * np.log(p)).sum()

    timeline["entropy"] = pivot.apply(entropy, axis=1).values

    # ── 2.7  Individual score trajectories ──────────────────────
    # reg_orig values that have at least one ``changed`` mapping row.
    ever_remapped = set(mapping.loc[mapping["changed"], "reg_orig"].unique())

    # ── 2.8  Surgery detail table ───────────────────────────────
    surgery_detail = surgery.copy()
    surgery_detail["gain_pct_of_score"] = (
        surgery_detail["gain_vs_no_surgery"]
        / surgery_detail["score_before"].clip(lower=1e-9) * 100
    ).round(2)

    return {
        "timeline":       timeline,
        "surgery_stats":  surgery_stats,
        "pivot":          pivot,
        "churn":          churn,
        "ever_remapped":  ever_remapped,
        "surgery_detail": surgery_detail,
        "dates":          list(dates.strftime("%Y-%m-%d")),
    }


# ─────────────────────────────────────────────────────────────
# 3. PRINT CONSOLE SUMMARY
# ─────────────────────────────────────────────────────────────

def print_summary(analytics, surgery):
    """Print a plain-text digest of the analytics to stdout.

    Sections, in order: date range and universe size, score sums
    (post/pre surgery), the surgery log, mapping churn and entropy
    statistics.

    Args:
        analytics: dict providing ``timeline``, ``surgery_stats``, ``pivot``,
            ``churn`` and ``ever_remapped``.
        surgery: DataFrame of surgery operations (may be empty).
    """
    timeline = analytics["timeline"]
    surgery_stats = analytics["surgery_stats"]  # looked up but not displayed
    post = timeline["sum_post"]
    pre = timeline["sum_pre"]
    banner = "=" * 65

    print("\n" + banner)
    print("  CARMIGNAC PIPELINE — RESULTS SUMMARY")
    print(banner)

    first_day = timeline.index.min().date()
    last_day = timeline.index.max().date()
    print(f"\n  Date range   : {first_day} → {last_day}")
    print(f"  Total months : {len(timeline)}")
    print(f"  Reg IDs      : {analytics['pivot'].shape[1]}")

    print("\n  ── Score (Σ) ──────────────────────────────────────────")
    print(f"  At t_ref (latest)  : {post.iloc[-1]:.6f}")
    print(f"  At t_min (earliest): {post.iloc[0]:.6f}")
    print(f"  Min (post-surgery) : {post.min():.6f}  "
          f"({post.idxmin().date()})")
    print(f"  Min (pre-surgery)  : {pre.min():.6f}  "
          f"({pre.idxmin().date()})")
    print(f"  Max recovery (pct) : {timeline['recovery_pct'].max():.2f}%")

    print("\n  ── Surgeries ─────────────────────────────────────────")
    if len(surgery) > 0:
        gains = surgery["gain_vs_no_surgery"]
        print(f"  Total operations   : {len(surgery)}")
        print(f"  Total score gained : {gains.sum():.6f}")
        print(f"  Avg Jaccard        : {surgery['jaccard_composite'].mean():.4f}")
        print(f"  Avg gain / surgery : {gains.mean():.6f}")
        print()
        header = (f"  {'Date':12s} {'Reg orig':12s} {'From':15s} {'To':15s} "
                  f"{'Jaccard':>8s} {'Gain':>10s}")
        print(header)
        print("  " + "-" * 78)
        # One line per operation, oldest first.
        for op in surgery.sort_values("date").to_dict("records"):
            day = str(op["date"].date())
            print(f"  {day:12s} {op['reg_orig']:12s} "
                  f"{op['reg_from']:15s} {op['reg_to']:15s} "
                  f"{op['jaccard_composite']:8.4f} {op['gain_vs_no_surgery']:10.6f}")
    else:
        print("  No surgeries performed.")

    print("\n  ── Mapping churn ─────────────────────────────────────")
    churn = analytics["churn"]
    peak = churn.max()
    # The peak date is only meaningful when something was remapped.
    peak_day = churn.idxmax().date() if peak > 0 else 'N/A'
    print(f"  Max remapped at one date  : {int(peak)}  ({peak_day})")
    print(f"  Reg IDs ever remapped     : {len(analytics['ever_remapped'])}")

    print("\n  ── Score entropy (distribution spread) ───────────────")
    entropy = analytics["timeline"]["entropy"]
    print(f"  Mean entropy : {entropy.mean():.4f}")
    print(f"  Std  entropy : {entropy.std():.4f}")
    print()


# ─────────────────────────────────────────────────────────────
# 4. BUILD HTML REPORT
# ─────────────────────────────────────────────────────────────

def build_html(analytics, surgery, scores, mapping):
    """Render the analytics as a self-contained HTML report.

    The page embeds its data as JSON constants and draws seven Chart.js
    charts, plus a surgery log table and a ranking of accounts at the last
    date of the pivot.

    Args:
        analytics: dict providing ``timeline``, ``surgery_stats``, ``pivot``,
            ``ever_remapped``, ``surgery_detail`` and ``dates``.
        surgery: DataFrame of surgery operations; only its length is used
            (KPI count and table-vs-notice switch).
        scores: unused; kept for interface compatibility.
        mapping: unused; kept for interface compatibility.

    Returns:
        str: the complete HTML document.
    """
    # Local import: the module name ``html`` is easy to shadow, so only the
    # one function needed is brought into scope here.
    from html import escape

    tl  = analytics["timeline"]
    ss  = analytics["surgery_stats"]
    piv = analytics["pivot"]
    dates_str = analytics["dates"]
    ever_remapped = analytics["ever_remapped"]

    # ── helpers to serialise for JS / HTML ──────────────────────
    def script_json(obj):
        # "</" inside an inline <script> could terminate the script element;
        # "<\/" is the same string to the JS parser.
        return json.dumps(obj).replace("</", "<\\/")

    def jf(arr, decimals=6):
        # Numeric array → JSON list, NaN rendered as null.
        return json.dumps([round(float(v), decimals) if not np.isnan(v) else None
                           for v in arr])

    def js(arr):
        return script_json(list(arr))

    def esc(value):
        # Values originate from CSV files; never trust them as markup.
        return escape(str(value))

    # ── colour palette ───────────────────────────────────────────
    REG_COLORS = [
        "#2563eb","#16a34a","#dc2626","#d97706","#7c3aed",
        "#0891b2","#db2777","#65a30d","#ea580c","#6366f1",
        "#059669","#b45309","#9333ea","#0284c7","#e11d48",
    ]

    # ── 4.1  Surgery sparkline data ──────────────────────────────
    surg_dates  = [d.strftime("%Y-%m-%d") for d in ss.index]
    n_surg      = jf(ss["n_surgeries"].values, 0)
    total_gain  = jf(ss["total_gain"].values)
    avg_gain    = jf(ss["avg_gain"].values)
    avg_jaccard = jf(ss["avg_jaccard"].values)

    # ── 4.2  Individual trajectories ────────────────────────────
    # Remapped accounts are drawn dashed, stable ones solid.
    traj_datasets = []
    for idx, rid in enumerate(piv.columns):
        color = REG_COLORS[idx % len(REG_COLORS)]
        traj_datasets.append({
            "label":       rid,
            "data":        [round(float(v), 6) if not np.isnan(v) else None
                            for v in piv[rid].values],
            "borderColor": color,
            "backgroundColor": color + "22",
            "borderWidth":  2,
            "borderDash":   [6, 3] if rid in ever_remapped else [],
            "pointRadius":  0,
            "tension":      0.3,
            "fill":         False,
        })

    traj_json = script_json(traj_datasets)

    # ── 4.3  Surgery detail table ───────────────────────────────
    sd = analytics["surgery_detail"].sort_values("date")
    surgery_rows = []
    for _, r in sd.iterrows():
        gain = r["gain_vs_no_surgery"]
        gain_class = "gain-high" if gain > 0.05 else "gain-low"
        # ``:+`` prints an explicit sign, so negative gains are not shown
        # as "+-x".
        surgery_rows.append(f"""
            <tr>
                <td>{r['date'].date()}</td>
                <td><span class="reg-badge">{esc(r['reg_orig'])}</span></td>
                <td class="code-cell">{esc(r['reg_from'])}</td>
                <td>→</td>
                <td class="code-cell">{esc(r['reg_to'])}</td>
                <td>{r['jaccard_composite']:.4f}</td>
                <td class="{gain_class}">{gain:+.6f}</td>
                <td>{r['gain_pct_of_score']:.1f}%</td>
            </tr>""")
    if surgery_rows:
        surg_rows_html = "".join(surgery_rows)
    else:
        surg_rows_html = "<tr><td colspan='8' style='text-align:center;color:#888'>No surgeries performed</td></tr>"

    # Built outside the page template: a triple-quoted f-string nested
    # inside another one only parses on Python 3.12+.
    if len(surgery) == 0:
        surgery_table_html = '<div class="no-surg">No surgeries were performed on this dataset.</div>'
    else:
        surgery_table_html = f"""
      <table>
        <thead>
          <tr>
            <th>Date</th>
            <th>Reg orig</th>
            <th>Code from</th>
            <th></th>
            <th>Code to</th>
            <th>Jaccard</th>
            <th>Score gain</th>
            <th>% of score</th>
          </tr>
        </thead>
        <tbody>{surg_rows_html}</tbody>
      </table>"""

    # ── 4.4  Top accounts table ──────────────────────────────────
    if len(piv) > 0:
        top_accounts = piv.loc[piv.index.max()].dropna().sort_values(ascending=False)
    else:
        top_accounts = pd.Series(dtype=float)
    top_score = top_accounts.max() if len(top_accounts) > 0 else 0.0
    top_rows = []
    for rank, (rid, sc) in enumerate(top_accounts.items(), 1):
        remapped = "✓" if rid in ever_remapped else ""
        # Relative bar width; guarded so an all-zero ranking does not
        # produce NaN, and clamped to a valid CSS percentage.
        bar_w = int(sc / top_score * 100) if top_score > 0 else 0
        bar_w = max(0, min(100, bar_w))
        color = REG_COLORS[(rank - 1) % len(REG_COLORS)]
        top_rows.append(f"""
        <tr>
            <td class="rank">#{rank}</td>
            <td><span class="reg-badge" style="background:{color}22;border-color:{color}">{esc(rid)}</span></td>
            <td class="score-val">{sc:.6f}</td>
            <td class="bar-cell">
                <div class="score-bar" style="width:{bar_w}%;background:{color}"></div>
            </td>
            <td class="center">{remapped}</td>
        </tr>""")
    top_rows_html = "".join(top_rows)

    # ─────────────────────────────────────────────────────────────
    # HTML TEMPLATE
    # ─────────────────────────────────────────────────────────────
    report = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>Carmignac Pipeline — Analysis Report</title>
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
<style>
  @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=IBM+Plex+Sans:wght@300;400;600;700&display=swap');

  :root {{
    --bg:       #0d0f12;
    --surface:  #151820;
    --border:   #252a35;
    --accent:   #3b82f6;
    --accent2:  #10b981;
    --warn:     #f59e0b;
    --danger:   #ef4444;
    --text:     #e2e8f0;
    --muted:    #64748b;
    --mono:     'IBM Plex Mono', monospace;
    --sans:     'IBM Plex Sans', sans-serif;
  }}

  *, *::before, *::after {{ box-sizing: border-box; margin: 0; padding: 0; }}

  body {{
    font-family: var(--sans);
    background: var(--bg);
    color: var(--text);
    min-height: 100vh;
    padding: 0 0 60px;
  }}

  /* ── Header ── */
  .header {{
    background: linear-gradient(135deg, #0d1117 0%, #111827 50%, #0d1f3c 100%);
    border-bottom: 1px solid var(--border);
    padding: 40px 48px 36px;
    position: relative;
    overflow: hidden;
  }}
  .header::before {{
    content: '';
    position: absolute;
    inset: 0;
    background: radial-gradient(ellipse 70% 80% at 80% 50%, #1e40af18, transparent);
    pointer-events: none;
  }}
  .header-eyebrow {{
    font-family: var(--mono);
    font-size: 11px;
    letter-spacing: 0.15em;
    color: var(--accent);
    text-transform: uppercase;
    margin-bottom: 10px;
  }}
  .header h1 {{
    font-size: 2rem;
    font-weight: 700;
    letter-spacing: -0.02em;
    line-height: 1.1;
    margin-bottom: 8px;
  }}
  .header-sub {{
    font-size: 0.85rem;
    color: var(--muted);
    font-family: var(--mono);
  }}

  /* ── KPI strip ── */
  .kpi-strip {{
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
    gap: 1px;
    background: var(--border);
    border-bottom: 1px solid var(--border);
  }}
  .kpi {{
    background: var(--surface);
    padding: 22px 28px;
    display: flex;
    flex-direction: column;
    gap: 4px;
  }}
  .kpi-label {{
    font-size: 0.7rem;
    letter-spacing: 0.1em;
    text-transform: uppercase;
    color: var(--muted);
    font-family: var(--mono);
  }}
  .kpi-value {{
    font-size: 1.6rem;
    font-weight: 700;
    font-family: var(--mono);
    color: var(--text);
    line-height: 1;
  }}
  .kpi-value.accent  {{ color: var(--accent); }}
  .kpi-value.success {{ color: var(--accent2); }}
  .kpi-value.warn    {{ color: var(--warn); }}
  .kpi-sub {{
    font-size: 0.7rem;
    color: var(--muted);
    font-family: var(--mono);
  }}

  /* ── Main layout ── */
  .main {{
    max-width: 1400px;
    margin: 0 auto;
    padding: 36px 48px;
    display: flex;
    flex-direction: column;
    gap: 32px;
  }}

  /* ── Cards ── */
  .card {{
    background: var(--surface);
    border: 1px solid var(--border);
    border-radius: 8px;
    overflow: hidden;
  }}
  .card-header {{
    padding: 18px 24px 14px;
    border-bottom: 1px solid var(--border);
    display: flex;
    align-items: baseline;
    gap: 12px;
  }}
  .card-title {{
    font-size: 0.8rem;
    font-weight: 600;
    letter-spacing: 0.1em;
    text-transform: uppercase;
    color: var(--muted);
    font-family: var(--mono);
  }}
  .card-desc {{
    font-size: 0.78rem;
    color: #475569;
  }}
  .card-body {{
    padding: 24px;
  }}
  .chart-wrap {{
    position: relative;
    height: 280px;
  }}
  .chart-wrap-tall {{
    position: relative;
    height: 340px;
  }}

  /* ── Two-column grid ── */
  .grid-2 {{
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 24px;
  }}
  @media (max-width: 900px) {{
    .grid-2 {{ grid-template-columns: 1fr; }}
    .main {{ padding: 24px 20px; }}
  }}

  /* ── Section label ── */
  .section-label {{
    font-family: var(--mono);
    font-size: 0.68rem;
    letter-spacing: 0.15em;
    text-transform: uppercase;
    color: var(--muted);
    padding: 0 4px;
    border-left: 3px solid var(--accent);
    padding-left: 10px;
    margin-bottom: -8px;
  }}

  /* ── Tables ── */
  table {{
    width: 100%;
    border-collapse: collapse;
    font-size: 0.82rem;
  }}
  th {{
    font-family: var(--mono);
    font-size: 0.68rem;
    letter-spacing: 0.08em;
    text-transform: uppercase;
    color: var(--muted);
    padding: 10px 14px;
    text-align: left;
    border-bottom: 1px solid var(--border);
    background: #0f1218;
  }}
  td {{
    padding: 10px 14px;
    border-bottom: 1px solid #1a1f2a;
    vertical-align: middle;
  }}
  tr:last-child td {{ border-bottom: none; }}
  tr:hover td {{ background: #181e2b; }}

  .rank {{ color: var(--muted); font-family: var(--mono); font-size: 0.75rem; }}
  .score-val {{ font-family: var(--mono); color: var(--accent2); }}
  .code-cell {{ font-family: var(--mono); font-size: 0.78rem; color: #94a3b8; }}
  .center {{ text-align: center; color: var(--accent2); }}
  .gain-high {{ font-family: var(--mono); color: var(--accent2); font-weight: 600; }}
  .gain-low  {{ font-family: var(--mono); color: var(--warn); }}

  .bar-cell {{ width: 120px; }}
  .score-bar {{
    height: 6px;
    border-radius: 3px;
    min-width: 2px;
    transition: width 0.3s;
  }}

  .reg-badge {{
    display: inline-block;
    padding: 2px 8px;
    border-radius: 4px;
    background: #1e2a3a;
    border: 1px solid #2d3f54;
    font-family: var(--mono);
    font-size: 0.75rem;
    color: var(--accent);
    white-space: nowrap;
  }}

  /* ── Legend patch ── */
  .legend-patch {{
    display: inline-block;
    width: 12px; height: 12px;
    border-radius: 2px;
    margin-right: 4px;
    vertical-align: middle;
  }}

  /* ── No-surgery notice ── */
  .no-surg {{
    padding: 32px;
    text-align: center;
    color: var(--muted);
    font-family: var(--mono);
    font-size: 0.82rem;
  }}

  /* ── Footer ── */
  .footer {{
    text-align: center;
    font-family: var(--mono);
    font-size: 0.68rem;
    color: #334155;
    margin-top: 16px;
    letter-spacing: 0.05em;
  }}
</style>
</head>
<body>

<!-- ═══════════════════════════════════════════ HEADER -->
<div class="header">
  <div class="header-eyebrow">Carmignac × ENSAE · Data Challenge 2025</div>
  <h1>Pipeline Results — Analysis Report</h1>
  <div class="header-sub">Registrar ID repair · Score propagation · Surgery audit</div>
</div>

<!-- ═══════════════════════════════════════════ KPI STRIP -->
<div class="kpi-strip">
  <div class="kpi">
    <span class="kpi-label">Σ score at t_ref</span>
    <span class="kpi-value success">{tl['sum_post'].iloc[-1]:.4f}</span>
    <span class="kpi-sub">post-surgery</span>
  </div>
  <div class="kpi">
    <span class="kpi-label">Σ score at t_min</span>
    <span class="kpi-value accent">{tl['sum_post'].iloc[0]:.4f}</span>
    <span class="kpi-sub">post-surgery</span>
  </div>
  <div class="kpi">
    <span class="kpi-label">Max recovery</span>
    <span class="kpi-value warn">{tl['recovery_pct'].max():.1f}%</span>
    <span class="kpi-sub">score rescued by surgery</span>
  </div>
  <div class="kpi">
    <span class="kpi-label">Total surgeries</span>
    <span class="kpi-value">{len(surgery)}</span>
    <span class="kpi-sub">operations performed</span>
  </div>
  <div class="kpi">
    <span class="kpi-label">Reg IDs universe</span>
    <span class="kpi-value">{piv.shape[1]}</span>
    <span class="kpi-sub">at reference date</span>
  </div>
  <div class="kpi">
    <span class="kpi-label">Ever remapped</span>
    <span class="kpi-value warn">{len(ever_remapped)}</span>
    <span class="kpi-sub">reg IDs w/ code change</span>
  </div>
</div>

<!-- ═══════════════════════════════════════════ MAIN -->
<div class="main">

  <div class="section-label">01 · Score Integrity Over Time</div>

  <!-- Chart 1: Σ score with vs without surgery -->
  <div class="card">
    <div class="card-header">
      <span class="card-title">Sum of scores — pre vs post surgery</span>
      <span class="card-desc">
        Post-surgery (solid) shows the corrected score after code repairs.
        Pre-surgery (dashed) is the counterfactual without any remapping.
        Gap = score rescued.
      </span>
    </div>
    <div class="card-body">
      <div class="chart-wrap-tall">
        <canvas id="chartSigma"></canvas>
      </div>
    </div>
  </div>

  <!-- Chart 2: Score drop (pre) -->
  <div class="grid-2">
    <div class="card">
      <div class="card-header">
        <span class="card-title">Score recovered by surgery</span>
        <span class="card-desc">Difference post − pre at each date</span>
      </div>
      <div class="card-body">
        <div class="chart-wrap">
          <canvas id="chartRecovery"></canvas>
        </div>
      </div>
    </div>

    <div class="card">
      <div class="card-header">
        <span class="card-title">Portfolio concentration (entropy)</span>
        <span class="card-desc">Shannon entropy of score distribution — higher = more spread</span>
      </div>
      <div class="card-body">
        <div class="chart-wrap">
          <canvas id="chartEntropy"></canvas>
        </div>
      </div>
    </div>
  </div>

  <div class="section-label">02 · Individual Score Trajectories</div>

  <div class="card">
    <div class="card-header">
      <span class="card-title">Score per Registrar Account — full history</span>
      <span class="card-desc">
        Dashed lines = accounts that were remapped at some point (surgery applied).
        Solid lines = stable codes throughout.
      </span>
    </div>
    <div class="card-body">
      <div class="chart-wrap-tall" style="height:360px">
        <canvas id="chartTraj"></canvas>
      </div>
    </div>
  </div>

  <div class="section-label">03 · Surgery Operations</div>

  <div class="grid-2">
    <div class="card">
      <div class="card-header">
        <span class="card-title">Surgeries per time step</span>
        <span class="card-desc">Number of code remappings performed at each month</span>
      </div>
      <div class="card-body">
        <div class="chart-wrap">
          <canvas id="chartNSurg"></canvas>
        </div>
      </div>
    </div>

    <div class="card">
      <div class="card-header">
        <span class="card-title">Score gain per surgery</span>
        <span class="card-desc">Average gain in Σ score from surgery at each month</span>
      </div>
      <div class="card-body">
        <div class="chart-wrap">
          <canvas id="chartGain"></canvas>
        </div>
      </div>
    </div>
  </div>

  <div class="card">
    <div class="card-header">
      <span class="card-title">Jaccard similarity of surgery matches</span>
      <span class="card-desc">
        Composite Jaccard score of the matched code pair — closer to 1.0 = stronger portfolio overlap.
        Low values may indicate uncertain matches.
      </span>
    </div>
    <div class="card-body">
      <div class="chart-wrap">
        <canvas id="chartJaccard"></canvas>
      </div>
    </div>
  </div>

  <div class="section-label">04 · Surgery Detail Log</div>

  <div class="card">
    <div class="card-header">
      <span class="card-title">All surgery operations</span>
    </div>
    <div class="card-body" style="padding:0">
      {surgery_table_html}
    </div>
  </div>

  <div class="section-label">05 · Score Ranking at t_ref</div>

  <div class="card">
    <div class="card-header">
      <span class="card-title">Accounts ranked by weight at reference date</span>
      <span class="card-desc">✓ in last column = account was remapped at some point in history</span>
    </div>
    <div class="card-body" style="padding:0">
      <table>
        <thead>
          <tr>
            <th>Rank</th>
            <th>Registrar ID</th>
            <th>Score (weight)</th>
            <th style="width:140px">Relative size</th>
            <th>Remapped</th>
          </tr>
        </thead>
        <tbody>{top_rows_html}</tbody>
      </table>
    </div>
  </div>

</div><!-- /main -->

<div class="footer">Generated by carmignac_analysis.py · Carmignac × ENSAE Data Challenge 2025</div>

<!-- ═══════════════════════════════════════════ CHARTS JS -->
<script>
Chart.defaults.color = '#64748b';
Chart.defaults.borderColor = '#1e2535';
Chart.defaults.font.family = "'IBM Plex Mono', monospace";
Chart.defaults.font.size = 11;

const DATES   = {js(dates_str)};
const SUM_POST = {jf(tl['sum_post'].values)};
const SUM_PRE  = {jf(tl['sum_pre'].values)};
const RECOVERY = {jf(tl['recovery_pct'].values, 4)};
const ENTROPY  = {jf(tl['entropy'].values, 4)};
const SURG_DATES   = {js(surg_dates)};
const N_SURG       = {n_surg};
const TOTAL_GAIN   = {total_gain};
const AVG_GAIN     = {avg_gain};
const AVG_JACCARD  = {avg_jaccard};
const TRAJ         = {traj_json};

// ── Shared options helpers ────────────────────────────────────
function timeAxis(label) {{
  return {{
    type: 'category',
    ticks: {{ maxTicksLimit: 10, maxRotation: 0 }},
    grid: {{ color: '#1a2030' }},
    title: {{ display: !!label, text: label, color: '#475569' }},
  }};
}}
function yAxis(label, opts={{}}) {{
  return {{
    grid: {{ color: '#1a2030' }},
    title: {{ display: !!label, text: label, color: '#475569' }},
    ...opts,
  }};
}}
function tooltip() {{
  return {{
    backgroundColor: '#0d1117',
    borderColor: '#252a35',
    borderWidth: 1,
    titleFont: {{ family: "'IBM Plex Mono'" }},
    bodyFont:  {{ family: "'IBM Plex Mono'" }},
    padding: 10,
  }};
}}

// ── 1. Sigma pre/post ─────────────────────────────────────────
new Chart(document.getElementById('chartSigma'), {{
  type: 'line',
  data: {{
    labels: DATES,
    datasets: [
      {{
        label: 'Σ score (post-surgery)',
        data: SUM_POST,
        borderColor: '#10b981',
        backgroundColor: '#10b98115',
        borderWidth: 2.5,
        pointRadius: 0,
        fill: false,
        tension: 0.2,
      }},
      {{
        label: 'Σ score (pre-surgery / counterfactual)',
        data: SUM_PRE,
        borderColor: '#ef4444',
        borderDash: [6, 4],
        borderWidth: 1.5,
        pointRadius: 0,
        fill: false,
        tension: 0.2,
        backgroundColor: 'transparent',
      }},
    ],
  }},
  options: {{
    responsive: true, maintainAspectRatio: false,
    interaction: {{ mode: 'index', intersect: false }},
    plugins: {{
      legend: {{ position: 'top', labels: {{ boxWidth: 12, padding: 16 }} }},
      tooltip: tooltip(),
    }},
    scales: {{
      x: timeAxis(),
      y: yAxis('Σ scores', {{ min: 0, max: 1.05, ticks: {{ stepSize: 0.1 }} }}),
    }},
  }},
}});

// ── 2. Recovery ───────────────────────────────────────────────
new Chart(document.getElementById('chartRecovery'), {{
  type: 'bar',
  data: {{
    labels: DATES,
    datasets: [{{
      label: 'Score recovered (%)',
      data: RECOVERY,
      backgroundColor: '#3b82f6aa',
      borderColor: '#3b82f6',
      borderWidth: 1,
      borderRadius: 2,
    }}],
  }},
  options: {{
    responsive: true, maintainAspectRatio: false,
    plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
    scales: {{
      x: timeAxis(),
      y: yAxis('Recovery (% of Σ)', {{ min: 0 }}),
    }},
  }},
}});

// ── 3. Entropy ────────────────────────────────────────────────
new Chart(document.getElementById('chartEntropy'), {{
  type: 'line',
  data: {{
    labels: DATES,
    datasets: [{{
      label: 'Shannon entropy',
      data: ENTROPY,
      borderColor: '#d97706',
      backgroundColor: '#d9770622',
      borderWidth: 2,
      pointRadius: 0,
      fill: true,
      tension: 0.3,
    }}],
  }},
  options: {{
    responsive: true, maintainAspectRatio: false,
    plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
    scales: {{ x: timeAxis(), y: yAxis('Entropy (nats)') }},
  }},
}});

// ── 4. Trajectories ───────────────────────────────────────────
new Chart(document.getElementById('chartTraj'), {{
  type: 'line',
  data: {{ labels: DATES, datasets: TRAJ }},
  options: {{
    responsive: true, maintainAspectRatio: false,
    interaction: {{ mode: 'index', intersect: false }},
    plugins: {{
      legend: {{ position: 'right', labels: {{ boxWidth: 10, padding: 10, font: {{ size: 10 }} }} }},
      tooltip: {{ ...tooltip(), itemSort: (a,b) => b.raw - a.raw }},
    }},
    scales: {{ x: timeAxis(), y: yAxis('Score (weight)') }},
  }},
}});

// ── 5. N surgeries ────────────────────────────────────────────
new Chart(document.getElementById('chartNSurg'), {{
  type: 'bar',
  data: {{
    labels: SURG_DATES,
    datasets: [{{
      label: 'Surgeries',
      data: N_SURG,
      backgroundColor: '#7c3aed99',
      borderColor: '#7c3aed',
      borderWidth: 1,
      borderRadius: 3,
    }}],
  }},
  options: {{
    responsive: true, maintainAspectRatio: false,
    plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
    scales: {{
      x: timeAxis('Month'),
      y: yAxis('# operations', {{ min: 0, ticks: {{ stepSize: 1 }} }}),
    }},
  }},
}});

// ── 6. Avg gain ───────────────────────────────────────────────
new Chart(document.getElementById('chartGain'), {{
  type: 'bar',
  data: {{
    labels: SURG_DATES,
    datasets: [
      {{
        label: 'Total gain',
        data: TOTAL_GAIN,
        backgroundColor: '#10b98199',
        borderColor: '#10b981',
        borderWidth: 1,
        borderRadius: 3,
      }},
      {{
        label: 'Avg gain / surgery',
        data: AVG_GAIN,
        backgroundColor: '#06b6d455',
        borderColor: '#06b6d4',
        borderWidth: 1,
        borderRadius: 3,
      }},
    ],
  }},
  options: {{
    responsive: true, maintainAspectRatio: false,
    interaction: {{ mode: 'index', intersect: false }},
    plugins: {{ legend: {{ position: 'top', labels: {{ boxWidth: 10 }} }}, tooltip: tooltip() }},
    scales: {{ x: timeAxis('Month'), y: yAxis('Score gain') }},
  }},
}});

// ── 7. Jaccard ────────────────────────────────────────────────
new Chart(document.getElementById('chartJaccard'), {{
  type: 'bar',
  data: {{
    labels: SURG_DATES,
    datasets: [{{
      label: 'Avg Jaccard composite',
      data: AVG_JACCARD,
      backgroundColor: '#f59e0b88',
      borderColor: '#f59e0b',
      borderWidth: 1,
      borderRadius: 3,
    }}],
  }},
  options: {{
    responsive: true, maintainAspectRatio: false,
    plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
    scales: {{
      x: timeAxis('Month'),
      y: yAxis('Jaccard composite', {{ min: 0, max: 1.05 }}),
    }},
  }},
}});
</script>
</body>
</html>"""

    return report


# ─────────────────────────────────────────────────────────────
# 5. MAIN
# ─────────────────────────────────────────────────────────────

def main(argv=None):
    """Command-line entry point: load the CSVs, print the summary, write the report.

    Args:
        argv: optional list of command-line arguments; ``None`` (the default)
            makes argparse read ``sys.argv``.

    Exits with an error message if an input file cannot be found either at
    the given path or relative to this script's directory.
    """
    parser = argparse.ArgumentParser(description="Carmignac pipeline results analyser")
    parser.add_argument("--scores",  default="repair_results/carmignac_scores.csv")
    parser.add_argument("--mapping", default="repair_results/carmignac_mapping.csv")
    parser.add_argument("--surgery", default="repair_results/carmignac_surgery_log.csv")
    parser.add_argument("--out",     default="repair_results/carmignac_report.html")
    args = parser.parse_args(argv)

    # Resolve paths relative to this script's directory if files not found
    base = os.path.dirname(os.path.abspath(__file__))

    def resolve(p):
        if os.path.exists(p):
            return p
        alt = os.path.join(base, p)
        if os.path.exists(alt):
            return alt
        sys.exit(f"[ERROR] File not found: {p}")

    scores_path  = resolve(args.scores)
    mapping_path = resolve(args.mapping)
    surgery_path = resolve(args.surgery)

    print(f"[Load] scores  : {scores_path}")
    print(f"[Load] mapping : {mapping_path}")
    print(f"[Load] surgery : {surgery_path}")

    scores, mapping, surgery = load_outputs(scores_path, mapping_path, surgery_path)
    analytics = compute_analytics(scores, mapping, surgery)

    print_summary(analytics, surgery)

    html = build_html(analytics, surgery, scores, mapping)

    out_path = args.out
    # The inputs may have been found next to the script rather than in the
    # working directory, in which case the output directory may not exist
    # yet; create it so the write cannot fail for that reason.
    out_dir = os.path.dirname(os.path.abspath(out_path))
    os.makedirs(out_dir, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(html)
    print(f"\n[Report] Written to → {out_path}")


# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()