From bb2f24563c334d6bc248617bd588d1b73147de29 Mon Sep 17 00:00:00 2001
From: pgoze-ensae <paco.goze@ensae.fr>
Date: Fri, 20 Mar 2026 08:21:54 +0000
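Subject: [PATCH] debug: replace "carmignac repair.py" with carmignac_repair.py, rename report to carmignac_report_0.1.html, re-run aum_flows_analysis notebook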

---
 notebooks/aum_flows_analysis.ipynb            |   46 +-
 repair_challenge/carmignac repair.py          | 1037 -----------------
 repair_challenge/carmignac_repair.py          |  679 +++++++++++
 ..._report.html => carmignac_report_0.1.html} |    0
 4 files changed, 714 insertions(+), 1048 deletions(-)
 delete mode 100644 repair_challenge/carmignac repair.py
 create mode 100644 repair_challenge/carmignac_repair.py
 rename repair_challenge/repair_results/{carmignac_report.html => carmignac_report_0.1.html} (100%)

diff --git a/notebooks/aum_flows_analysis.ipynb b/notebooks/aum_flows_analysis.ipynb
index a7bdf72..39db03f 100644
--- a/notebooks/aum_flows_analysis.ipynb
+++ b/notebooks/aum_flows_analysis.ipynb
@@ -11,15 +11,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Matplotlib is building the font cache; this may take a moment.\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Requirement already satisfied: openpyxl in /opt/python/lib/python3.13/site-packages (3.1.5)\n",
-      "Requirement already satisfied: et-xmlfile in /opt/python/lib/python3.13/site-packages (from openpyxl) (2.0.0)\n"
+      "Collecting openpyxl\n",
+      "  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)\n",
+      "Collecting et-xmlfile (from openpyxl)\n",
+      "  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)\n",
+      "Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)\n",
+      "Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)\n",
+      "Installing collected packages: et-xmlfile, openpyxl\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2/2\u001b[0m [openpyxl]1/2\u001b[0m [openpyxl]\n",
+      "\u001b[1A\u001b[2KSuccessfully installed et-xmlfile-2.0.0 openpyxl-3.1.5\n"
      ]
     }
    ],
@@ -37,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -50,21 +64,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/tmp/ipykernel_94182/3768862044.py:5: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
-      "  stocks = pd.read_csv(f, sep=\";\")\n"
+      "/tmp/ipykernel_3944/2679329308.py:2: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+      "  flows = pd.read_csv(f, sep=\";\")\n"
      ]
     }
    ],
    "source": [
-    "#with fs.open('projet-bdc-data//carmignac/Flows ENSAE V2 -20251105.csv', 'rb') as f:\n",
-    "    #flows = pd.read_csv(f, sep=\";\")\n",
+    "with fs.open('projet-bdc-data//carmignac/Flows ENSAE V2 -20251105.csv', 'rb') as f:\n",
+    "    flows = pd.read_csv(f, sep=\";\")\n",
     "\n",
     "with fs.open('projet-bdc-data//carmignac/AUM ENSAE V2 -20251105.csv', 'rb') as f:\n",
     "    stocks = pd.read_csv(f, sep=\";\")\n",
@@ -466,7 +480,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -478,13 +492,23 @@
     }
    ],
    "source": [
-    "stocks[\"Centralisation Date\"] = pd.to_datetime(stocks[\"Centralisation Date\"], errors=\"coerce\")\n",
+    "flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"], errors=\"coerce\")\n",
     "#flows[\"Centralisation Date\"]  = pd.to_datetime(flows[\"Centralisation Date\"], errors=\"coerce\")\n",
     "#nav[\"NavDate\"] = pd.to_datetime(nav[\"NavDate\"], format=\"%d/%m/%Y\", errors=\"coerce\")\n",
     "\n",
     "print(\"Date conversion done.\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "flows_head = flows.head(100)\n",
+    "flows_head.to_csv(\"flows_head.csv\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 16,
diff --git a/repair_challenge/carmignac repair.py b/repair_challenge/carmignac repair.py
deleted file mode 100644
index f8dd91c..0000000
--- a/repair_challenge/carmignac repair.py	
+++ /dev/null
@@ -1,1037 +0,0 @@
-"""
-Carmignac Data Challenge — Pipeline Results Analysis
-=====================================================
-Analyses the CSV outputs produced by carmignac_repair.py:
-  - carmignac_scores.csv      (post-surgery score history)
-  - carmignac_mapping.csv     (reg_id mapping history)
-  - carmignac_surgery_log.csv (surgery operations)
-
-Produces a self-contained HTML report with interactive charts.
-
-Usage:
-    python carmignac_analysis.py
-    python carmignac_analysis.py --scores path/to/scores.csv \
-                                  --mapping path/to/mapping.csv \
-                                  --surgery path/to/surgery_log.csv \
-                                  --out report.html
-"""
-
-import argparse
-import json
-import os
-import sys
-import numpy as np
-import pandas as pd
-
-# ─────────────────────────────────────────────────────────────
-# 1. LOAD & VALIDATE
-# ─────────────────────────────────────────────────────────────
-
-def load_outputs(scores_path, mapping_path, surgery_path):
-    scores  = pd.read_csv(scores_path,  parse_dates=["date"])
-    mapping = pd.read_csv(mapping_path, parse_dates=["date"])
-    surgery = pd.read_csv(surgery_path, parse_dates=["date"])
-
-    # Normalise dtypes
-    scores["reg_id"]       = scores["reg_id"].astype(str)
-    mapping["reg_orig"]    = mapping["reg_orig"].astype(str)
-    mapping["reg_used"]    = mapping["reg_used"].astype(str)
-    mapping["changed"]     = mapping["changed"].astype(bool)
-    surgery["reg_orig"]    = surgery["reg_orig"].astype(str)
-    surgery["reg_from"]    = surgery["reg_from"].astype(str)
-    surgery["reg_to"]      = surgery["reg_to"].astype(str)
-
-    return scores, mapping, surgery
-
-
-# ─────────────────────────────────────────────────────────────
-# 2. COMPUTE ANALYTICS
-# ─────────────────────────────────────────────────────────────
-
-def compute_analytics(scores, mapping, surgery):
-    dates = sorted(scores["date"].unique())
-
-    # ── 2.1  Sum of scores per date (post-surgery) ──────────────
-    sum_post = (scores.groupby("date")["score"]
-                      .sum()
-                      .reindex(dates)
-                      .rename("sum_post"))
-
-    # ── 2.2  Reconstruct pre-surgery (counterfactual) ───────────
-    # Without surgery, every reg_id that had a hard break would score 0
-    # from that date backwards.  We propagate the surgery "gain" as a
-    # cumulative deficit going back in time.
-    gain_by_date = surgery.groupby("date")["gain_vs_no_surgery"].sum()
-    # cumulative deficit = sum of gains for all surgeries at or after date t
-    cumulative_deficit = pd.Series(0.0, index=dates)
-    for d in dates:
-        cumulative_deficit[d] = gain_by_date[gain_by_date.index >= d].sum()
-    sum_pre = (sum_post - cumulative_deficit).clip(lower=0).rename("sum_pre")
-
-    timeline = pd.DataFrame({"sum_post": sum_post, "sum_pre": sum_pre})
-    timeline.index = pd.to_datetime(timeline.index)
-    timeline["recovery_pct"] = np.where(
-        sum_pre < sum_post,
-        (sum_post - sum_pre) / sum_post.clip(lower=1e-9) * 100,
-        0.0,
-    )
-
-    # ── 2.3  Per-date surgery stats ─────────────────────────────
-    surgery_stats = (
-        surgery.groupby("date")
-               .agg(
-                   n_surgeries        = ("reg_orig",            "count"),
-                   total_gain         = ("gain_vs_no_surgery",  "sum"),
-                   avg_gain           = ("gain_vs_no_surgery",  "mean"),
-                   avg_jaccard        = ("jaccard_composite",   "mean"),
-                   avg_score_before   = ("score_before",        "mean"),
-                   avg_score_after    = ("score_after",         "mean"),
-               )
-               .reindex(dates, fill_value=0)
-    )
-
-    # ── 2.4  Score distribution over time ───────────────────────
-    # Wide format: rows=dates, cols=reg_ids
-    pivot = scores.pivot_table(index="date", columns="reg_id",
-                               values="score", aggfunc="last")
-    pivot = pivot.reindex(dates)
-    pivot.index = pd.to_datetime(pivot.index)
-
-    # ── 2.5  Mapping churn ──────────────────────────────────────
-    # For each date, how many reg_ids are remapped (not using their original code)?
-    churn = (mapping.groupby("date")["changed"]
-                    .sum()
-                    .reindex(dates, fill_value=0)
-                    .rename("n_remapped"))
-
-    # ── 2.6  Score entropy (distribution spread) ────────────────
-    def entropy(row):
-        p = row.dropna()
-        p = p[p > 0]
-        if len(p) == 0:
-            return np.nan
-        p = p / p.sum()
-        return -(p * np.log(p)).sum()
-
-    timeline["entropy"] = pivot.apply(entropy, axis=1).values
-
-    # ── 2.7  Individual score trajectories ──────────────────────
-    # Identify which reg_ids were ever remapped
-    ever_remapped = set(mapping.loc[mapping["changed"], "reg_orig"].unique())
-
-    # ── 2.8  Surgery detail table ───────────────────────────────
-    surgery_detail = surgery.copy()
-    surgery_detail["gain_pct_of_score"] = (
-        surgery_detail["gain_vs_no_surgery"]
-        / surgery_detail["score_before"].clip(lower=1e-9) * 100
-    ).round(2)
-
-    return {
-        "timeline":       timeline,
-        "surgery_stats":  surgery_stats,
-        "pivot":          pivot,
-        "churn":          churn,
-        "ever_remapped":  ever_remapped,
-        "surgery_detail": surgery_detail,
-        "dates":          [d.strftime("%Y-%m-%d") for d in dates],
-    }
-
-
-# ─────────────────────────────────────────────────────────────
-# 3. PRINT CONSOLE SUMMARY
-# ─────────────────────────────────────────────────────────────
-
-def print_summary(analytics, surgery):
-    tl = analytics["timeline"]
-    ss = analytics["surgery_stats"]
-
-    print("\n" + "=" * 65)
-    print("  CARMIGNAC PIPELINE — RESULTS SUMMARY")
-    print("=" * 65)
-
-    print(f"\n  Date range   : {tl.index.min().date()} → {tl.index.max().date()}")
-    print(f"  Total months : {len(tl)}")
-    print(f"  Reg IDs      : {analytics['pivot'].shape[1]}")
-
-    print(f"\n  ── Score (Σ) ──────────────────────────────────────────")
-    print(f"  At t_ref (latest)  : {tl['sum_post'].iloc[-1]:.6f}")
-    print(f"  At t_min (earliest): {tl['sum_post'].iloc[0]:.6f}")
-    print(f"  Min (post-surgery) : {tl['sum_post'].min():.6f}  "
-          f"({tl['sum_post'].idxmin().date()})")
-    print(f"  Min (pre-surgery)  : {tl['sum_pre'].min():.6f}  "
-          f"({tl['sum_pre'].idxmin().date()})")
-    print(f"  Max recovery (pct) : {tl['recovery_pct'].max():.2f}%")
-
-    print(f"\n  ── Surgeries ─────────────────────────────────────────")
-    if len(surgery) == 0:
-        print("  No surgeries performed.")
-    else:
-        print(f"  Total operations   : {len(surgery)}")
-        print(f"  Total score gained : {surgery['gain_vs_no_surgery'].sum():.6f}")
-        print(f"  Avg Jaccard        : {surgery['jaccard_composite'].mean():.4f}")
-        print(f"  Avg gain / surgery : {surgery['gain_vs_no_surgery'].mean():.6f}")
-        print()
-        print(f"  {'Date':12s} {'Reg orig':12s} {'From':15s} {'To':15s} "
-              f"{'Jaccard':>8s} {'Gain':>10s}")
-        print("  " + "-" * 78)
-        for _, row in surgery.sort_values("date").iterrows():
-            print(f"  {str(row['date'].date()):12s} {row['reg_orig']:12s} "
-                  f"{row['reg_from']:15s} {row['reg_to']:15s} "
-                  f"{row['jaccard_composite']:8.4f} {row['gain_vs_no_surgery']:10.6f}")
-
-    print(f"\n  ── Mapping churn ─────────────────────────────────────")
-    ch = analytics["churn"]
-    print(f"  Max remapped at one date  : {int(ch.max())}  ({ch.idxmax().date() if ch.max()>0 else 'N/A'})")
-    print(f"  Reg IDs ever remapped     : {len(analytics['ever_remapped'])}")
-
-    print(f"\n  ── Score entropy (distribution spread) ───────────────")
-    ent = analytics["timeline"]["entropy"]
-    print(f"  Mean entropy : {ent.mean():.4f}")
-    print(f"  Std  entropy : {ent.std():.4f}")
-    print()
-
-
-# ─────────────────────────────────────────────────────────────
-# 4. BUILD HTML REPORT
-# ─────────────────────────────────────────────────────────────
-
-def build_html(analytics, surgery, scores, mapping):
-    tl  = analytics["timeline"]
-    ss  = analytics["surgery_stats"]
-    piv = analytics["pivot"]
-    ch  = analytics["churn"]
-    dates_str = analytics["dates"]
-
-    # ── helpers to serialise for JS ─────────────────────────────
-    def jf(arr, decimals=6):
-        return json.dumps([round(float(v), decimals) if not np.isnan(v) else None
-                           for v in arr])
-
-    def js(arr):
-        return json.dumps(list(arr))
-
-    # ── colour palette ───────────────────────────────────────────
-    REG_COLORS = [
-        "#2563eb","#16a34a","#dc2626","#d97706","#7c3aed",
-        "#0891b2","#db2777","#65a30d","#ea580c","#6366f1",
-        "#059669","#b45309","#9333ea","#0284c7","#e11d48",
-    ]
-
-    # ── 4.1  Surgery sparkline data ──────────────────────────────
-    surg_dates  = [d.strftime("%Y-%m-%d") for d in ss.index]
-    n_surg      = jf(ss["n_surgeries"].values, 0)
-    total_gain  = jf(ss["total_gain"].values)
-    avg_gain    = jf(ss["avg_gain"].values)
-    avg_jaccard = jf(ss["avg_jaccard"].values)
-
-    # ── 4.2  Individual trajectories ────────────────────────────
-    reg_ids = list(piv.columns)
-    traj_datasets = []
-    for idx, rid in enumerate(reg_ids):
-        col   = analytics["ever_remapped"]
-        dashed = rid in col
-        traj_datasets.append({
-            "label":       rid,
-            "data":        [round(float(v), 6) if not np.isnan(v) else None
-                            for v in piv[rid].values],
-            "borderColor": REG_COLORS[idx % len(REG_COLORS)],
-            "backgroundColor": REG_COLORS[idx % len(REG_COLORS)] + "22",
-            "borderWidth":  2 if not dashed else 2,
-            "borderDash":   [] if not dashed else [6, 3],
-            "pointRadius":  0,
-            "tension":      0.3,
-            "fill":         False,
-        })
-
-    traj_json = json.dumps(traj_datasets)
-
-    # ── 4.3  Surgery detail table rows ──────────────────────────
-    sd = analytics["surgery_detail"].sort_values("date")
-    surg_rows_html = ""
-    if len(sd) == 0:
-        surg_rows_html = "<tr><td colspan='8' style='text-align:center;color:#888'>No surgeries performed</td></tr>"
-    else:
-        for _, r in sd.iterrows():
-            gain_class = "gain-high" if r["gain_vs_no_surgery"] > 0.05 else "gain-low"
-            surg_rows_html += f"""
-            <tr>
-                <td>{r['date'].date()}</td>
-                <td><span class="reg-badge">{r['reg_orig']}</span></td>
-                <td class="code-cell">{r['reg_from']}</td>
-                <td>→</td>
-                <td class="code-cell">{r['reg_to']}</td>
-                <td>{r['jaccard_composite']:.4f}</td>
-                <td class="{gain_class}">+{r['gain_vs_no_surgery']:.6f}</td>
-                <td>{r['gain_pct_of_score']:.1f}%</td>
-            </tr>"""
-
-    # ── 4.4  Top accounts table ──────────────────────────────────
-    last_date = piv.index.max()
-    top_accounts = piv.loc[last_date].dropna().sort_values(ascending=False)
-    top_rows_html = ""
-    for rank, (rid, sc) in enumerate(top_accounts.items(), 1):
-        remapped = "✓" if rid in analytics["ever_remapped"] else ""
-        bar_w    = int(sc / top_accounts.max() * 100)
-        color    = REG_COLORS[(rank - 1) % len(REG_COLORS)]
-        top_rows_html += f"""
-        <tr>
-            <td class="rank">#{rank}</td>
-            <td><span class="reg-badge" style="background:{color}22;border-color:{color}">{rid}</span></td>
-            <td class="score-val">{sc:.6f}</td>
-            <td class="bar-cell">
-                <div class="score-bar" style="width:{bar_w}%;background:{color}"></div>
-            </td>
-            <td class="center">{remapped}</td>
-        </tr>"""
-
-    # ─────────────────────────────────────────────────────────────
-    # HTML TEMPLATE
-    # ─────────────────────────────────────────────────────────────
-    html = f"""<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="UTF-8">
-<meta name="viewport" content="width=device-width,initial-scale=1">
-<title>Carmignac Pipeline — Analysis Report</title>
-<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
-<style>
-  @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=IBM+Plex+Sans:wght@300;400;600;700&display=swap');
-
-  :root {{
-    --bg:       #0d0f12;
-    --surface:  #151820;
-    --border:   #252a35;
-    --accent:   #3b82f6;
-    --accent2:  #10b981;
-    --warn:     #f59e0b;
-    --danger:   #ef4444;
-    --text:     #e2e8f0;
-    --muted:    #64748b;
-    --mono:     'IBM Plex Mono', monospace;
-    --sans:     'IBM Plex Sans', sans-serif;
-  }}
-
-  *, *::before, *::after {{ box-sizing: border-box; margin: 0; padding: 0; }}
-
-  body {{
-    font-family: var(--sans);
-    background: var(--bg);
-    color: var(--text);
-    min-height: 100vh;
-    padding: 0 0 60px;
-  }}
-
-  /* ── Header ── */
-  .header {{
-    background: linear-gradient(135deg, #0d1117 0%, #111827 50%, #0d1f3c 100%);
-    border-bottom: 1px solid var(--border);
-    padding: 40px 48px 36px;
-    position: relative;
-    overflow: hidden;
-  }}
-  .header::before {{
-    content: '';
-    position: absolute;
-    inset: 0;
-    background: radial-gradient(ellipse 70% 80% at 80% 50%, #1e40af18, transparent);
-    pointer-events: none;
-  }}
-  .header-eyebrow {{
-    font-family: var(--mono);
-    font-size: 11px;
-    letter-spacing: 0.15em;
-    color: var(--accent);
-    text-transform: uppercase;
-    margin-bottom: 10px;
-  }}
-  .header h1 {{
-    font-size: 2rem;
-    font-weight: 700;
-    letter-spacing: -0.02em;
-    line-height: 1.1;
-    margin-bottom: 8px;
-  }}
-  .header-sub {{
-    font-size: 0.85rem;
-    color: var(--muted);
-    font-family: var(--mono);
-  }}
-
-  /* ── KPI strip ── */
-  .kpi-strip {{
-    display: grid;
-    grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
-    gap: 1px;
-    background: var(--border);
-    border-bottom: 1px solid var(--border);
-  }}
-  .kpi {{
-    background: var(--surface);
-    padding: 22px 28px;
-    display: flex;
-    flex-direction: column;
-    gap: 4px;
-  }}
-  .kpi-label {{
-    font-size: 0.7rem;
-    letter-spacing: 0.1em;
-    text-transform: uppercase;
-    color: var(--muted);
-    font-family: var(--mono);
-  }}
-  .kpi-value {{
-    font-size: 1.6rem;
-    font-weight: 700;
-    font-family: var(--mono);
-    color: var(--text);
-    line-height: 1;
-  }}
-  .kpi-value.accent  {{ color: var(--accent); }}
-  .kpi-value.success {{ color: var(--accent2); }}
-  .kpi-value.warn    {{ color: var(--warn); }}
-  .kpi-sub {{
-    font-size: 0.7rem;
-    color: var(--muted);
-    font-family: var(--mono);
-  }}
-
-  /* ── Main layout ── */
-  .main {{
-    max-width: 1400px;
-    margin: 0 auto;
-    padding: 36px 48px;
-    display: flex;
-    flex-direction: column;
-    gap: 32px;
-  }}
-
-  /* ── Cards ── */
-  .card {{
-    background: var(--surface);
-    border: 1px solid var(--border);
-    border-radius: 8px;
-    overflow: hidden;
-  }}
-  .card-header {{
-    padding: 18px 24px 14px;
-    border-bottom: 1px solid var(--border);
-    display: flex;
-    align-items: baseline;
-    gap: 12px;
-  }}
-  .card-title {{
-    font-size: 0.8rem;
-    font-weight: 600;
-    letter-spacing: 0.1em;
-    text-transform: uppercase;
-    color: var(--muted);
-    font-family: var(--mono);
-  }}
-  .card-desc {{
-    font-size: 0.78rem;
-    color: #475569;
-  }}
-  .card-body {{
-    padding: 24px;
-  }}
-  .chart-wrap {{
-    position: relative;
-    height: 280px;
-  }}
-  .chart-wrap-tall {{
-    position: relative;
-    height: 340px;
-  }}
-
-  /* ── Two-column grid ── */
-  .grid-2 {{
-    display: grid;
-    grid-template-columns: 1fr 1fr;
-    gap: 24px;
-  }}
-  @media (max-width: 900px) {{
-    .grid-2 {{ grid-template-columns: 1fr; }}
-    .main {{ padding: 24px 20px; }}
-  }}
-
-  /* ── Section label ── */
-  .section-label {{
-    font-family: var(--mono);
-    font-size: 0.68rem;
-    letter-spacing: 0.15em;
-    text-transform: uppercase;
-    color: var(--muted);
-    padding: 0 4px;
-    border-left: 3px solid var(--accent);
-    padding-left: 10px;
-    margin-bottom: -8px;
-  }}
-
-  /* ── Tables ── */
-  table {{
-    width: 100%;
-    border-collapse: collapse;
-    font-size: 0.82rem;
-  }}
-  th {{
-    font-family: var(--mono);
-    font-size: 0.68rem;
-    letter-spacing: 0.08em;
-    text-transform: uppercase;
-    color: var(--muted);
-    padding: 10px 14px;
-    text-align: left;
-    border-bottom: 1px solid var(--border);
-    background: #0f1218;
-  }}
-  td {{
-    padding: 10px 14px;
-    border-bottom: 1px solid #1a1f2a;
-    vertical-align: middle;
-  }}
-  tr:last-child td {{ border-bottom: none; }}
-  tr:hover td {{ background: #181e2b; }}
-
-  .rank {{ color: var(--muted); font-family: var(--mono); font-size: 0.75rem; }}
-  .score-val {{ font-family: var(--mono); color: var(--accent2); }}
-  .code-cell {{ font-family: var(--mono); font-size: 0.78rem; color: #94a3b8; }}
-  .center {{ text-align: center; color: var(--accent2); }}
-  .gain-high {{ font-family: var(--mono); color: var(--accent2); font-weight: 600; }}
-  .gain-low  {{ font-family: var(--mono); color: var(--warn); }}
-
-  .bar-cell {{ width: 120px; }}
-  .score-bar {{
-    height: 6px;
-    border-radius: 3px;
-    min-width: 2px;
-    transition: width 0.3s;
-  }}
-
-  .reg-badge {{
-    display: inline-block;
-    padding: 2px 8px;
-    border-radius: 4px;
-    background: #1e2a3a;
-    border: 1px solid #2d3f54;
-    font-family: var(--mono);
-    font-size: 0.75rem;
-    color: var(--accent);
-    white-space: nowrap;
-  }}
-
-  /* ── Legend patch ── */
-  .legend-patch {{
-    display: inline-block;
-    width: 12px; height: 12px;
-    border-radius: 2px;
-    margin-right: 4px;
-    vertical-align: middle;
-  }}
-
-  /* ── No-surgery notice ── */
-  .no-surg {{
-    padding: 32px;
-    text-align: center;
-    color: var(--muted);
-    font-family: var(--mono);
-    font-size: 0.82rem;
-  }}
-
-  /* ── Footer ── */
-  .footer {{
-    text-align: center;
-    font-family: var(--mono);
-    font-size: 0.68rem;
-    color: #334155;
-    margin-top: 16px;
-    letter-spacing: 0.05em;
-  }}
-</style>
-</head>
-<body>
-
-<!-- ═══════════════════════════════════════════ HEADER -->
-<div class="header">
-  <div class="header-eyebrow">Carmignac × ENSAE · Data Challenge 2025</div>
-  <h1>Pipeline Results — Analysis Report</h1>
-  <div class="header-sub">Registrar ID repair · Score propagation · Surgery audit</div>
-</div>
-
-<!-- ═══════════════════════════════════════════ KPI STRIP -->
-<div class="kpi-strip">
-  <div class="kpi">
-    <span class="kpi-label">Σ score at t_ref</span>
-    <span class="kpi-value success">{tl['sum_post'].iloc[-1]:.4f}</span>
-    <span class="kpi-sub">post-surgery</span>
-  </div>
-  <div class="kpi">
-    <span class="kpi-label">Σ score at t_min</span>
-    <span class="kpi-value accent">{tl['sum_post'].iloc[0]:.4f}</span>
-    <span class="kpi-sub">post-surgery</span>
-  </div>
-  <div class="kpi">
-    <span class="kpi-label">Max recovery</span>
-    <span class="kpi-value warn">{tl['recovery_pct'].max():.1f}%</span>
-    <span class="kpi-sub">score rescued by surgery</span>
-  </div>
-  <div class="kpi">
-    <span class="kpi-label">Total surgeries</span>
-    <span class="kpi-value">{len(surgery)}</span>
-    <span class="kpi-sub">operations performed</span>
-  </div>
-  <div class="kpi">
-    <span class="kpi-label">Reg IDs universe</span>
-    <span class="kpi-value">{piv.shape[1]}</span>
-    <span class="kpi-sub">at reference date</span>
-  </div>
-  <div class="kpi">
-    <span class="kpi-label">Ever remapped</span>
-    <span class="kpi-value warn">{len(analytics['ever_remapped'])}</span>
-    <span class="kpi-sub">reg IDs w/ code change</span>
-  </div>
-</div>
-
-<!-- ═══════════════════════════════════════════ MAIN -->
-<div class="main">
-
-  <div class="section-label">01 · Score Integrity Over Time</div>
-
-  <!-- Chart 1: Σ score with vs without surgery -->
-  <div class="card">
-    <div class="card-header">
-      <span class="card-title">Sum of scores — pre vs post surgery</span>
-      <span class="card-desc">
-        Post-surgery (solid) shows the corrected score after code repairs.
-        Pre-surgery (dashed) is the counterfactual without any remapping.
-        Gap = score rescued.
-      </span>
-    </div>
-    <div class="card-body">
-      <div class="chart-wrap-tall">
-        <canvas id="chartSigma"></canvas>
-      </div>
-    </div>
-  </div>
-
-  <!-- Chart 2: Score drop (pre) -->
-  <div class="grid-2">
-    <div class="card">
-      <div class="card-header">
-        <span class="card-title">Score recovered by surgery</span>
-        <span class="card-desc">Difference post − pre at each date</span>
-      </div>
-      <div class="card-body">
-        <div class="chart-wrap">
-          <canvas id="chartRecovery"></canvas>
-        </div>
-      </div>
-    </div>
-
-    <div class="card">
-      <div class="card-header">
-        <span class="card-title">Portfolio concentration (entropy)</span>
-        <span class="card-desc">Shannon entropy of score distribution — higher = more spread</span>
-      </div>
-      <div class="card-body">
-        <div class="chart-wrap">
-          <canvas id="chartEntropy"></canvas>
-        </div>
-      </div>
-    </div>
-  </div>
-
-  <div class="section-label">02 · Individual Score Trajectories</div>
-
-  <div class="card">
-    <div class="card-header">
-      <span class="card-title">Score per Registrar Account — full history</span>
-      <span class="card-desc">
-        Dashed lines = accounts that were remapped at some point (surgery applied).
-        Solid lines = stable codes throughout.
-      </span>
-    </div>
-    <div class="card-body">
-      <div class="chart-wrap-tall" style="height:360px">
-        <canvas id="chartTraj"></canvas>
-      </div>
-    </div>
-  </div>
-
-  <div class="section-label">03 · Surgery Operations</div>
-
-  <div class="grid-2">
-    <div class="card">
-      <div class="card-header">
-        <span class="card-title">Surgeries per time step</span>
-        <span class="card-desc">Number of code remappings performed at each month</span>
-      </div>
-      <div class="card-body">
-        <div class="chart-wrap">
-          <canvas id="chartNSurg"></canvas>
-        </div>
-      </div>
-    </div>
-
-    <div class="card">
-      <div class="card-header">
-        <span class="card-title">Score gain per surgery</span>
-        <span class="card-desc">Average gain in Σ score from surgery at each month</span>
-      </div>
-      <div class="card-body">
-        <div class="chart-wrap">
-          <canvas id="chartGain"></canvas>
-        </div>
-      </div>
-    </div>
-  </div>
-
-  <div class="card">
-    <div class="card-header">
-      <span class="card-title">Jaccard similarity of surgery matches</span>
-      <span class="card-desc">
-        Composite Jaccard score of the matched code pair — closer to 1.0 = stronger portfolio overlap.
-        Low values may indicate uncertain matches.
-      </span>
-    </div>
-    <div class="card-body">
-      <div class="chart-wrap">
-        <canvas id="chartJaccard"></canvas>
-      </div>
-    </div>
-  </div>
-
-  <div class="section-label">04 · Surgery Detail Log</div>
-
-  <div class="card">
-    <div class="card-header">
-      <span class="card-title">All surgery operations</span>
-    </div>
-    <div class="card-body" style="padding:0">
-      {'<div class="no-surg">No surgeries were performed on this dataset.</div>' if len(surgery) == 0 else f"""
-      <table>
-        <thead>
-          <tr>
-            <th>Date</th>
-            <th>Reg orig</th>
-            <th>Code from</th>
-            <th></th>
-            <th>Code to</th>
-            <th>Jaccard</th>
-            <th>Score gain</th>
-            <th>% of score</th>
-          </tr>
-        </thead>
-        <tbody>{surg_rows_html}</tbody>
-      </table>"""}
-    </div>
-  </div>
-
-  <div class="section-label">05 · Score Ranking at t_ref</div>
-
-  <div class="card">
-    <div class="card-header">
-      <span class="card-title">Accounts ranked by weight at reference date</span>
-      <span class="card-desc">✓ in last column = account was remapped at some point in history</span>
-    </div>
-    <div class="card-body" style="padding:0">
-      <table>
-        <thead>
-          <tr>
-            <th>Rank</th>
-            <th>Registrar ID</th>
-            <th>Score (weight)</th>
-            <th style="width:140px">Relative size</th>
-            <th>Remapped</th>
-          </tr>
-        </thead>
-        <tbody>{top_rows_html}</tbody>
-      </table>
-    </div>
-  </div>
-
-</div><!-- /main -->
-
-<div class="footer">Generated by carmignac_analysis.py · Carmignac × ENSAE Data Challenge 2025</div>
-
-<!-- ═══════════════════════════════════════════ CHARTS JS -->
-<script>
-Chart.defaults.color = '#64748b';
-Chart.defaults.borderColor = '#1e2535';
-Chart.defaults.font.family = "'IBM Plex Mono', monospace";
-Chart.defaults.font.size = 11;
-
-const DATES   = {js(dates_str)};
-const SUM_POST = {jf(tl['sum_post'].values)};
-const SUM_PRE  = {jf(tl['sum_pre'].values)};
-const RECOVERY = {jf(tl['recovery_pct'].values, 4)};
-const ENTROPY  = {jf(tl['entropy'].values, 4)};
-const SURG_DATES   = {js(surg_dates)};
-const N_SURG       = {n_surg};
-const TOTAL_GAIN   = {total_gain};
-const AVG_GAIN     = {avg_gain};
-const AVG_JACCARD  = {avg_jaccard};
-const TRAJ         = {traj_json};
-
-// ── Shared options helpers ────────────────────────────────────
-function timeAxis(label) {{
-  return {{
-    type: 'category',
-    ticks: {{ maxTicksLimit: 10, maxRotation: 0 }},
-    grid: {{ color: '#1a2030' }},
-    title: {{ display: !!label, text: label, color: '#475569' }},
-  }};
-}}
-function yAxis(label, opts={{}}) {{
-  return {{
-    grid: {{ color: '#1a2030' }},
-    title: {{ display: !!label, text: label, color: '#475569' }},
-    ...opts,
-  }};
-}}
-function tooltip() {{
-  return {{
-    backgroundColor: '#0d1117',
-    borderColor: '#252a35',
-    borderWidth: 1,
-    titleFont: {{ family: "'IBM Plex Mono'" }},
-    bodyFont:  {{ family: "'IBM Plex Mono'" }},
-    padding: 10,
-  }};
-}}
-
-// ── 1. Sigma pre/post ─────────────────────────────────────────
-new Chart(document.getElementById('chartSigma'), {{
-  type: 'line',
-  data: {{
-    labels: DATES,
-    datasets: [
-      {{
-        label: 'Σ score (post-surgery)',
-        data: SUM_POST,
-        borderColor: '#10b981',
-        backgroundColor: '#10b98115',
-        borderWidth: 2.5,
-        pointRadius: 0,
-        fill: false,
-        tension: 0.2,
-      }},
-      {{
-        label: 'Σ score (pre-surgery / counterfactual)',
-        data: SUM_PRE,
-        borderColor: '#ef4444',
-        borderDash: [6, 4],
-        borderWidth: 1.5,
-        pointRadius: 0,
-        fill: false,
-        tension: 0.2,
-        backgroundColor: 'transparent',
-      }},
-    ],
-  }},
-  options: {{
-    responsive: true, maintainAspectRatio: false,
-    interaction: {{ mode: 'index', intersect: false }},
-    plugins: {{
-      legend: {{ position: 'top', labels: {{ boxWidth: 12, padding: 16 }} }},
-      tooltip: tooltip(),
-    }},
-    scales: {{
-      x: timeAxis(),
-      y: yAxis('Σ scores', {{ min: 0, max: 1.05, ticks: {{ stepSize: 0.1 }} }}),
-    }},
-  }},
-}});
-
-// ── 2. Recovery ───────────────────────────────────────────────
-new Chart(document.getElementById('chartRecovery'), {{
-  type: 'bar',
-  data: {{
-    labels: DATES,
-    datasets: [{{
-      label: 'Score recovered (%)',
-      data: RECOVERY,
-      backgroundColor: '#3b82f6aa',
-      borderColor: '#3b82f6',
-      borderWidth: 1,
-      borderRadius: 2,
-    }}],
-  }},
-  options: {{
-    responsive: true, maintainAspectRatio: false,
-    plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
-    scales: {{
-      x: timeAxis(),
-      y: yAxis('Recovery (% of Σ)', {{ min: 0 }}),
-    }},
-  }},
-}});
-
-// ── 3. Entropy ────────────────────────────────────────────────
-new Chart(document.getElementById('chartEntropy'), {{
-  type: 'line',
-  data: {{
-    labels: DATES,
-    datasets: [{{
-      label: 'Shannon entropy',
-      data: ENTROPY,
-      borderColor: '#d97706',
-      backgroundColor: '#d9770622',
-      borderWidth: 2,
-      pointRadius: 0,
-      fill: true,
-      tension: 0.3,
-    }}],
-  }},
-  options: {{
-    responsive: true, maintainAspectRatio: false,
-    plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
-    scales: {{ x: timeAxis(), y: yAxis('Entropy (nats)') }},
-  }},
-}});
-
-// ── 4. Trajectories ───────────────────────────────────────────
-new Chart(document.getElementById('chartTraj'), {{
-  type: 'line',
-  data: {{ labels: DATES, datasets: TRAJ }},
-  options: {{
-    responsive: true, maintainAspectRatio: false,
-    interaction: {{ mode: 'index', intersect: false }},
-    plugins: {{
-      legend: {{ position: 'right', labels: {{ boxWidth: 10, padding: 10, font: {{ size: 10 }} }} }},
-      tooltip: {{ ...tooltip(), itemSort: (a,b) => b.raw - a.raw }},
-    }},
-    scales: {{ x: timeAxis(), y: yAxis('Score (weight)') }},
-  }},
-}});
-
-// ── 5. N surgeries ────────────────────────────────────────────
-new Chart(document.getElementById('chartNSurg'), {{
-  type: 'bar',
-  data: {{
-    labels: SURG_DATES,
-    datasets: [{{
-      label: 'Surgeries',
-      data: N_SURG,
-      backgroundColor: '#7c3aed99',
-      borderColor: '#7c3aed',
-      borderWidth: 1,
-      borderRadius: 3,
-    }}],
-  }},
-  options: {{
-    responsive: true, maintainAspectRatio: false,
-    plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
-    scales: {{
-      x: timeAxis('Month'),
-      y: yAxis('# operations', {{ min: 0, ticks: {{ stepSize: 1 }} }}),
-    }},
-  }},
-}});
-
-// ── 6. Avg gain ───────────────────────────────────────────────
-new Chart(document.getElementById('chartGain'), {{
-  type: 'bar',
-  data: {{
-    labels: SURG_DATES,
-    datasets: [
-      {{
-        label: 'Total gain',
-        data: TOTAL_GAIN,
-        backgroundColor: '#10b98199',
-        borderColor: '#10b981',
-        borderWidth: 1,
-        borderRadius: 3,
-      }},
-      {{
-        label: 'Avg gain / surgery',
-        data: AVG_GAIN,
-        backgroundColor: '#06b6d455',
-        borderColor: '#06b6d4',
-        borderWidth: 1,
-        borderRadius: 3,
-      }},
-    ],
-  }},
-  options: {{
-    responsive: true, maintainAspectRatio: false,
-    interaction: {{ mode: 'index', intersect: false }},
-    plugins: {{ legend: {{ position: 'top', labels: {{ boxWidth: 10 }} }}, tooltip: tooltip() }},
-    scales: {{ x: timeAxis('Month'), y: yAxis('Score gain') }},
-  }},
-}});
-
-// ── 7. Jaccard ────────────────────────────────────────────────
-new Chart(document.getElementById('chartJaccard'), {{
-  type: 'bar',
-  data: {{
-    labels: SURG_DATES,
-    datasets: [{{
-      label: 'Avg Jaccard composite',
-      data: AVG_JACCARD,
-      backgroundColor: '#f59e0b88',
-      borderColor: '#f59e0b',
-      borderWidth: 1,
-      borderRadius: 3,
-    }}],
-  }},
-  options: {{
-    responsive: true, maintainAspectRatio: false,
-    plugins: {{ legend: {{ display: false }}, tooltip: tooltip() }},
-    scales: {{
-      x: timeAxis('Month'),
-      y: yAxis('Jaccard composite', {{ min: 0, max: 1.05 }}),
-    }},
-  }},
-}});
-</script>
-</body>
-</html>"""
-
-    return html
-
-
-# ─────────────────────────────────────────────────────────────
-# 5. MAIN
-# ─────────────────────────────────────────────────────────────
-
-def main():
-    parser = argparse.ArgumentParser(description="Carmignac pipeline results analyser")
-    parser.add_argument("--scores",  default="repair_results/carmignac_scores.csv")
-    parser.add_argument("--mapping", default="repair_results/carmignac_mapping.csv")
-    parser.add_argument("--surgery", default="repair_results/carmignac_surgery_log.csv")
-    parser.add_argument("--out",     default="repair_results/carmignac_report.html")
-    args = parser.parse_args()
-
-    # Resolve paths relative to this script's directory if files not found
-    base = os.path.dirname(os.path.abspath(__file__))
-    def resolve(p):
-        if os.path.exists(p):
-            return p
-        alt = os.path.join(base, p)
-        if os.path.exists(alt):
-            return alt
-        sys.exit(f"[ERROR] File not found: {p}")
-
-    scores_path  = resolve(args.scores)
-    mapping_path = resolve(args.mapping)
-    surgery_path = resolve(args.surgery)
-
-    print(f"[Load] scores  : {scores_path}")
-    print(f"[Load] mapping : {mapping_path}")
-    print(f"[Load] surgery : {surgery_path}")
-
-    scores, mapping, surgery = load_outputs(scores_path, mapping_path, surgery_path)
-    analytics = compute_analytics(scores, mapping, surgery)
-
-    print_summary(analytics, surgery)
-
-    html = build_html(analytics, surgery, scores, mapping)
-
-    out_path = args.out
-    with open(out_path, "w", encoding="utf-8") as f:
-        f.write(html)
-    print(f"\n[Report] Written to → {out_path}")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/repair_challenge/carmignac_repair.py b/repair_challenge/carmignac_repair.py
new file mode 100644
index 0000000..6fab02f
--- /dev/null
+++ b/repair_challenge/carmignac_repair.py
@@ -0,0 +1,679 @@
+"""
+Carmignac Data Challenge — Registrar ID Repair Pipeline
+=========================================================
+Étape 1 : Filtrage & univers de référence à t=31/10/2025
+Étape 2 : Score de cohérence temporelle (propagation vers le passé)
+Étape 3 : Chirurgie de code (matching 1-to-1)
+"""
+
+import pandas as pd
+import numpy as np
+from collections import defaultdict
+import os 
+import s3fs
+
+# ─────────────────────────────────────────────
+# PARAMÈTRES
+# ─────────────────────────────────────────────
+ALPHA = 0.03           # reconciliation tolerance: a relative error above 3% is a break
+MIN_AUM_EUR = 5e6        # step-1 filter: minimum total AUM (EUR) at t_ref; use 0 on small test extracts
+MIN_JACCARD = 0.3      # minimum ISIN-portfolio similarity for a surgery candidate
+SCORE_DROP_THRESHOLD = 0.1  # a score drop above 10% makes the account a surgery candidate
+
+EXCLUDE_REGISTRAR = ["Off Distribution", "Private Clients"]  # matched against reg_id and region
+
+# ─────────────────────────────────────────────
+# 1. CHARGEMENT
+# ─────────────────────────────────────────────
+def load_data(aum_path, flows_path):
+    """
+    Load the AUM and flows exports (';'-separated CSV) from the MinIO/S3 store.
+    `aum_path` / `flows_path` are the object paths to read. Credentials come
+    from the AWS_* environment variables; the session token is optional.
+    Returns (aum, flows) with short column names and reg_id cast to str.
+    """
+    fs = s3fs.S3FileSystem(
+        client_kwargs={'endpoint_url': 'https://minio-simple.lab.groupe-genes.fr'},
+        key=os.environ["AWS_ACCESS_KEY_ID"],
+        secret=os.environ["AWS_SECRET_ACCESS_KEY"],
+        token=os.environ.get("AWS_SESSION_TOKEN"),
+    )
+    with fs.open(flows_path, 'rb') as f:
+        flows = pd.read_csv(f, sep=";")
+    with fs.open(aum_path, 'rb') as f:
+        aum = pd.read_csv(f, sep=";")
+    aum['Centralisation Date'] = pd.to_datetime(aum['Centralisation Date'])
+    flows['Centralisation Date'] = pd.to_datetime(flows['Centralisation Date'])
+    aum = aum.rename(columns={
+        'Registrar Account - ID': 'reg_id',
+        'Product - Isin': 'isin',
+        'Centralisation Date': 'date',
+        'Quantity - AUM': 'qty_aum',
+        'Value - AUM €': 'val_eur',
+        'Registrar Account - Region': 'region',
+    })
+    flows = flows.rename(columns={
+        'Registrar Account - ID': 'reg_id',
+        'Product - Isin': 'isin',
+        'Centralisation Date': 'date',
+        'Quantity - NetFlows': 'qty_net',
+        'Value € - NetFlows': 'val_net_eur',
+    })
+    aum['reg_id'] = aum['reg_id'].astype(str)
+    flows['reg_id'] = flows['reg_id'].astype(str)
+    return aum, flows
+
+# ─────────────────────────────────────────────
+# 2. ÉTAPE 1 — Univers de référence à T_REF
+# ─────────────────────────────────────────────
+def build_reference_universe(aum, t_ref=None):
+    """
+    Build the reference universe at t_ref (defaults to the latest AUM date).
+
+    Rows whose reg_id or region is listed in EXCLUDE_REGISTRAR are dropped,
+    then registrar IDs with total val_eur >= MIN_AUM_EUR at t_ref are kept.
+
+    Returns (aum_ref, weights, universe, t_ref):
+      - aum_ref  : AUM rows at t_ref (reg_id, isin, qty_aum, val_eur)
+      - weights  : {reg_id: share of the universe's total val_eur}
+      - universe : set of retained reg_id
+      - t_ref    : the reference date actually used
+    """
+    t_ref = aum['date'].max() if t_ref is None else t_ref
+    print(f"\n[Étape 1] Date de référence : {t_ref.date()}")
+
+    # Exclusion list is matched against the ID itself and, if present, the region
+    excluded = aum['reg_id'].isin(EXCLUDE_REGISTRAR)
+    if 'region' in aum.columns:
+        excluded = excluded | aum['region'].isin(EXCLUDE_REGISTRAR)
+    kept_rows = aum[~excluded].copy()
+
+    # Snapshot at t_ref and total EUR per registrar ID
+    snapshot_cols = ['reg_id', 'isin', 'qty_aum', 'val_eur']
+    aum_ref = kept_rows[kept_rows['date'] == t_ref][snapshot_cols].copy()
+    aum_by_reg = aum_ref.groupby('reg_id')['val_eur'].sum().rename('total_eur')
+
+    # Size filter
+    in_universe = aum_by_reg[aum_by_reg >= MIN_AUM_EUR]
+    universe = set(in_universe.index)
+    total_eur_universe = in_universe.sum()
+    total_eur_all = aum_by_reg.sum()
+    coverage = total_eur_universe / total_eur_all if total_eur_all > 0 else 0
+
+    print(f"  Registrar IDs à t_ref          : {len(aum_by_reg)}")
+    print(f"  Dont >= {MIN_AUM_EUR/1e6:.0f}M€                : {len(universe)}")
+    print(f"  Couverture encours              : {coverage:.1%}")
+    # Initial scores at t_ref
+    weights = (in_universe / total_eur_universe).to_dict()
+    return aum_ref, weights, universe, t_ref
+
+# ─────────────────────────────────────────────
+# 3. PANEL AUM MENSUEL (forward-fill)
+# ─────────────────────────────────────────────
+def build_monthly_panel(aum, universe, t_ref):
+    """
+    Build a complete month-end panel of AUM quantities (forward-filled) for
+    ALL reg_ids found in the AUM history — including historical codes outside
+    the reference universe, which the surgery step needs as candidates.
+
+    Parameters:
+      - aum      : AUM rows with columns date, reg_id, isin, qty_aum
+      - universe : unused, kept so existing callers keep working
+      - t_ref    : last date to include; observations after it are ignored
+
+    Returns (panel, all_months): panel is indexed by month end with
+    (reg_id, isin) MultiIndex columns; all_months is that month-end index.
+    """
+    # Every month end between the first observation and t_ref
+    all_months = pd.date_range(start=aum['date'].min(), end=t_ref, freq='ME')
+
+    # One column per (reg_id, isin); the last row wins on duplicate dates
+    aum_sorted = aum.sort_values(['reg_id', 'isin', 'date'])
+    history = aum_sorted[aum_sorted['date'] <= t_ref]
+    panel = history.pivot_table(
+        index='date', columns=['reg_id', 'isin'], values='qty_aum', aggfunc='last'
+    )
+
+    # Forward-fill on the union of observed dates and month ends BEFORE
+    # selecting month ends: reindexing first would drop observations that do
+    # not fall exactly on a month end, so they could never be carried forward.
+    # Dates before a pair's first observation stay NaN (no back-fill).
+    panel = panel.reindex(panel.index.union(all_months)).ffill().reindex(all_months)
+
+    print(f"\n[Panel mensuel] {len(all_months)} mois, {panel.shape[1]} (reg_id, isin) paires")
+
+    return panel, all_months
+
+# ─────────────────────────────────────────────
+# 4. FLOWS AGRÉGÉS PAR MOIS
+# ─────────────────────────────────────────────
+def aggregate_flows_monthly(flows, all_months):
+    """
+    Aggregate intra-month flows over each window ]month_end(t-1), month_end(t)].
+
+    Each transaction is attached to the first month end >= its date; flows
+    dated after the last month end (or with a missing date) are dropped.
+    Returns a DataFrame with columns date, reg_id, isin, qty_net_month.
+    """
+    flows_f = flows[flows['date'] <= all_months[-1]].copy()
+
+    # all_months is sorted, so one binary search replaces the per-row scan;
+    # side='left' keeps a flow dated exactly on a month end in that month.
+    # The filter above guarantees every position is a valid index.
+    positions = all_months.searchsorted(flows_f['date'].to_numpy(), side='left')
+    flows_f['month_end'] = all_months[positions]
+
+    monthly_flows = flows_f.groupby(['month_end', 'reg_id', 'isin'])['qty_net'].sum()
+    monthly_flows = monthly_flows.reset_index()
+    monthly_flows.columns = ['date', 'reg_id', 'isin', 'qty_net_month']
+
+    print(f"\n[Flows mensuels] {len(monthly_flows)} enregistrements (reg_id, isin, mois)")
+    return monthly_flows
+
+# ─────────────────────────────────────────────
+# 5. ÉTAPE 2 — Score de cohérence temporelle
+# ─────────────────────────────────────────────
+def compute_reconciliation_error(qty_t_minus1, qty_t, net_flow, alpha=ALPHA):
+    """
+    Normalised reconciliation error for one (reg_id, isin, month).
+
+    Expected identity : qty_t_minus1 + net_flow ≈ qty_t
+    Error ratio       : |qty_t_minus1 + net_flow - qty_t|
+                        / max(|qty_t|, |qty_t_minus1|, 1e-9)
+
+    Returns (err_ratio, is_break) where is_break is True when
+    err_ratio > alpha; the 1e-9 floor avoids dividing by zero.
+    """
+    gap = abs(qty_t_minus1 + net_flow - qty_t)
+    scale = max(abs(qty_t), abs(qty_t_minus1), 1e-9)
+    ratio = gap / scale
+    return ratio, ratio > alpha
+
+def score_propagation(panel, monthly_flows, weights, universe, all_months):
+    """
+    Propagate scores backwards in time, from t_ref (last month) to the first month.
+
+    For every month step (t_prev, t_curr) and every reg_id of the universe:
+      - compute the reconciliation error per ISIN, weighted by |qty| at t_curr
+      - degrade the score: score(t_prev) = score(t_curr) * (1 - weighted error)
+    An ISIN held at t_curr but absent (NaN) at t_prev counts as a full error.
+
+    Returns:
+      - scores_history : dict {date → {reg_id → score}}
+      - errors_history : dict {date → {reg_id → weighted error}}; a reg_id whose
+                         score is already 0 gets no entry for that date
+      - mapping        : dict {reg_id → reg_id}; identity here, no surgery is
+                         performed in this pass
+    """
+    scores = dict(weights)  # scores at t_ref
+    scores_history = {all_months[-1]: dict(scores)}
+    errors_history = {}
+
+    # Current mapping (identity: this pass never re-maps a code)
+    mapping = {r: r for r in universe}
+
+    # Flows indexed for direct (date, reg_id, isin) lookup
+    flows_idx = monthly_flows.set_index(['date', 'reg_id', 'isin'])['qty_net_month']
+
+    # Level-0 labels collected once: they are invariant across both loops
+    regs_in_panel = set(panel.columns.get_level_values(0))
+
+    # Walk back in time
+    for i in range(len(all_months) - 2, -1, -1):
+        t_prev = all_months[i]
+        t_curr = all_months[i + 1]
+
+        errors_at_t = {}
+        new_scores = {}
+
+        for reg_orig, reg_curr in mapping.items():
+            score_curr = scores.get(reg_orig, 0)
+            if score_curr == 0:
+                new_scores[reg_orig] = 0
+                continue
+
+            if reg_curr not in regs_in_panel:
+                # reg_curr is absent from the panel altogether → total break
+                new_scores[reg_orig] = 0
+                errors_at_t[reg_orig] = 1.0
+                continue
+
+            # All ISINs ever held by this reg (one sub-frame lookup per account)
+            reg_panel = panel[reg_curr]
+
+            total_aum_t = 0
+            weighted_err = 0
+            valid_isin_count = 0
+
+            for isin in reg_panel.columns.tolist():
+                qty_t = reg_panel[isin].get(t_curr, np.nan)
+                qty_t_prev = reg_panel[isin].get(t_prev, np.nan)
+
+                if pd.isna(qty_t):
+                    continue
+
+                if pd.isna(qty_t_prev):
+                    # ISIN present at t_curr but not at t_prev → break on this
+                    # ISIN: maximal error, weighted by its quantity
+                    weight_isin = abs(qty_t)
+                    weighted_err += 1.0 * weight_isin
+                    total_aum_t += weight_isin
+                    valid_isin_count += 1
+                    continue
+
+                if qty_t == 0 and qty_t_prev == 0:
+                    continue
+                # Aggregated flow over ]t_prev, t_curr]; no record means no flow
+                try:
+                    net_flow = flows_idx.loc[(t_curr, reg_curr, isin)]
+                except KeyError:
+                    net_flow = 0.0
+
+                err_ratio, _ = compute_reconciliation_error(
+                    qty_t_prev, qty_t, net_flow, alpha=ALPHA
+                )
+
+                # Weight by the quantity held at t_curr
+                weight_isin = abs(qty_t)
+                weighted_err += err_ratio * weight_isin
+                total_aum_t += weight_isin
+                valid_isin_count += 1
+
+            if total_aum_t > 0 and valid_isin_count > 0:
+                avg_err = weighted_err / total_aum_t
+            else:
+                avg_err = 0.0
+
+            errors_at_t[reg_orig] = avg_err
+
+            # Score degradation; the error is clipped to 1 so the score stays >= 0
+            degradation = min(avg_err, 1.0)
+            new_scores[reg_orig] = score_curr * (1.0 - degradation)
+
+        scores = new_scores
+        scores_history[t_prev] = dict(scores)
+        errors_history[t_prev] = dict(errors_at_t)
+
+        total_score = sum(scores.values())
+        print(f"  {t_prev.date()} | Σ scores = {total_score:.4f} | "
+              f"Comptes actifs = {sum(1 for v in scores.values() if v > 0)}")
+
+    return scores_history, errors_history, mapping
+
+# ─────────────────────────────────────────────
+# 6. ÉTAPE 3 — Chirurgie de code
+# ─────────────────────────────────────────────
+def jaccard_isin(set_a, set_b):
+    """Jaccard index |A ∩ B| / |A ∪ B| of two ISIN sets; 0.0 if either is empty."""
+    if not (set_a and set_b):
+        return 0.0
+    shared = set_a & set_b
+    combined = set_a | set_b
+    return len(shared) / len(combined) if combined else 0.0
+
+def find_best_candidate(reg_orig, reg_curr, t_prev, t_curr,
+                        panel, flows_idx, all_regs_at_t_prev, mapping_inv):
+    """
+    For a reg_id whose score dropped sharply, look for the best candidate j
+    at t_prev such that:
+      - j is not a key of mapping_inv and is not reg_curr itself
+      - j's active ISIN set at t_prev has Jaccard >= MIN_JACCARD with reg_curr's at t_curr
+      - reconciliation on the common ISINs is good (composite = jac * (1 - error))
+    `reg_orig` is not read. NOTE(review): no caller is visible in this file.
+    Returns (best_candidate, best_composite), or (None, 0.0) when none qualifies.
+    """
+    # ISINs of the target account at t_curr
+    if reg_curr not in panel.columns.get_level_values(0):
+        return None, 0.0
+
+    isin_curr = set(panel[reg_curr].columns[
+        panel[reg_curr].loc[t_curr].notna() & (panel[reg_curr].loc[t_curr] != 0)
+    ].tolist())
+
+    if not isin_curr:
+        return None, 0.0
+
+    best_candidate = None
+    best_composite = 0.0
+
+    for j in all_regs_at_t_prev:
+        # Do not reuse a code that is already mapped
+        if j in mapping_inv:
+            continue
+        # Do not map the account onto its own current code
+        if j == reg_curr:
+            continue
+
+        if j not in panel.columns.get_level_values(0):
+            continue
+
+        # ISINs of j at t_prev (non-NaN, non-zero quantities)
+        col_j = panel[j]
+        isin_j = set(col_j.columns[
+            col_j.loc[t_prev].notna() & (col_j.loc[t_prev] != 0)
+        ].tolist()) if t_prev in col_j.index else set()
+
+        if not isin_j:
+            continue
+
+        jac = jaccard_isin(isin_curr, isin_j)
+        if jac < MIN_JACCARD:
+            continue
+
+        # Reconciliation error over the common ISINs, weighted by |qty| at t_curr
+        common_isin = isin_curr & isin_j
+        total_aum = 0
+        weighted_err = 0
+
+        for isin in common_isin:
+            qty_t = panel[reg_curr][isin].get(t_curr, np.nan) if isin in panel[reg_curr].columns else np.nan
+            qty_t_prev = panel[j][isin].get(t_prev, np.nan) if isin in panel[j].columns else np.nan
+
+            if pd.isna(qty_t) or pd.isna(qty_t_prev):
+                continue
+
+            try:
+                net_flow = flows_idx.loc[(t_curr, j, isin)]
+            except KeyError:
+                net_flow = 0.0
+
+            err_ratio, _ = compute_reconciliation_error(qty_t_prev, qty_t, net_flow)
+            weight_isin = abs(qty_t)
+            weighted_err += err_ratio * weight_isin
+            total_aum += weight_isin
+
+        avg_err = weighted_err / total_aum if total_aum > 0 else 1.0
+
+        composite = jac * (1.0 - min(avg_err, 1.0))
+
+        if composite > best_composite:
+            best_composite = composite
+            best_candidate = j
+
+    return best_candidate, best_composite
+
+
+def _recompute_score_with_candidate(reg_orig, candidate, t_prev, t_curr,
+                                     panel, flows_idx, score_curr):
+    """
+    Score after surgery when `candidate` is used at t_prev for the account
+    that `reg_orig` denotes at t_curr. Every ISIN held (non-NaN, non-zero) at
+    t_curr is reconciled, weighted by |qty|; an ISIN the candidate lacks at
+    t_prev is a full error. Returns score_curr * (1 - weighted error).
+    """
+    regs_in_panel = panel.columns.get_level_values(0)
+    if candidate not in regs_in_panel:
+        return score_curr * 0  # unknown candidate
+    orig_frame = panel[reg_orig] if reg_orig in regs_in_panel else pd.DataFrame()
+    cand_frame = panel[candidate]
+
+    total_aum = 0
+    weighted_err = 0
+    for isin in orig_frame.columns:
+        qty_t = orig_frame[isin].get(t_curr, np.nan)
+        if pd.isna(qty_t) or qty_t == 0:
+            continue
+        if isin in cand_frame.columns:
+            qty_t_prev = cand_frame[isin].get(t_prev, np.nan)
+        else:
+            qty_t_prev = np.nan
+
+        if pd.isna(qty_t_prev):
+            err_ratio = 1.0  # candidate does not hold this ISIN at t_prev
+        else:
+            try:
+                net_flow = flows_idx.loc[(t_curr, candidate, isin)]
+            except KeyError:
+                net_flow = 0.0  # no flow record means no flow on the window
+            err_ratio, _ = compute_reconciliation_error(qty_t_prev, qty_t, net_flow)
+        weight_isin = abs(qty_t)
+        weighted_err += err_ratio * weight_isin
+        total_aum += weight_isin
+    avg_err = weighted_err / total_aum if total_aum > 0 else 1.0
+    return score_curr * (1.0 - min(avg_err, 1.0))
+
+
+def run_surgery_pass(scores_history, errors_history, panel, monthly_flows,
+                     weights, universe, all_months):
+    """
+    Second pass: walking back in time, attempt a code surgery for every account
+    whose score would drop by more than SCORE_DROP_THRESHOLD, then carry the
+    scores along the repaired mapping. `scores_history` is accepted but not read.
+
+    Returns (mapping_history, surgery_log, scores, scores_history_corrected):
+      - mapping_history          : {date → {reg_orig → reg_used}}
+      - surgery_log              : list of dicts, one per surgery performed
+      - scores                   : scores at the earliest month
+      - scores_history_corrected : {date → {reg_orig → score}}
+    """
+    flows_idx = monthly_flows.set_index(['date', 'reg_id', 'isin'])['qty_net_month']
+
+    # Every reg_id present in the panel (universe + historical codes)
+    all_regs_in_panel = set(panel.columns.get_level_values(0))
+
+    # Pre-computation for the fast pre-filter: active (non-NaN, non-zero) ISINs
+    # per reg_id and date, as {reg_id → {date → set(isin)}}
+    reg_isin_at_date = {}
+    for reg in all_regs_in_panel:
+        reg_isin_at_date[reg] = {}
+        col = panel[reg]
+        for date in col.index:
+            active = set(col.columns[(col.loc[date].notna()) & (col.loc[date] != 0)].tolist())
+            if active:
+                reg_isin_at_date[reg][date] = active
+
+    # Current mapping reg_orig → reg_used, and the codes currently in use
+    mapping = {r: r for r in universe}
+    mapping_inv = {r: r for r in universe}
+    surgery_log = []
+    mapping_history = {all_months[-1]: dict(mapping)}
+    scores_history_corrected = {all_months[-1]: dict(weights)}
+    scores = dict(weights)  # current scores, initialised at t_ref
+
+    for i in range(len(all_months) - 2, -1, -1):
+        t_prev = all_months[i]
+        t_curr = all_months[i + 1]
+
+        new_scores = {}
+        new_mapping = {}
+
+        for reg_orig in list(mapping.keys()):
+            reg_curr = mapping[reg_orig]
+            score_curr = scores.get(reg_orig, 0)
+            if score_curr == 0:
+                new_scores[reg_orig] = 0
+                new_mapping[reg_orig] = reg_curr
+                continue
+
+            # Baseline without surgery. Step-2 errors were computed for the
+            # original code only, so once the account has been re-mapped the
+            # error is recomputed for the code actually in use.
+            if reg_curr == reg_orig:
+                err = errors_history.get(t_prev, {}).get(reg_orig, 0.0)
+                score_prev_no_surgery = score_curr * (1.0 - min(err, 1.0))
+            else:
+                score_prev_no_surgery = _recompute_score_with_candidate(
+                    reg_curr, reg_curr, t_prev, t_curr, panel, flows_idx, score_curr
+                )
+            drop_ratio = 1.0 - (score_prev_no_surgery / score_curr) if score_curr > 0 else 0
+
+            if drop_ratio > SCORE_DROP_THRESHOLD:
+                # ISINs of the current code at t_curr (for the pre-filter)
+                isin_curr = reg_isin_at_date.get(reg_curr, {}).get(t_curr, set())
+
+                # Available candidates (not mapped yet)
+                available = all_regs_in_panel - set(mapping_inv.keys()) - {reg_curr}
+
+                best_candidate = None
+                best_score_after = score_prev_no_surgery  # baseline = no surgery
+                best_composite = 0.0
+
+                for j in available:
+                    # Fast pre-filter: minimal ISIN overlap
+                    isin_j = reg_isin_at_date.get(j, {}).get(t_prev, set())
+                    if not isin_curr or not isin_j:
+                        continue
+                    inter = len(isin_curr & isin_j)
+                    if inter == 0:
+                        continue
+                    jac = inter / len(isin_curr | isin_j)
+                    if jac < MIN_JACCARD:
+                        continue
+
+                    # Score after surgery with this candidate
+                    score_after = _recompute_score_with_candidate(
+                        reg_curr, j, t_prev, t_curr, panel, flows_idx, score_curr
+                    )
+                    composite = jac * (score_after / score_curr) if score_curr > 0 else 0
+
+                    if score_after > best_score_after:
+                        best_score_after = score_after
+                        best_candidate = j
+                        best_composite = composite
+
+                if best_candidate is not None:
+                    surgery_log.append({
+                        'date': t_prev,
+                        'reg_orig': reg_orig,
+                        'reg_from': reg_curr,
+                        'reg_to': best_candidate,
+                        'jaccard_composite': round(best_composite, 4),
+                        'score_before': round(score_curr, 6),
+                        'score_after': round(best_score_after, 6),
+                        'drop_without_surgery': round(drop_ratio, 4),
+                        'gain_vs_no_surgery': round(best_score_after - score_prev_no_surgery, 6),
+                    })
+                    print(f"  🔧 CHIRURGIE {t_prev.date()} | {reg_orig} : "
+                          f"{reg_curr} → {best_candidate} "
+                          f"(composite={best_composite:.3f}, "
+                          f"score {score_curr:.4f}→{best_score_after:.4f})")
+
+                    # Mapping update
+                    if reg_curr in mapping_inv:
+                        del mapping_inv[reg_curr]
+                    mapping_inv[best_candidate] = reg_orig
+                    new_mapping[reg_orig] = best_candidate
+                    new_scores[reg_orig] = best_score_after
+                else:
+                    new_mapping[reg_orig] = reg_curr
+                    new_scores[reg_orig] = score_prev_no_surgery
+            else:
+                new_mapping[reg_orig] = reg_curr
+                new_scores[reg_orig] = score_prev_no_surgery
+
+        mapping = new_mapping
+        mapping_inv = {v: k for k, v in mapping.items()}
+        scores = new_scores
+        mapping_history[t_prev] = dict(mapping)
+        scores_history_corrected[t_prev] = dict(scores)
+
+        total_score = sum(s for s in scores.values() if not np.isnan(s))
+        n_surgeries = sum(1 for op in surgery_log if op['date'] == t_prev)
+        print(f"  {t_prev.date()} | Σ scores = {total_score:.4f} | "
+              f"Chirurgies = {n_surgeries}")
+
+    return mapping_history, surgery_log, scores, scores_history_corrected
+
+# ─────────────────────────────────────────────
+# 7. EXPORT RÉSULTATS
+# ─────────────────────────────────────────────
+def export_results(scores_history, mapping_history, surgery_log, all_months, out_prefix="carmignac"):
+    """
+    Export scores, mapping and surgery log as CSV files under repair_results/.
+
+    The surgery log file is only written when at least one surgery happened;
+    `all_months` is accepted but not used. Returns (df_scores, df_mapping).
+    """
+    # Create the output directory up front: to_csv does not create parents.
+    os.makedirs("repair_results", exist_ok=True)
+    # Score history, one row per (date, reg_id)
+    rows = [{'date': date, 'reg_id': reg, 'score': score}
+            for date, sc in scores_history.items() for reg, score in sc.items()]
+    df_scores = pd.DataFrame(rows) if rows else pd.DataFrame(columns=['date', 'reg_id', 'score'])
+    if not df_scores.empty:
+        df_scores = df_scores.sort_values(['date', 'score'], ascending=[True, False])
+    df_scores.to_csv(f"repair_results/{out_prefix}_scores.csv", index=False)
+
+    # Mapping history, one row per (date, reg_orig)
+    rows_m = [{'date': date, 'reg_orig': reg_orig, 'reg_used': reg_used,
+               'changed': reg_orig != reg_used}
+              for date, mp in mapping_history.items() for reg_orig, reg_used in mp.items()]
+    df_mapping = pd.DataFrame(rows_m) if rows_m else pd.DataFrame(columns=['date', 'reg_orig', 'reg_used', 'changed'])
+    if not df_mapping.empty:
+        df_mapping = df_mapping.sort_values(['date', 'reg_orig'])
+    df_mapping.to_csv(f"repair_results/{out_prefix}_mapping.csv", index=False)
+
+    # Surgery log
+    if surgery_log:
+        df_surgery = pd.DataFrame(surgery_log).sort_values('date')
+        df_surgery.to_csv(f"repair_results/{out_prefix}_surgery_log.csv", index=False)
+        print(f"\n[Export] {len(surgery_log)} opérations de chirurgie sauvegardées.")
+    else:
+        print("\n[Export] Aucune chirurgie effectuée sur ce subset.")
+    print(f"[Export] Scores    → {out_prefix}_scores.csv")
+    print(f"[Export] Mapping   → {out_prefix}_mapping.csv")
+    return df_scores, df_mapping
+
+# ─────────────────────────────────────────────
+# 8. PIPELINE PRINCIPAL
+# ─────────────────────────────────────────────
+def run_pipeline(aum_path, flows_path):
+    """
+    Run the full repair pipeline: load, reference universe, monthly panel,
+    monthly flows, score propagation (step 2), surgery pass (step 3), export.
+
+    Returns (df_scores, df_mapping, surgery_log, scores_history_corrected,
+    mapping_history); the exported scores are the post-surgery ones.
+    """
+    print("=" * 60)
+    print("CARMIGNAC — Pipeline de réparation des Registrar IDs")
+    print("=" * 60)
+    aum, flows = load_data(aum_path, flows_path)
+    # Step 1 — reference universe
+    aum_ref, weights, universe, t_ref = build_reference_universe(aum)
+    print(f"\n  Top 5 comptes par poids :")
+    for reg, w in sorted(weights.items(), key=lambda x: -x[1])[:5]:
+        print(f"    {reg} : {w:.4f} ({w*100:.2f}%)")
+
+    panel, all_months = build_monthly_panel(aum, universe, t_ref)
+    monthly_flows = aggregate_flows_monthly(flows, all_months)
+
+    # Step 2 — coherence score (no surgery)
+    print("\n[Étape 2] Propagation des scores (sans chirurgie)...")
+    scores_history, errors_history, _ = score_propagation(
+        panel, monthly_flows, weights, universe, all_months
+    )
+
+    # Step 3 — surgery
+    print("\n[Étape 3] Passe de chirurgie...")
+    mapping_history, surgery_log, final_scores, scores_history_corrected = run_surgery_pass(
+        scores_history, errors_history, panel, monthly_flows,
+        weights, universe, all_months
+    )
+
+    # Export — the corrected (post-surgery) scores are the reference
+    print("\n[Export des résultats...]")
+    df_scores, df_mapping = export_results(
+        scores_history_corrected, mapping_history, surgery_log, all_months
+    )
+    # Final summary
+    print("\n" + "=" * 60)
+    print("RÉSUMÉ FINAL")
+    print("=" * 60)
+    print(f"  Dates couvertes        : {all_months[0].date()} → {all_months[-1].date()}")
+    print(f"  Comptes dans l'univers : {len(universe)}")
+    print(f"  Chirurgies effectuées  : {len(surgery_log)}")
+    score_by_date = {d: sum(s for s in sc.values() if s == s)
+                     for d, sc in scores_history_corrected.items()}
+    # Scores at t_ref are stored under the last month end (all_months[-1]);
+    # indexing by t_ref itself raises KeyError when t_ref is not a month end.
+    print(f"  Σ scores à t_ref       : {score_by_date[all_months[-1]]:.4f}")
+    print(f"  Σ scores à t_min       : {score_by_date[all_months[0]]:.4f}")
+    return df_scores, df_mapping, surgery_log, scores_history_corrected, mapping_history
+
+
+if __name__ == "__main__":  # script entry point: run the whole pipeline
+    df_scores, df_mapping, surgery_log, scores_history, mapping_history = run_pipeline(
+        "s3://projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv",  # passed as aum_path
+        "s3://projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv"  # passed as flows_path
+    )
diff --git a/repair_challenge/repair_results/carmignac_report.html b/repair_challenge/repair_results/carmignac_report_0.1.html
similarity index 100%
rename from repair_challenge/repair_results/carmignac_report.html
rename to repair_challenge/repair_results/carmignac_report_0.1.html