diff --git a/carmignac_diagnostics.html b/carmignac_diagnostics.html new file mode 100644 index 0000000..a379c0b --- /dev/null +++ b/carmignac_diagnostics.html @@ -0,0 +1,4211 @@ + + + + + +Carmignac — Broken Months Diagnostics + + + + + +
+
Carmignac × ENSAE · Data Challenge 2025
+

Broken Months Diagnostics

+
+ Aggregate stock-flow equation check · ISIN level · threshold α = 15.0%
+ Missing % = |missing flow| / max(|ΔAUM|, |recorded flow|, 1 share) — capped at movement size, not stock level +
+
+ +
+
+ (ISIN, month) pairs + 39,874 + examined +
+
+ Broken months + 5,127 + 12.9% of pairs +
+
+ Likely lags + 425 + resolved by ±3d window +
+
+ Genuine gaps + 4702 + unresolved by lag fix +
+
+ ISINs affected + 319 + distinct ISINs +
+
+ Max missing % + 200.0% + worst single (isin, month) +
+
+ +
+ +
01 · Timeline
+ +
+
+ Broken (isin, month) pairs per month + Stacked: genuine gaps (red) vs likely accounting lags (amber) +
+
+
+
+
+ +
+
+
+ Total absolute missing flow per month + Sum of |missing flow| across all broken ISINs +
+
+
+
+
+ +
+
+ Missing % — top 5 ISINs over time + |missing flow| / max(|ΔAUM|, |recorded flow|) per ISIN — capped at movement size +
+
+
+
+
+
+ +
02 · By ISIN
+ +
+
+ ISIN summary — most affected +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ISINBroken monthsAvg missing %Total |missing| (shares)
LU09926249494744.07%7,282,917.3
MAPFRECG00015195.25%7,140,404.8
FR00101351035449.31%4,534,239.8
LU09926305996263.04%3,894,157.4
LU09926276114856.35%3,682,231.1
FR001400U4S3297.56%3,138,743.0
FR00135159701248.62%2,952,918.2
LU24206533673100.00%2,448,753.1
LU09926272983864.60%2,228,640.6
LU09926258395667.45%1,919,147.2
LU09926288584867.03%1,858,566.7
LU19324896903467.75%1,815,965.2
LU09926292375266.21%1,773,188.9
LU09926264805265.31%1,768,189.9
LU20043856674274.42%1,561,459.4
LU2809794220367.43%1,193,068.3
FR00101491796053.57%1,180,293.7
LU00991619935372.46%1,166,929.5
LU01644555024862.43%1,085,853.6
IE000YC0EJX13100.00%1,075,124.9
LU09926316475461.93%1,050,710.9
LU22959926761668.72%1,030,960.7
LU16237631481064.88%1,020,998.2
FR00101489816154.63%1,005,170.4
FR0014002E462984.02%981,066.3
FR00101492034553.83%949,632.1
FR00103061422747.59%910,391.8
LU09926297405375.16%893,106.6
LU17446304242166.75%883,715.3
FR00134670241972.21%881,422.2
FR00101491203641.81%845,388.8
FR00101491617064.74%744,756.9
LU05926989543354.78%740,668.6
LU2420652047373.07%722,247.7
LU1623763734658.35%714,314.9
GB00BJHPXB21359.04%703,777.9
LU16237619511552.94%703,364.6
LU22513052361585.38%684,707.2
FR00112690911167.96%651,258.0
LU27994734703100.00%649,277.5
FR00112690833046.46%645,983.5
FR00112695883944.23%644,350.2
LU0992631720785.37%627,665.2
FR00101491124752.18%618,821.7
LU09926312175163.79%604,562.6
FR00111474465964.54%568,511.6
FR00101493027149.76%560,943.4
LU12993066772954.56%535,603.7
LU22771463821763.48%509,201.9
LU24834850041100.00%499,400.7
LU21250443262100.00%487,234.5
LU24206512392100.00%459,265.5
FR00112690675150.84%433,581.9
LU10463273491272.68%427,245.1
FR00112691094454.78%422,515.9
LU2638444914268.46%407,488.0
FR00112695967158.62%398,757.6
FR0014000AL11989.21%397,703.3
FR00103126603446.60%388,424.9
LU09926250863784.18%385,821.5
FR00101489995259.81%367,936.8
LU11635337784064.18%357,642.1
LU09926307553673.25%348,752.5
FR00101492115956.02%345,149.2
LU16237628432237.94%343,880.9
LU16237632211691.85%332,120.5
LU09926252433772.70%330,544.9
LU12993051901937.92%322,024.8
LU2420652476276.59%317,375.0
FR00101476035153.85%281,786.1
FR0011269554858.81%274,204.1
LU09926253262371.30%273,314.4
LU1623762769968.91%261,729.3
LU1623762090498.66%254,647.8
LU09926277023368.77%251,741.1
LU11635334221750.05%240,974.3
FR00140082231553.61%239,550.0
LU19108374151275.16%233,741.3
FR00112691905967.92%233,060.5
FR00135160281969.73%228,155.0
LU05926990933865.98%227,607.2
FR0013516036670.04%222,579.3
FR00135160443100.00%216,692.3
LU12993008032100.00%200,000.0
LU17205133213047.10%198,934.7
LU2420652807761.95%198,717.9
LU17446282872352.18%198,641.8
LU19666310012170.62%191,292.9
FR001400U4T1195.77%188,038.1
FR0013515996945.70%187,975.6
FR00109566491066.58%182,399.8
FR001400KAX0379.39%181,792.5
LU12993057861849.45%178,062.8
FR00114438522942.35%176,222.0
LU03360840324145.19%174,443.9
LU22513053191757.95%170,521.2
LU22959923202469.31%167,720.2
LU08076895823170.19%154,860.6
FR001400U4V7194.15%149,362.0
LU2809794576297.96%148,388.0
LU09926284291973.11%145,992.9
LU12993113211385.23%141,041.3
LU03360834975452.86%139,746.9
FR00140081Y1115.72%137,216.6
LU2250732281321.38%135,107.5
LU12993063211329.06%132,268.3
LU16237624131463.03%129,186.8
LU1792391671978.85%126,329.7
FR001400U4U9256.40%123,428.9
LU17205115493155.40%117,687.7
LU09926275381777.29%112,601.0
LU07055728233949.48%111,226.6
LU09926300862959.55%111,205.4
LU2799473397492.93%109,628.1
FR00114438602658.57%106,376.0
FR00112693641065.08%97,707.2
LU03360838105551.87%94,772.0
FR00140139F6199.93%92,918.8
LU09926283462161.71%92,565.4
LU08076901683548.37%89,175.7
LU13177040511546.77%88,880.9
FR0011269125384.69%81,029.2
LU08076909113847.80%80,801.0
LU12993111641866.23%78,375.0
LU17205157062263.80%76,338.4
LU24206507771100.00%72,334.0
FR00112693801280.09%71,802.4
LU10463270002061.48%69,869.4
LU12993032293770.58%65,182.1
LU09926278842654.85%64,849.2
LU12993070553148.51%64,837.4
LU28097948161101.82%60,656.9
LU17923919111465.43%59,288.2
LU12993053562061.03%58,771.5
LU1748451231884.20%58,075.0
LU02942496923549.72%55,883.7
LU09926265632359.99%55,015.8
LU2020612904867.08%53,960.8
LU12993055131988.95%50,164.7
LU08076898222575.48%49,882.8
FR001400R3Z5199.89%48,231.6
LU22513054001646.91%47,393.4
GB00BK1W2P36193.48%47,184.3
LU2020612813781.02%47,174.6
FR00140082311036.97%47,023.3
LU17205136771746.32%46,384.5
LU11635336952573.53%45,521.6
LU09926279673362.72%45,455.7
LU12993068342852.65%45,430.7
LU2250732448349.05%40,999.7
LU08076900853653.90%40,533.6
LU24206518254100.00%40,054.0
LU0992626050881.43%38,794.1
GB00BJHPHZ49192.76%38,148.5
LU12993018762287.78%37,430.0
LU09926286921549.36%37,391.2
LU09926266471169.53%34,454.4
FR00112693494758.14%34,401.1
LU13177041351448.12%34,148.7
LU1299311677689.28%32,314.4
LU11635333492247.98%32,007.6
LU09926309121773.59%30,982.1
LU16237630641055.22%30,254.4
LU17205118951674.98%29,941.8
LU1720515615231.98%28,812.9
LU05926992593146.60%28,649.4
LU09926274541953.06%27,291.0
FR0010956607866.70%27,021.1
LU09926251691884.41%26,564.8
LU09926320251987.98%25,900.3
FR00112691825356.07%25,809.6
LU19666307062254.46%24,776.8
LU12993074851048.40%24,572.5
LU16237625042100.00%24,000.0
LU08076897494755.21%23,352.7
FR0011269158875.10%22,618.8
LU09926273711585.63%21,166.7
FR00113658732039.40%20,994.0
LU08076908382769.16%20,962.3
LU08076907542959.22%20,917.5
LU0992625912680.88%20,848.3
LU16237635772100.00%20,000.0
LU17446288732100.00%20,000.0
LU19108373322100.00%20,000.0
LU12993020982073.69%19,725.7
LU1623762330282.58%19,071.3
LU09926256721757.37%15,813.7
FR001400KAV4123.13%14,534.6
LU12993022541971.54%14,216.4
LU09926302431068.29%14,148.3
LU1910837258966.81%14,055.5
LU1966631266376.35%12,512.3
GB00BJHPHX25395.98%12,456.4
FR00112690752100.00%12,343.5
LU0992626134583.97%11,971.3
LU08076902421274.91%11,587.3
LU09926294011640.84%11,184.1
LU09926310502337.90%11,033.6
LU2295992163860.03%11,014.7
LU2799473124972.78%10,873.8
FR0010956615697.69%10,765.1
LU1122072793867.13%10,424.0
LU09926267201651.73%10,260.1
LU16237634942100.00%10,000.0
LU0992630326970.48%9,505.2
LU09926301691958.20%9,434.4
LU12993118341658.86%8,990.0
FR0013516002577.18%8,970.1
FR00112694063548.47%8,853.5
LU12993030621071.54%8,849.6
LU1299303575789.28%8,834.8
LU2020612490691.61%8,674.8
LU12993045401473.91%8,324.2
LU05534153231373.59%8,203.9
LU12993059431747.63%8,103.3
LU1792392216765.69%7,890.2
LU2809794493199.88%7,599.9
LU1299303906887.63%7,293.3
FR0011269547299.92%6,830.7
LU1299304896820.65%6,095.9
LU24206523933100.00%5,759.0
LU2721495260219.03%4,748.7
LU28126168161102.62%4,733.4
LU16237626861100.00%4,637.3
LU2420651072124.62%4,600.0
LU0992626993485.19%4,449.2
GB00BJHPHY32123.56%4,437.6
LU1873148016829.75%4,430.2
LU08076890792654.81%4,171.4
LU09926255995100.00%4,152.0
FR0011269570698.02%4,001.3
LU1792391838847.67%3,965.0
LU08076896653146.71%3,929.3
LU09926313082261.38%3,892.0
LU13177043091370.23%3,678.0
LU05534133852252.07%3,525.6
LU09926308392254.95%3,450.4
LU2139905785450.70%3,404.1
LU2004385154994.04%3,397.9
LU17923912421447.72%3,348.4
LU2020612730658.40%3,310.1
FR0014008207466.84%3,113.5
LU0553407650841.60%2,909.5
LU19324768791273.44%2,676.5
LU0807689400989.94%2,657.8
GB00BNDQ7P95138.67%2,568.6
LU1048598442664.51%2,527.5
FR00135278273100.00%2,451.3
LU1046327182984.88%2,407.3
LU05926991761293.78%2,309.3
LU2585801173125.70%2,258.6
LU13177042184100.00%2,191.4
LU09926293102100.00%2,100.0
LU08076889311665.80%1,913.2
LU09926319931148.91%1,888.1
FR00140051L1116.40%1,861.4
GB00BQXJRP97116.82%1,860.5
LU0992629583354.38%1,607.8
LU05534110901188.28%1,504.2
LU08076906711273.31%1,462.4
LU04133720603100.00%1,428.5
FR0013516010266.79%1,391.2
FR00112693311072.33%1,131.9
LU1623762256473.84%1,057.2
LU2721494966556.19%1,019.9
FR001400JG642100.00%1,000.0
LU0992629823358.70%900.0
LU31492007461100.00%868.2
LU0807689152825.96%825.5
LU1299304201264.94%794.0
LU22959922471125.89%785.4
LU2721495427119.25%708.4
LU1122113498686.88%706.4
LU09926289322058.92%662.0
LU1873147984271.56%640.0
FR0011269323394.11%637.9
FR00140081Z8115.10%573.0
LU1299301017472.06%571.1
LU1792392059352.16%537.0
LU1623762926232.53%487.9
FR001400M1N0117.22%482.8
LU24269511952100.00%400.0
LU0992631480431.77%357.9
LU2462965026125.99%326.3
LU0992625755581.79%314.8
LU24206526331100.00%295.0
LU11635339352100.00%260.0
LU24903245012100.00%223.2
LU1792391754488.22%204.4
LU17923924892100.00%200.0
LU17923921332100.00%200.0
LU17923925622100.00%200.0
LU17923923072100.00%200.0
LU17923913252100.00%200.0
LU17923915982100.00%200.0
LU09926287752100.00%181.3
LU24903244101100.00%180.0
LU0553405878239.36%156.1
LU09926315633100.00%133.5
LU24273204991100.00%131.9
LU0992628007383.13%131.5
LU1163533851490.59%108.9
LU1299302684526.03%108.9
LU0992628189263.53%95.5
LU09926296664100.00%72.0
LU17923926461100.00%61.0
LU19666309611103.85%54.0
LU08076910592100.00%23.9
LU10463272652100.00%20.0
LU10463274222100.00%20.0
LU25858012561100.00%10.0
LU1299301280241.62%3.2
LU14352451512100.00%2.0
LU09926263082100.00%2.0
LU09926291532100.00%2.0
LU12993028412100.00%2.0
LU12993024112100.00%2.0
LU12993037322100.00%2.0
LU09926270251100.00%1.0
+
+
+ +
03 · Detail log
+ +
+
+ All broken (isin, month) pairs + + lag = likely resolved by extending flow window ±3 days + +
+
Threshold α = 15.0% · showing up to 200 rows
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DateISINQ(t-1)Q(t)Net flowMissingMissing % of movement
2021-10-31LU129930187641,272.841,472.8-200.0+400.0200.00%
2019-07-31LU0992625086437,947.2439,921.3-1,965.9+3,940.0199.59%
2021-02-28LU0992626563160,596.6160,255.0340.0-681.6199.53%
2019-01-31FR0010148999462,052.4464,356.2-2,291.9+4,595.8199.48%
2020-08-31LU0992631217376,366.3383,863.7-7,431.0+14,928.3199.11%
2020-11-30LU17446304242,294,380.32,276,677.317,929.9-35,632.9198.73%
2020-02-29LU0992626480430,198.2443,198.5-12,812.2+25,812.5198.55%
2020-03-31LU129930209838,843.339,312.6-461.3+930.6198.29%
2020-07-31LU1966631001170,407.8170,527.0-121.7+241.0198.01%
2020-11-30LU0807689822101,096.297,961.83,047.6-6,182.0197.23%
2015-06-30FR00114438522,491,982.02,497,945.0-5,767.0+11,730.0196.71%
2023-10-31LU22513052361,990,279.12,036,540.8-44,669.9+90,931.6196.56%
2020-12-31LU080769083820,389.119,920.4450.2-918.9196.06%
2015-09-30LU0807689822488,811.1488,926.1-110.0+225.0195.65%
2019-06-30LU12993039065,919.94,934.81,031.5-2,016.5195.50%
2021-09-30FR00112695881,060,192.01,058,056.42,038.7-4,174.3195.46%
2015-06-30LU10463270001,263,760.91,264,553.4-755.2+1,547.8195.28%
2023-12-31LU1299306834119,675.6119,318.9335.3-692.0194.01%
2022-04-30LU0992625086207,158.1205,002.02,024.0-4,180.0193.87%
2022-01-31FR0014002E46256,889.0241,587.014,359.0-29,661.0193.84%
2023-08-31LU0992625839915,623.1894,089.820,186.5-41,719.8193.75%
2022-12-31LU17446304243,822,166.03,835,622.0-14,391.1+27,847.0193.50%
2021-03-31LU1910837415358,720.9357,862.3926.7-1,785.3192.65%
2020-01-31LU174845123138,470.038,530.0-65.0+125.0192.31%
2016-01-31LU099262516935,875.935,910.7-32.1+66.9192.22%lag
2022-01-31FR00101491204,340,913.24,390,919.1-45,984.5+95,990.4191.96%
2022-11-30LU17446282871,641,117.31,641,835.4-792.6+1,510.7190.61%
2018-10-31FR00101491791,164,762.41,147,977.115,184.5-31,969.8190.46%
2022-01-31LU0992629237495,401.8482,477.914,361.6-27,285.5189.99%
2022-07-31LU0807690911189,610.8190,694.7-973.0+2,057.0189.76%
2022-10-31FR0011269596125,580.3127,992.0-2,162.6+4,574.3189.67%
2017-12-31LU0992629740657,035.4659,997.9-3,305.2+6,267.7189.63%
2020-01-31LU19324896901,366,492.21,296,106.262,852.6-133,238.5189.30%
2019-12-31LU129930454013,578.714,050.3-528.4+1,000.0189.24%
2021-04-30LU099262753825,568.020,441.65,770.8-10,897.2188.83%
2022-01-31LU179239167172,194.199,213.5-30,417.4+57,436.8188.83%
2017-12-31FR001014916186,076.083,467.82,311.0-4,919.2188.61%
2020-11-30LU129930209841,760.942,009.4-218.9+467.5188.06%
2016-05-31LU11635334221,301,663.01,258,612.637,903.5-80,953.9188.04%
2022-01-31LU129931132142,203.073,421.5-35,669.1+66,887.6187.52%
2020-02-29LU0992628858863,778.9824,020.745,554.7-85,313.0187.28%lag
2023-01-31LU172051154981,368.081,704.1-386.1+722.2187.05%
2022-03-31LU0992625086206,132.3207,158.1-886.2+1,912.0186.39%
2023-09-30LU0992628858438,475.5441,051.8-2,217.3+4,793.6186.06%
2023-01-31LU09926305991,678,605.11,704,500.1-30,172.2+56,067.3185.82%
2021-05-31LU1299302254115,388.1114,650.0862.5-1,600.5185.57%
2020-07-31LU0992625086535,580.6534,726.6730.6-1,584.7185.55%
2016-09-30FR0011269596783,214.5782,419.3676.3-1,471.5185.05%
2024-01-31LU1932476879325,636.5325,652.2-18.6+34.4184.62%
2020-02-29LU172051189543,254.643,529.6-325.0+600.0184.62%
2023-05-31LU2295992320132,582.4133,563.5-1,159.8+2,141.0184.59%
2021-05-31LU1966631001173,809.6173,898.3-74.9+163.5184.48%
2018-03-31FR0010149211471,094.0473,287.3-1,852.6+4,045.8184.46%
2020-01-31LU0592699093620,491.8613,273.88,572.4-15,790.4184.20%
2022-08-31LU129930705582,033.782,410.4-449.3+826.0183.84%
2019-09-30LU05926989542,005,571.01,969,318.043,247.7-79,500.7183.83%
2023-04-30LU22513052361,607,014.61,642,066.8-29,349.5+64,401.6183.73%
2021-01-31LU0992631647890,404.6902,439.0-9,906.1+21,940.5182.32%
2020-11-30FR00101489812,059,749.32,058,733.91,235.0-2,250.4182.22%
2023-01-31FR0010149211327,146.4330,371.3-3,939.2+7,164.1181.87%
2024-01-31LU09926289321,886.01,941.0-45.0+100.0181.82%
2023-11-30LU22513052362,036,540.82,097,480.7-74,623.4+135,563.3181.66%
2021-05-31FR001126919064,812.065,087.9-223.3+499.2180.93%
2019-10-31LU01644555021,036,350.41,050,812.0-17,882.4+32,344.0180.87%
2018-01-31LU05926989543,939,802.63,896,946.734,288.2-77,144.1180.01%
2023-10-31FR0013467024439,847.7474,147.1-27,200.6+61,500.0179.30%
2020-08-31LU1966631001170,527.0170,389.9173.0-310.1179.26%
2023-09-30FR001126959698,268.099,596.7-1,677.0+3,005.8179.23%
2024-12-31LU13177040511,595,339.21,595,558.8-174.0+393.5179.22%
2017-08-31LU0992625326291,030.7287,628.74,303.0-7,705.0179.06%
2025-09-30LU1966631001631,719.2631,198.7407.9-928.4178.37%
2015-06-30FR001030614218,311,140.518,251,758.675,920.3-135,302.2178.22%
2023-07-31LU080768974912,451.312,600.9-116.0+265.6177.53%
2024-12-31LU0336083497197,574.1198,450.7-676.6+1,553.2177.18%
2018-05-31LU1299306834213,840.7212,969.41,129.3-2,000.6177.16%
2017-09-30LU0992628429351,111.5350,401.0924.8-1,635.3176.83%
2022-05-31LU0992625086205,002.0205,190.6-246.4+435.0176.54%
2025-09-30LU2295992320121,843.0121,632.9275.1-485.1176.36%
2021-01-31LU129930209839,805.439,214.5449.3-1,040.2176.05%
2015-06-30LU055341338514,905.814,940.7-26.5+61.5175.94%
2022-11-30LU229599232072,950.673,238.0-217.9+505.3175.84%
2022-06-30LU1623763221183,210.0161,343.416,529.3-38,395.9175.59%
2019-12-31FR0010148999427,252.8417,399.57,430.1-17,283.4175.41%
2024-01-31FR0010149302735,920.5736,643.1-961.3+1,683.9175.16%
2016-03-31FR00114438601,145,538.01,142,337.02,403.0-5,604.0175.07%
2023-04-30LU00991619931,002,257.61,009,455.5-5,399.0+12,596.9175.01%
2020-09-30FR001126940643,164.743,038.4169.5-295.8174.50%
2021-10-31LU0992630755477,721.4478,529.7-1,087.1+1,895.4174.35%
2020-03-31LU09926272982,052,919.02,025,161.337,346.1-65,103.8174.33%
2018-02-28LU0992630755808,227.7807,403.91,119.8-1,943.6173.56%
2017-07-31FR001126919082,669.082,622.363.7-110.4173.41%
2021-10-31FR0011269109331,219.2328,880.41,715.9-4,054.8173.36%
2020-12-31FR001013510313,830,818.113,860,890.0-41,041.4+71,113.3173.27%
2021-09-30LU1299303229133,454.3133,893.3-320.9+760.0173.09%lag
2016-05-31FR0011269596739,413.1748,534.1-6,666.7+15,787.7173.09%
2017-09-30LU0336084032905,060.7903,772.81,769.6-3,057.5172.78%
2020-02-29LU08076889312,418.42,334.1116.2-200.5172.53%
2020-07-31LU129930535643,197.643,307.8-79.7+190.0172.30%
2019-08-31LU0705572823191,408.5190,736.4484.5-1,156.6172.10%
2015-02-28FR001014916159,793.659,564.8164.6-393.4171.93%
2023-06-30FR0010147603480,073.7481,257.1-845.4+2,028.8171.44%
2020-02-29LU0164455502939,005.0981,098.6-29,854.0+71,947.6170.92%
2015-09-30LU116353377849,461.047,120.01,659.0-4,000.0170.87%
2019-05-31LU0992628858699,522.1750,528.0-72,332.9+123,338.8170.52%
2020-04-30LU099262842929,281.029,252.320.2-48.9170.25%
2020-05-31LU09926272982,030,187.32,025,778.46,278.9-10,687.9170.22%
2016-08-31LU1299311164161,356.5162,533.4-823.2+2,000.0169.95%
2023-10-31LU19324896902,217,456.92,230,427.4-18,551.9+31,522.4169.91%
2015-05-31FR00114438601,061,577.01,058,070.02,428.0-5,935.0169.23%
2024-04-30LU033608381029,939.829,683.0176.1-432.9168.60%
2021-08-31FR001114744678,444.1113,252.0-50,842.0+85,649.9168.46%
2017-03-31LU05926990931,076,587.41,073,680.94,268.1-7,174.6168.10%
2019-12-31FR0010149211409,898.2407,338.01,736.4-4,296.6167.83%
2023-10-31LU1299303229216,803.6217,740.3-633.7+1,570.4167.65%
2022-02-28LU09926305991,142,618.01,154,363.8-7,934.8+19,680.6167.55%
2021-11-30LU0992631217788,933.5772,557.324,431.2-40,807.5167.03%
2022-04-30FR00101489811,864,707.31,871,345.2-4,427.3+11,065.3166.70%
2023-11-30FR0010147603446,570.7446,778.3-311.9+519.5166.56%
2020-11-30FR0011269067352,858.1351,384.0979.7-2,453.8166.46%
2016-10-31LU0992625243391,142.1389,570.32,384.2-3,956.0165.93%
2017-02-28LU1163533778143,982.1124,197.912,993.8-32,778.0165.68%
2023-10-31FR0014008223380,340.1378,682.42,531.6-4,189.2165.48%
2019-10-31FR001114744667,234.567,459.5-344.9+569.8165.23%
2019-03-31LU0992630086176,886.1174,609.33,503.9-5,780.7164.98%
2019-05-31LU099262737147,782.947,732.977.0-127.0164.94%
2023-09-30FR0010147603456,903.8455,994.51,404.8-2,314.2164.73%
2021-02-28LU129931183412,832.512,571.1169.2-430.6164.70%
2020-11-30LU0992625243125,559.9129,130.3-5,549.7+9,120.1164.33%
2015-09-30FR0011365873390,215.6390,089.681.0-207.0164.29%
2021-04-30LU0099161993573,323.0575,509.0-3,404.3+5,590.4164.21%
2023-04-30LU1623763221241,910.8236,499.63,474.2-8,885.3164.20%
2020-08-31LU172051189540,964.840,823.390.7-232.1164.08%
2019-04-30LU099263091214,727.715,075.9-223.0+571.2164.04%
2021-01-31LU0992628858482,635.0485,511.3-4,492.4+7,368.8164.03%
2022-08-31LU1623763221148,821.4153,502.2-7,330.7+12,011.6163.85%
2017-02-28LU05926989544,127,778.44,105,759.414,047.7-36,066.7163.80%
2022-07-31LU0992631217596,237.2608,620.7-19,526.2+31,909.7163.42%
2018-06-30LU129930454021,433.222,180.4-473.1+1,220.3163.32%
2021-09-30LU0099161993580,023.6565,574.89,141.8-23,590.6163.27%
2019-01-31LU1299302254130,238.7130,205.153.1-86.7163.21%
2020-04-30LU2004385667258,295.8262,285.2-6,315.5+10,305.0163.17%
2023-07-31LU00991619931,002,044.71,005,278.0-2,031.7+5,264.9162.84%
2015-07-31LU0992631217754,675.8756,672.1-3,178.2+5,174.5162.81%
2021-08-31LU0992631647637,109.2634,546.51,597.5-4,160.2162.34%
2023-04-30FR0014002E46332,803.7361,232.0-17,572.0+46,000.3161.81%
2021-10-31LU09926269939,619.310,384.0-470.7+1,235.4161.57%
2023-12-31LU09926272981,291,509.21,350,102.9-95,406.0+153,999.7161.42%
2020-12-31LU099262753815,425.714,994.9263.6-694.4161.19%
2019-10-31FR0010148999421,081.1435,152.3-8,522.9+22,594.2160.57%
2016-12-31LU099263083913,379.113,408.7-17.8+47.4160.32%
2015-11-30LU05926991769,020.09,010.25.9-15.7160.24%
2021-11-30FR0011269182127,509.7127,542.9-55.2+88.4160.07%
2022-09-30LU09926272984,375,224.44,168,305.9123,717.5-330,636.0159.79%
2020-08-31LU0099161993593,941.8584,542.75,584.5-14,983.6159.41%
2015-04-30LU00991619932,029,467.42,030,238.2-1,303.7+2,074.4159.12%
2018-08-31LU080768958212,908.912,921.9-22.0+35.0159.09%
2017-12-31LU080769083861,773.261,625.986.6-233.9158.77%
2017-04-30LU0992629237749,836.0752,781.3-5,017.6+7,962.9158.70%
2021-10-31LU09926313087,289.17,444.2-89.8+245.0157.88%
2020-10-31LU080768982298,286.7101,096.2-1,624.0+4,433.5157.81%
2016-08-31LU0992628429527,879.5526,068.01,037.1-2,848.6157.26%
2022-04-30LU0992629237570,682.3572,423.1-3,054.0+4,794.8157.00%
2023-05-31LU0592699093671,484.0675,936.3-2,508.9+6,961.2156.35%
2021-03-31LU2020612904100,970.799,557.0790.3-2,204.0155.90%
2020-04-30FR0010148999392,332.0392,204.4229.6-357.1155.56%
2017-08-31FR0010149211477,891.9477,699.9346.3-538.2155.43%
2022-03-31LU09926276119,909,531.49,954,705.8-24,922.9+70,097.3155.17%
2023-03-31LU09926288581,083,266.21,076,128.012,956.7-20,094.9155.09%
2020-02-29LU09926272982,018,330.32,052,919.0-19,033.6+53,622.3155.03%
2020-11-30LU129930551320,778.821,684.1-494.7+1,400.0154.65%
2020-12-31LU059269925946,559.547,055.6-914.1+1,410.2154.27%
2023-12-31FR00112690831,519,948.41,518,968.01,806.8-2,787.2154.26%
2023-07-31FR0014002E46268,920.0242,313.049,518.3-76,125.3153.73%
2015-07-31FR00101491121,695,906.11,657,854.620,444.0-58,495.5153.73%
2023-05-31FR001126919048,349.249,106.6-406.6+1,164.1153.67%
2016-11-30LU080769008564,706.763,581.9602.5-1,727.4153.57%
2016-11-30LU05534153235,493.25,488.78.4-12.9153.44%
2018-03-31LU05926989543,815,977.43,832,806.2-31,593.5+48,422.3153.27%
2020-03-31LU129930551323,647.623,680.3-61.4+94.1153.26%
2016-12-31LU104632734959,172.659,143.215.6-45.0153.16%
2023-07-31LU2004385667360,131.4429,144.2-36,305.1+105,317.9152.61%
2020-11-30LU0992629237557,745.8536,727.839,965.3-60,983.3152.59%
2020-05-31LU080769075434,619.334,667.0-90.7+138.4152.58%
2021-12-31LU1299303229135,235.9134,755.7916.3-1,396.6152.41%
2023-08-31FR0010149211308,255.1309,363.4-2,115.0+3,223.3152.40%
2023-03-31FR00101489811,794,364.41,793,166.2624.9-1,823.0152.15%
2020-12-31LU0992625243129,130.3125,528.81,877.6-5,479.1152.13%
2019-01-31LU033608381099,361.196,862.71,302.4-3,800.9152.13%
2015-04-30LU01644555021,660,764.71,663,095.6-1,209.8+3,540.6151.90%
2022-05-31LU09926305991,175,832.51,201,247.2-13,174.9+38,589.6151.84%
2018-03-31FR00103126602,391,650.72,399,556.6-15,293.8+23,199.7151.69%
2020-05-31LU172051570696,899.097,899.0-516.2+1,516.2151.62%
2018-05-31FR001114744655,551.755,458.9180.3-273.0151.44%
2022-01-31LU09926305991,108,235.91,142,618.0-17,623.7+52,005.8151.26%
2024-02-29LU2004385667685,696.0654,756.815,789.1-46,728.2151.03%
2020-05-31LU0992628858363,338.5337,708.650,246.3-75,876.2151.01%
2021-06-30FR0011269182123,641.3123,661.5-10.3+30.5150.84%
2016-10-31LU0992627702587,865.2576,973.95,528.7-16,420.0150.76%
2017-09-30FR00101492031,143,731.21,146,564.3-1,438.1+4,271.3150.76%
2024-08-31LU0992625839741,726.3741,760.1-66.6+100.4150.74%
+
+
+ +
+ + + + + \ No newline at end of file diff --git a/repair_challenge/aaa.py b/repair_challenge/aaa.py deleted file mode 100644 index 785302b..0000000 --- a/repair_challenge/aaa.py +++ /dev/null @@ -1,702 +0,0 @@ -""" -Carmignac Data Challenge — Broken Months Diagnostics -===================================================== -Detects months where the aggregate stock-flow equation is violated -at the ISIN level (across all accounts): - - Σ_r Q_{r,s}(t) - Σ_r Q_{r,s}(t-1) ≠ Σ_r F_{r,s}(t-1→t) - -The residual is the "missing flow": - missing_{s}(t) = [Q_agg(t) - Q_agg(t-1)] - F_agg(t) - -This is a market-level check, independent of individual account identity. -It captures: - - Genuinely missing flow records - - End-of-month accounting lags (transactions dated at boundary) - - Corporate actions (dividends, splits) not reflected in flows - -Outputs -------- - carmignac_broken_months.csv — machine-readable, loaded by carmignac_repair.py - carmignac_diagnostics.html — interactive HTML report - -Usage ------ - python carmignac_diagnostics.py - python carmignac_diagnostics.py \\ - --aum raw_AUM.csv \\ - --flows raw_flows.csv \\ - --out carmignac_broken_months.csv \\ - --html carmignac_diagnostics.html \\ - --alpha 0.02 -""" - -import argparse -import json -import os -import sys - -from collections import defaultdict -import s3fs - -import numpy as np -import pandas as pd - - -# ───────────────────────────────────────────────────────────── -# 1. LOAD -# ───────────────────────────────────────────────────────────── -def load_data(): - fs = s3fs.S3FileSystem( - client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'}, - key = os.environ["AWS_ACCESS_KEY_ID"], - secret = os.environ["AWS_SECRET_ACCESS_KEY"], - token = os.environ["AWS_SESSION_TOKEN"]) - - with fs.open('projet-bdc-data//carmignac/Flows ENSAE V2 -20251105.csv', 'rb') as f: - flows = pd.read_csv(f, sep=";") - - with fs.open('projet-bdc-data//carmignac/AUM ENSAE V2 -20251105.csv', 'rb') as f: - aum = pd.read_csv(f, sep=";") - - aum['Centralisation Date'] = pd.to_datetime(aum['Centralisation Date']) - flows['Centralisation Date'] = pd.to_datetime(flows['Centralisation Date']) - - return aum, flows - -# ───────────────────────────────────────────────────────────── -# 2. AGGREGATE AND DETECT BROKEN MONTHS -# ───────────────────────────────────────────────────────────── - -def detect_broken_months(aum, flows, alpha=0.02, lag_days=3): - """ - For each (isin, month-end t), compute: - - Q_agg(t) : total shares held across all accounts - - Q_agg(t-1) : idem previous month (forward-filled) - - F_agg(t) : total net flows recorded in ]EOM(t-1), EOM(t)] - - missing(t) : [Q_agg(t) - Q_agg(t-1)] - F_agg(t) - - missing_pct : |missing| / max(Q_agg(t), Q_agg(t-1)) - - A month is flagged as "broken" when missing_pct > alpha. - - Additionally, a month is flagged as a potential "lag" when: - - It is broken with the standard window - - But would NOT be broken if flows dated within lag_days of EOM - are shifted to the adjacent month - - Parameters - ---------- - alpha : tolerance threshold (same as ALPHA in carmignac_repair.py) - lag_days : number of boundary days to test for accounting lag - - Returns - ------- - df_broken : DataFrame with all (isin, date) pairs where missing_pct > alpha - df_all : Full DataFrame including non-broken months (for plotting) - """ - # Monthly calendar - t_min = aum["Centralisation Date"].min() - t_max = aum["Centralisation Date"].max() - all_months = pd.date_range(t_min, t_max, freq="ME") - - # ── Aggregate AUM per (isin, month-end) ────────────────────── - aum_agg = ( - aum.groupby(["Product - Isin", "Centralisation Date"])["Quantity - AUM"] - .sum() - .reset_index() - .rename(columns={"Product - Isin": "isin", - "Centralisation Date": "date", - "Quantity - AUM": "qty_agg"}) - ) - # Forward-fill sparse panel - aum_pivot = aum_agg.pivot(index="date", columns="isin", values="qty_agg") - aum_pivot = aum_pivot.reindex(all_months).ffill() - - # ── Aggregate flows per (isin, month-end) — standard window ── - def bucket_flows(flows_df, months, lower_offset=0, upper_offset=0): - """Aggregate flows with optional boundary extension (in days).""" - fc = flows_df.copy() - def assign_month(d): - # Extended window: ]EOM(t-1) - lower_offset, EOM(t) + upper_offset] - for m in months: - eom_prev = m - pd.offsets.MonthEnd(1) - lo = eom_prev - pd.Timedelta(days=lower_offset) - hi = m + pd.Timedelta(days=upper_offset) - if lo < d <= hi: - return m - return pd.NaT - - fc["month_end"] = fc["Centralisation Date"].apply(assign_month) - fc = fc.dropna(subset=["month_end"]) - agg = (fc.groupby(["Product - Isin", "month_end"])["Quantity - NetFlows"] - .sum() - .reset_index() - .rename(columns={"Product - Isin": "isin", - "month_end": "date", - "Quantity - NetFlows": "flow_agg"})) - return agg - - flows_std = bucket_flows(flows, all_months) - flows_lag = bucket_flows(flows, all_months, - lower_offset=lag_days, - upper_offset=lag_days) - - def flows_to_pivot(df, months): - piv = df.pivot(index="date", columns="isin", values="flow_agg") - return piv.reindex(months).fillna(0.0) - - fpiv_std = flows_to_pivot(flows_std, all_months) - fpiv_lag = flows_to_pivot(flows_lag, all_months) - - # ── Compute residuals ───────────────────────────────────────── - rows = [] - isins = aum_pivot.columns.tolist() - - for i in range(1, len(all_months)): - t_curr = all_months[i] - t_prev = all_months[i - 1] - - for isin in isins: - q_curr = aum_pivot[isin].get(t_curr, np.nan) if isin in aum_pivot.columns else np.nan - q_prev = aum_pivot[isin].get(t_prev, np.nan) if isin in aum_pivot.columns else np.nan - - if pd.isna(q_curr) or pd.isna(q_prev): - continue - - delta = q_curr - q_prev - - # Standard window - f_std = fpiv_std[isin].get(t_curr, 0.0) if isin in fpiv_std.columns else 0.0 - missing_std = delta - f_std - - # Extended lag window - f_lag = fpiv_lag[isin].get(t_curr, 0.0) if isin in fpiv_lag.columns else 0.0 - missing_lag = delta - f_lag - - # ── Denominator choice ──────────────────────────────── - # Normalise by the size of the *movement* (max of delta_AUM - # and recorded flow), not by the stock level. This avoids - # astronomically large percentages when a position is tiny - # but the missing flow is a normal-sized number. - # - # Interpretation: "what fraction of the expected movement - # is unaccounted for?" 100% = the entire movement is missing. - # - # A minimum absolute threshold (min_abs_shares) suppresses - # noise from residual micro-positions (rounding artefacts). - min_abs_shares = 1.0 # ignore positions smaller than 1 share - movement = max(abs(delta), abs(f_std), min_abs_shares) - denom_std = movement - - movement_lag = max(abs(delta), abs(f_lag), min_abs_shares) - denom_lag = movement_lag - - pct_std = abs(missing_std) / denom_std - pct_lag = abs(missing_lag) / denom_lag - - broken_std = pct_std > alpha - broken_lag = pct_lag > alpha - - # A "lag" month: broken with standard, NOT broken with extended window - is_lag = broken_std and (not broken_lag) - - rows.append({ - "date": t_curr, - "isin": isin, - "q_agg_prev": round(q_prev, 3), - "q_agg_curr": round(q_curr, 3), - "delta_aum": round(delta, 3), - "flow_agg": round(f_std, 3), - "missing_flow": round(missing_std, 3), - "missing_pct": round(pct_std, 6), - "broken": broken_std, - "is_lag": is_lag, - }) - - df_all = pd.DataFrame(rows) - df_broken = df_all[df_all["broken"]].sort_values("missing_pct", ascending=False) - return df_broken, df_all - - -# ───────────────────────────────────────────────────────────── -# 3. PRINT SUMMARY -# ───────────────────────────────────────────────────────────── - -def print_summary(df_broken, df_all, alpha): - total = len(df_all) - n_broken = len(df_broken) - n_lag = df_broken["is_lag"].sum() - - print("\n" + "=" * 60) - print(" CARMIGNAC — Broken Months Diagnostics") - print("=" * 60) - print(f" (isin, month) pairs examined : {total}") - print(f" Broken (missing_pct > {alpha:.0%}) : {n_broken} " - f"({n_broken/total*100:.1f}%)") - print(f" Of which likely lag : {n_lag}") - print(f" Of which genuine gap : {n_broken - n_lag}") - - if n_broken: - print("\n Top 10 by missing_pct:") - cols = ["date", "isin", "missing_flow", "missing_pct", "is_lag"] - print(df_broken[cols].head(10).to_string(index=False)) - - # Monthly breakdown - by_month = (df_broken.groupby("date") - .agg(n_broken=("isin", "count"), - total_missing=("missing_flow", lambda x: x.abs().sum())) - .sort_values("n_broken", ascending=False) - .head(5)) - if len(by_month): - print("\n Most affected months:") - print(by_month.to_string()) - print() - - -# ───────────────────────────────────────────────────────────── -# 4. BUILD HTML REPORT -# ───────────────────────────────────────────────────────────── - -def build_html(df_broken, df_all, alpha): - # ── JS-ready data ──────────────────────────────────────────── - # Timeline: n_broken and total_missing per month - tl = (df_all[df_all["broken"]] - .groupby("date") - .agg(n_broken=("isin", "count"), - total_missing=("missing_flow", lambda x: x.abs().sum()), - n_lag=("is_lag", "sum")) - .reindex(df_all["date"].sort_values().unique()) - .fillna(0)) - tl.index = pd.to_datetime(tl.index) - dates_str = json.dumps([d.strftime("%Y-%m-%d") for d in tl.index]) - - def jf(arr, dec=4): - return json.dumps([round(float(v), dec) if not np.isnan(v) else None for v in arr]) - - n_broken_js = jf(tl["n_broken"].values, 0) - total_miss_js = jf(tl["total_missing"].values) - n_lag_js = jf(tl["n_lag"].values, 0) - - # Per-ISIN summary - isin_sum = (df_broken.groupby("isin") - .agg(n_months=("date", "count"), - avg_pct=("missing_pct", "mean"), - total_abs=("missing_flow", lambda x: x.abs().sum())) - .sort_values("total_abs", ascending=False)) - - ISIN_COLORS = [ - "#2563eb","#16a34a","#dc2626","#d97706","#7c3aed", - "#0891b2","#db2777","#65a30d","#ea580c","#6366f1", - ] - - # Per-ISIN missing_pct timeseries for the top 5 ISINs - top_isins = isin_sum.head(5).index.tolist() - all_dates = sorted(df_all["date"].unique()) - isin_ts_datasets = [] - for idx, isin in enumerate(top_isins): - sub = df_all[df_all["isin"] == isin].set_index("date")["missing_pct"].reindex(all_dates).fillna(0) - isin_ts_datasets.append({ - "label": isin, - "data": [round(float(v) * 100, 3) for v in sub.values], - "borderColor": ISIN_COLORS[idx % len(ISIN_COLORS)], - "backgroundColor": ISIN_COLORS[idx % len(ISIN_COLORS)] + "22", - "borderWidth": 2, - "pointRadius": 0, - "tension": 0.3, - "fill": False, - }) - isin_ts_json = json.dumps(isin_ts_datasets) - all_dates_str = json.dumps([d.strftime("%Y-%m-%d") if hasattr(d, 'strftime') - else str(d)[:10] for d in all_dates]) - - # Detail table rows - detail_rows = "" - for _, r in df_broken.head(200).iterrows(): - lag_badge = 'lag' if r["is_lag"] else "" - pct_class = "pct-high" if r["missing_pct"] > 0.1 else "pct-med" - detail_rows += f""" - - {r['date'].strftime('%Y-%m-%d') if hasattr(r['date'], 'strftime') else str(r['date'])[:10]} - {r['isin']} - {r['q_agg_prev']:,.1f} - {r['q_agg_curr']:,.1f} - {r['flow_agg']:,.1f} - {r['missing_flow']:+,.1f} - {r['missing_pct']*100:.2f}% - {lag_badge} - """ - - # ISIN summary table - isin_rows = "" - for isin, row in isin_sum.iterrows(): - isin_rows += f""" - - {isin} - {int(row['n_months'])} - {row['avg_pct']*100:.2f}% - {row['total_abs']:,.1f} - """ - - # KPIs - total = len(df_all) - n_broken_kpi = len(df_broken) - n_lag_kpi = int(df_broken["is_lag"].sum()) - n_genuine = n_broken_kpi - n_lag_kpi - max_pct = df_broken["missing_pct"].max() * 100 if len(df_broken) else 0 - n_isins = df_broken["isin"].nunique() - - no_broken_msg = "" - if n_broken_kpi == 0: - no_broken_msg = '
✓ No broken months detected at this threshold.
' - - html = f""" - - - - -Carmignac — Broken Months Diagnostics - - - - - -
-
Carmignac × ENSAE · Data Challenge 2025
-

Broken Months Diagnostics

-
- Aggregate stock-flow equation check · ISIN level · threshold α = {alpha:.1%}
- Missing % = |missing flow| / max(|ΔAUM|, |recorded flow|, 1 share) — capped at movement size, not stock level -
-
- -
-
- (ISIN, month) pairs - {total:,} - examined -
-
- Broken months - {n_broken_kpi:,} - {n_broken_kpi/total*100:.1f}% of pairs -
-
- Likely lags - {n_lag_kpi} - resolved by ±{3}d window -
-
- Genuine gaps - {n_genuine} - unresolved by lag fix -
-
- ISINs affected - {n_isins} - distinct ISINs -
-
- Max missing % - {max_pct:.1f}% - worst single (isin, month) -
-
- -
- -
01 · Timeline
- -
-
- Broken (isin, month) pairs per month - Stacked: genuine gaps (red) vs likely accounting lags (amber) -
-
-
-
-
- -
-
-
- Total absolute missing flow per month - Sum of |missing flow| across all broken ISINs -
-
-
-
-
- -
-
- Missing % — top 5 ISINs over time - |missing flow| / max(|ΔAUM|, |recorded flow|) per ISIN — capped at movement size -
-
-
-
-
-
- -
02 · By ISIN
- -
-
- ISIN summary — most affected -
-
- {'
No broken months detected.
' if n_broken_kpi == 0 else f""" - - - - - - {isin_rows} -
ISINBroken monthsAvg missing %Total |missing| (shares)
"""} -
-
- -
03 · Detail log
- -
-
- All broken (isin, month) pairs - - lag = likely resolved by extending flow window ±3 days - -
-
Threshold α = {alpha:.1%} · showing up to 200 rows
-
- {'
✓ No broken months detected at this threshold.
' if n_broken_kpi == 0 else f""" - - - - - - - - {detail_rows} -
DateISINQ(t-1)Q(t)Net flowMissingMissing % of movement
"""} -
-
- -
- - - - -""" - return html - - -# ───────────────────────────────────────────────────────────── -# 5. MAIN -# ───────────────────────────────────────────────────────────── - -def main(): - parser = argparse.ArgumentParser( - description="Detect broken months in Carmignac AUM/Flows data" - ) - parser.add_argument("--out", default="carmignac_broken_months.csv", - help="Machine-readable output (loaded by carmignac_repair.py)") - parser.add_argument("--html", default="carmignac_diagnostics.html") - parser.add_argument("--alpha", type=float, default=0.15, - help="Tolerance threshold (default 0.15 = 15%%)") - parser.add_argument("--lag", type=int, default=3, - help="Boundary days to test for accounting lag (default 3)") - args = parser.parse_args() - - def resolve(p): - if os.path.exists(p): - return p - alt = os.path.join(os.path.dirname(os.path.abspath(__file__)), p) - if os.path.exists(alt): - return alt - sys.exit(f"[ERROR] File not found: {p}") - - print("[Load] AUM") - print("[Load] Flows") - aum, flows = load_data() - - print(f"\n[Detect] Running broken-month detection (α={args.alpha:.1%}, lag=±{args.lag}d)...") - df_broken, df_all = detect_broken_months(aum, flows, alpha=args.alpha, lag_days=args.lag) - - print_summary(df_broken, df_all, args.alpha) - - # CSV output — this is what carmignac_repair.py will load - if len(df_broken): - df_broken.to_csv(args.out, index=False) - print(f"[Export] Broken months CSV → {args.out}") - else: - # Write empty file so repair pipeline can always try to load it - pd.DataFrame(columns=["date", "isin", "missing_pct", "is_lag"]).to_csv(args.out, index=False) - print(f"[Export] No broken months — empty CSV → {args.out}") - - html = build_html(df_broken, df_all, args.alpha) - with open(args.html, "w", encoding="utf-8") as f: - f.write(html) - print(f"[Export] HTML report → {args.html}") - - -if __name__ == "__main__": - main() diff --git a/repair_challenge/carmignac_diagnostics.py b/repair_challenge/carmignac_diagnostics.py index 56de0f8..785302b 100644 --- a/repair_challenge/carmignac_diagnostics.py +++ b/repair_challenge/carmignac_diagnostics.py @@ -68,7 +68,7 @@ def load_data(): # 2. AGGREGATE AND DETECT BROKEN MONTHS # ───────────────────────────────────────────────────────────── -def detect_broken_months(aum, flows, alpha=0.1, lag_days=3): +def detect_broken_months(aum, flows, alpha=0.02, lag_days=3): """ For each (isin, month-end t), compute: - Q_agg(t) : total shares held across all accounts @@ -116,7 +116,6 @@ def detect_broken_months(aum, flows, alpha=0.1, lag_days=3): def bucket_flows(flows_df, months, lower_offset=0, upper_offset=0): """Aggregate flows with optional boundary extension (in days).""" fc = flows_df.copy() - def assign_month(d): # Extended window: ]EOM(t-1) - lower_offset, EOM(t) + upper_offset] for m in months: @@ -165,17 +164,35 @@ def detect_broken_months(aum, flows, alpha=0.1, lag_days=3): continue delta = q_curr - q_prev - denom = max(abs(q_curr), abs(q_prev), 1e-9) # Standard window f_std = fpiv_std[isin].get(t_curr, 0.0) if isin in fpiv_std.columns else 0.0 missing_std = delta - f_std - pct_std = abs(missing_std) / denom # Extended lag window f_lag = fpiv_lag[isin].get(t_curr, 0.0) if isin in fpiv_lag.columns else 0.0 missing_lag = delta - f_lag - pct_lag = abs(missing_lag) / denom + + # ── Denominator choice ──────────────────────────────── + # Normalise by the size of the *movement* (max of delta_AUM + # and recorded flow), not by the stock level. This avoids + # astronomically large percentages when a position is tiny + # but the missing flow is a normal-sized number. + # + # Interpretation: "what fraction of the expected movement + # is unaccounted for?" 100% = the entire movement is missing. + # + # A minimum absolute threshold (min_abs_shares) suppresses + # noise from residual micro-positions (rounding artefacts). + min_abs_shares = 1.0 # ignore positions smaller than 1 share + movement = max(abs(delta), abs(f_std), min_abs_shares) + denom_std = movement + + movement_lag = max(abs(delta), abs(f_lag), min_abs_shares) + denom_lag = movement_lag + + pct_std = abs(missing_std) / denom_std + pct_lag = abs(missing_lag) / denom_lag broken_std = pct_std > alpha broken_lag = pct_lag > alpha @@ -196,7 +213,7 @@ def detect_broken_months(aum, flows, alpha=0.1, lag_days=3): "is_lag": is_lag, }) - df_all = pd.DataFrame(rows) + df_all = pd.DataFrame(rows) df_broken = df_all[df_all["broken"]].sort_values("missing_pct", ascending=False) return df_broken, df_all @@ -208,7 +225,7 @@ def detect_broken_months(aum, flows, alpha=0.1, lag_days=3): def print_summary(df_broken, df_all, alpha): total = len(df_all) n_broken = len(df_broken) - n_lag = df_broken["is_lag"].sum() + n_lag = df_broken["is_lag"].sum() print("\n" + "=" * 60) print(" CARMIGNAC — Broken Months Diagnostics") @@ -288,7 +305,7 @@ def build_html(df_broken, df_all, alpha): "tension": 0.3, "fill": False, }) - isin_ts_json = json.dumps(isin_ts_datasets) + isin_ts_json = json.dumps(isin_ts_datasets) all_dates_str = json.dumps([d.strftime("%Y-%m-%d") if hasattr(d, 'strftime') else str(d)[:10] for d in all_dates]) @@ -321,12 +338,12 @@ def build_html(df_broken, df_all, alpha): """ # KPIs - total = len(df_all) + total = len(df_all) n_broken_kpi = len(df_broken) - n_lag_kpi = int(df_broken["is_lag"].sum()) - n_genuine = n_broken_kpi - n_lag_kpi - max_pct = df_broken["missing_pct"].max() * 100 if len(df_broken) else 0 - n_isins = df_broken["isin"].nunique() + n_lag_kpi = int(df_broken["is_lag"].sum()) + n_genuine = n_broken_kpi - n_lag_kpi + max_pct = df_broken["missing_pct"].max() * 100 if len(df_broken) else 0 + n_isins = df_broken["isin"].nunique() no_broken_msg = "" if n_broken_kpi == 0: @@ -426,7 +443,8 @@ def build_html(df_broken, df_all, alpha):
Carmignac × ENSAE · Data Challenge 2025

Broken Months Diagnostics

- Aggregate stock-flow equation check · ISIN level · threshold α = {alpha:.1%} + Aggregate stock-flow equation check · ISIN level · threshold α = {alpha:.1%}
+ Missing % = |missing flow| / max(|ΔAUM|, |recorded flow|, 1 share) — capped at movement size, not stock level
@@ -491,7 +509,7 @@ def build_html(df_broken, df_all, alpha):
Missing % — top 5 ISINs over time - |missing| / max(Q(t), Q(t-1)) per ISIN + |missing flow| / max(|ΔAUM|, |recorded flow|) per ISIN — capped at movement size
@@ -534,7 +552,7 @@ def build_html(df_broken, df_all, alpha): DateISIN Q(t-1)Q(t) Net flowMissing - Missing % + Missing % of movement {detail_rows} """}