Revised the architecture + linked peers and stocks
This commit is contained in:
parent
7b97a9a79b
commit
9db61993fa
|
|
@ -1,629 +0,0 @@
|
|||
"""
|
||||
=============================================================================
|
||||
CARMIGNAC × ENSAE — Pipeline : Performance → Flux nets
|
||||
=============================================================================
|
||||
|
||||
Pipeline complet :
|
||||
1. Chargement & exploration
|
||||
2. Table de correspondance shareClass → ISIN (clé de jointure)
|
||||
3. Jointure AUM (stocks) × Performance (weekly_perf)
|
||||
4. Feature engineering : features de performance décalées + percentile
|
||||
5. Construction de la variable cible : flux nets (ΔAum proxy)
|
||||
6. Modèle prédictif : Random Forest avec walk-forward validation
|
||||
7. Analyse d'importance des variables (SHAP-like permutation importance)
|
||||
|
||||
NOTE : Ce script utilise les fichiers *_head.csv pour la démonstration.
|
||||
Remplacer les chemins par les fichiers complets pour l'analyse finale.
|
||||
|
||||
Dépendances : pandas, numpy, scikit-learn, matplotlib, seaborn
|
||||
=============================================================================
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.gridspec as gridspec
|
||||
import seaborn as sns
|
||||
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
|
||||
from sklearn.linear_model import Ridge
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics import mean_absolute_error, r2_score
|
||||
from sklearn.inspection import permutation_importance
|
||||
import warnings
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
# ── Style global ──────────────────────────────────────────────────────────────
|
||||
plt.rcParams.update({
|
||||
'figure.facecolor': 'white',
|
||||
'axes.facecolor': '#f8f9fa',
|
||||
'axes.grid': True,
|
||||
'grid.alpha': 0.4,
|
||||
'font.family': 'DejaVu Sans',
|
||||
})
|
||||
COLORS = ['#1f4e79', '#2e75b6', '#70ad47', '#ed7d31', '#a50026', '#ffc000']
|
||||
|
||||
# =============================================================================
|
||||
# 1. CHARGEMENT DES DONNÉES
|
||||
# =============================================================================
|
||||
|
||||
print("=" * 60)
|
||||
print("1. CHARGEMENT DES DONNÉES")
|
||||
print("=" * 60)
|
||||
|
||||
# ── Remplacer par les chemins vers les fichiers complets ──────────────────────
|
||||
PATH_STOCKS = "equity_stocks_head.csv" # → fichier AUM mensuel par compte
|
||||
PATH_PERF = "weekly_perf_head.csv" # → performances hebdomadaires
|
||||
|
||||
stocks = pd.read_csv(PATH_STOCKS, index_col=0)
|
||||
perf = pd.read_csv(PATH_PERF, index_col=0)
|
||||
|
||||
# Parsing des dates
|
||||
stocks['Centralisation Date'] = pd.to_datetime(stocks['Centralisation Date'])
|
||||
perf['Date'] = pd.to_datetime(perf['Date'])
|
||||
|
||||
print(f"stocks : {stocks.shape[0]:,} lignes × {stocks.shape[1]} colonnes")
|
||||
print(f"perf : {perf.shape[0]:,} lignes × {perf.shape[1]} colonnes")
|
||||
print(f"\nstocks — plage dates : {stocks['Centralisation Date'].min().date()} → {stocks['Centralisation Date'].max().date()}")
|
||||
print(f"perf — plage dates : {perf['Date'].min().date()} → {perf['Date'].max().date()}")
|
||||
print(f"perf — périodes disponibles : {sorted(perf['perfPeriod'].unique())}")
|
||||
|
||||
# =============================================================================
|
||||
# 2. TABLE DE CORRESPONDANCE shareClass_name → ISIN
|
||||
# =============================================================================
|
||||
#
|
||||
# Problème : weekly_perf n'a pas d'ISIN, stocks n'a pas le nom complet
|
||||
# de shareclass. La jointure se fait en deux temps :
|
||||
# a) Extraction d'un nom court depuis chaque source
|
||||
# b) Matching fuzzy sur ce nom court + Type shareclass + Devise
|
||||
#
|
||||
# En production : remplacer par la table de référence ISIN complète
|
||||
# fournie par Morningstar (fichier Peers.csv) qui contient Name + ISIN.
|
||||
# =============================================================================
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("2. TABLE DE CORRESPONDANCE shareClass → ISIN")
|
||||
print("=" * 60)
|
||||
|
||||
# ── Extraction du nom de stratégie (nom court) depuis perf ───────────────────
|
||||
# Exemples :
|
||||
# "Carmignac Pf Asia Discovery A EUR Acc" → "Asia Discovery", type=A, ccy=EUR
|
||||
# "Carmignac Investissement F EUR Acc" → "Investissement", type=F, ccy=EUR
|
||||
import re

# Trailing " <type> <currency> Acc" suffix, e.g. " A EUR Acc" or " F USD Acc".
_SHARECLASS_SUFFIX_RE = re.compile(r'\s+([A-Z])\s+([A-Z]{3})\s+Acc\s*$')
# Longest prefixes first so that "Carmignac " does not shadow the others.
_FUND_NAME_PREFIXES = ('Carmignac Portfolio ', 'Carmignac Pf ', 'Carmignac ')


def parse_shareclass_name(name):
    """
    Split a full share-class name into (strategy_name, shareclass_type, currency).

    The first matching fund-house prefix ("Carmignac Portfolio ",
    "Carmignac Pf " or "Carmignac ") is removed, then a trailing
    " X YYY Acc" suffix (one upper-case letter, a three-letter upper-case
    code, the word "Acc") is parsed off the end of the string.

    Parameters
    ----------
    name : str or missing value
        Full share-class name, e.g. "Carmignac Pf Asia Discovery A EUR Acc".

    Returns
    -------
    tuple
        (strategy, sc_type, currency). When the suffix is not recognised the
        stripped name is returned as the strategy and the other two items are
        None. When ``name`` is not a string (None, NaN, ...) all three items
        are None instead of raising.
    """
    # Missing names reach this function as None/NaN when it is applied to a
    # pandas column; they have no ``.strip()`` and cannot be parsed.
    if not isinstance(name, str):
        return None, None, None

    s = name.strip()
    for prefix in _FUND_NAME_PREFIXES:
        if s.startswith(prefix):
            s = s[len(prefix):]
            break

    m = _SHARECLASS_SUFFIX_RE.search(s)
    if m is None:
        return s, None, None
    return s[:m.start()].strip(), m.group(1), m.group(2)
|
||||
|
||||
perf_parsed = perf['shareClass_name'].drop_duplicates().apply(
|
||||
lambda x: pd.Series(parse_shareclass_name(x),
|
||||
index=['strategy_name', 'sc_type', 'currency'])
|
||||
)
|
||||
perf_parsed['shareClass_name'] = perf['shareClass_name'].drop_duplicates().values
|
||||
print("Shareclass parsées depuis perf :")
|
||||
print(perf_parsed.to_string(index=False))
|
||||
|
||||
# ── Extraction du nom court depuis stocks ─────────────────────────────────────
|
||||
stocks['strategy_name'] = (stocks['Product - Fund']
|
||||
.str.replace('Carmignac Portfolio ', '', regex=False)
|
||||
.str.replace('Carmignac ', '', regex=False)
|
||||
.str.strip())
|
||||
|
||||
# ── Correspondance ISIN depuis stocks : fund + type + currency ────────────────
|
||||
isin_ref = (stocks[['strategy_name',
|
||||
'Product - Shareclass Type',
|
||||
'Product - Shareclass Currency',
|
||||
'Product - Isin']]
|
||||
.drop_duplicates()
|
||||
.rename(columns={
|
||||
'Product - Shareclass Type': 'sc_type',
|
||||
'Product - Shareclass Currency': 'currency',
|
||||
'Product - Isin': 'isin'
|
||||
}))
|
||||
|
||||
# ── Jointure sur (strategy_name, sc_type, currency) ──────────────────────────
|
||||
mapping = perf_parsed.merge(isin_ref, on=['strategy_name', 'sc_type', 'currency'], how='left')
|
||||
print("\nTable de correspondance shareClass_name → ISIN :")
|
||||
print(mapping[['shareClass_name', 'strategy_name', 'sc_type', 'currency', 'isin']].to_string(index=False))
|
||||
|
||||
matched = mapping['isin'].notna().sum()
|
||||
print(f"\nMatch : {matched}/{len(mapping)} shareclass liées à un ISIN")
|
||||
if matched < len(mapping):
|
||||
unmatched = mapping[mapping['isin'].isna()]['shareClass_name'].tolist()
|
||||
print(f"⚠ Non matchées (à compléter manuellement ou via Peers.csv) :")
|
||||
for u in unmatched:
|
||||
print(f" - {u}")
|
||||
|
||||
# Enrichissement de perf avec l'ISIN
|
||||
perf = perf.merge(mapping[['shareClass_name', 'isin', 'strategy_name']],
|
||||
on='shareClass_name', how='left')
|
||||
|
||||
# =============================================================================
|
||||
# 3. CONSTRUCTION DU PANEL MENSUEL
|
||||
# =============================================================================
|
||||
#
|
||||
# Objectif : une ligne = (compte client, fonds, mois)
|
||||
# Colonnes : AUM_t, puis features de performance sur les mois précédents
|
||||
#
|
||||
# Alignement temporel :
|
||||
# - stocks : snapshot mensuel (fin de mois)
|
||||
# - perf : données hebdomadaires → on prend la valeur la plus récente
|
||||
# avant ou à la date de snapshot mensuel
|
||||
# =============================================================================
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("3. CONSTRUCTION DU PANEL MENSUEL")
|
||||
print("=" * 60)
|
||||
|
||||
# ── Pivot perf : une ligne par (isin, date_hebdo, perfPeriod) ────────────────
|
||||
perf_pivot = (perf
|
||||
.dropna(subset=['isin'])
|
||||
.pivot_table(index=['isin', 'Date'],
|
||||
columns='perfPeriod',
|
||||
values=['return', 'percentile'],
|
||||
aggfunc='mean')
|
||||
)
|
||||
# Aplatir les colonnes multi-index
|
||||
perf_pivot.columns = ['_'.join(col).strip() for col in perf_pivot.columns]
|
||||
perf_pivot = perf_pivot.reset_index()
|
||||
perf_pivot['Date'] = pd.to_datetime(perf_pivot['Date'])
|
||||
|
||||
print(f"perf_pivot shape : {perf_pivot.shape}")
|
||||
print(f"Colonnes de performance : {[c for c in perf_pivot.columns if c not in ['isin','Date']]}")
|
||||
|
||||
# ── Merge as-of : pour chaque snapshot stocks, trouver la perf hebdo ────────
|
||||
# la plus récente avant ou égale à la date de snapshot
|
||||
stocks_sorted = stocks.sort_values('Centralisation Date')
|
||||
perf_sorted = perf_pivot.sort_values('Date')
|
||||
|
||||
# Merge as-of par ISIN
|
||||
merged_parts = []
|
||||
for isin_val in stocks_sorted['Product - Isin'].unique():
|
||||
s_isin = stocks_sorted[stocks_sorted['Product - Isin'] == isin_val].copy()
|
||||
p_isin = perf_sorted[perf_sorted['isin'] == isin_val].copy()
|
||||
if p_isin.empty:
|
||||
merged_parts.append(s_isin)
|
||||
continue
|
||||
merged = pd.merge_asof(
|
||||
s_isin.sort_values('Centralisation Date'),
|
||||
p_isin.sort_values('Date'),
|
||||
left_on='Centralisation Date',
|
||||
right_on='Date',
|
||||
direction='backward',
|
||||
tolerance=pd.Timedelta('35 days') # max 5 semaines d'écart
|
||||
)
|
||||
merged_parts.append(merged)
|
||||
|
||||
panel = pd.concat(merged_parts, ignore_index=True)
|
||||
perf_cols = [c for c in panel.columns if c not in stocks.columns and c != 'isin' and c != 'Date']
|
||||
print(f"\nPanel après merge : {panel.shape}")
|
||||
print(f"Colonnes de perf jointes : {perf_cols}")
|
||||
n_matched = panel[perf_cols[0]].notna().sum() if perf_cols else 0
|
||||
print(f"Lignes avec performance jointe : {n_matched}/{len(panel)}")
|
||||
|
||||
# =============================================================================
|
||||
# 4. FEATURE ENGINEERING
|
||||
# =============================================================================
|
||||
#
|
||||
# Features construites par compte × fonds × mois :
|
||||
#
|
||||
# [A] Performance absolue décalée
|
||||
# - perf_6Mo : rendement sur 6 mois (lag=0, observé à t)
|
||||
# - perf_1Yr : rendement sur 1 an
|
||||
#
|
||||
# [B] Performance relative (percentile Morningstar)
|
||||
# - pct_6Mo : percentile dans la catégorie sur 6 mois
|
||||
# - pct_1Yr : percentile dans la catégorie sur 1 an
|
||||
#
|
||||
# [C] Features client (RFM proxy depuis AUM)
|
||||
# - aum_t : encours à t (proxy du M de RFM)
|
||||
# - aum_lag1 : encours à t-1 mois
|
||||
# - aum_lag3 : encours à t-3 mois
|
||||
# - aum_growth_1m : croissance MoM de l'AUM
|
||||
# - aum_growth_3m : croissance sur 3 mois
|
||||
#
|
||||
# [D] Variable cible : flux_net_proxy = AUM(t+1) - AUM(t)
|
||||
# (approximation des flux nets en l'absence des transactions brutes)
|
||||
# NOTE : avec les données de flux bruts (souscriptions + rachats),
|
||||
# remplacer par flux_net = sum(souscriptions) - sum(rachats)
|
||||
# sur la période t → t+1.
|
||||
#
|
||||
# =============================================================================
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("4. FEATURE ENGINEERING")
|
||||
print("=" * 60)
|
||||
|
||||
# ── Tri du panel ──────────────────────────────────────────────────────────────
|
||||
panel = panel.sort_values(['Registrar Account - ID', 'Product - Isin', 'Centralisation Date'])
|
||||
|
||||
# ── [C] Features AUM (par compte × fonds) ────────────────────────────────────
|
||||
panel['aum_lag1'] = panel.groupby(['Registrar Account - ID', 'Product - Isin'])['Value - AUM €'].shift(1)
|
||||
panel['aum_lag3'] = panel.groupby(['Registrar Account - ID', 'Product - Isin'])['Value - AUM €'].shift(3)
|
||||
|
||||
panel['aum_growth_1m'] = (panel['Value - AUM €'] - panel['aum_lag1']) / (panel['aum_lag1'].abs() + 1)
|
||||
panel['aum_growth_3m'] = (panel['Value - AUM €'] - panel['aum_lag3']) / (panel['aum_lag3'].abs() + 1)
|
||||
|
||||
# ── [D] Variable cible : ΔAum(t → t+1) ──────────────────────────────────────
|
||||
panel['aum_next'] = panel.groupby(['Registrar Account - ID', 'Product - Isin'])['Value - AUM €'].shift(-1)
|
||||
panel['flux_net_proxy'] = panel['aum_next'] - panel['Value - AUM €']
|
||||
|
||||
# ── Sélection des features ────────────────────────────────────────────────────
|
||||
# Colonnes de performance disponibles (dépend du contenu de perf)
|
||||
# Relative-performance columns (peer percentile), whatever their period.
PCT_COLS_AVAILABLE = [c for c in perf_cols if 'percentile' in c.lower()]

# Absolute-return columns for the 6-month and 1-year horizons. Percentile
# columns carry the same period tags (e.g. "percentile_1YrRet"), so they are
# excluded here; otherwise they would be listed twice in FEATURE_COLS.
PERF_COLS_AVAILABLE = [
    c for c in perf_cols
    if c not in PCT_COLS_AVAILABLE
    and any(tag in c for tag in ('6Mo', '1Yr', '6mo', '1yr'))
]

# With the head files only part of the periods is present: the feature list
# is restricted to the columns that actually exist in the panel.
# dict.fromkeys drops any repeated name while keeping the original order, so
# the model never receives the same column twice.
FEATURE_COLS = list(dict.fromkeys(
    ['Value - AUM €', 'aum_lag1', 'aum_lag3', 'aum_growth_1m', 'aum_growth_3m']
    + PERF_COLS_AVAILABLE
    + PCT_COLS_AVAILABLE
))
FEATURE_COLS = [c for c in FEATURE_COLS if c in panel.columns]
|
||||
|
||||
print(f"Features sélectionnées ({len(FEATURE_COLS)}) :")
|
||||
for f in FEATURE_COLS:
|
||||
n_valid = panel[f].notna().sum()
|
||||
print(f" {f:<40} → {n_valid:,} valeurs non-nulles")
|
||||
|
||||
TARGET = 'flux_net_proxy'
|
||||
|
||||
# ── Dataset modèle ────────────────────────────────────────────────────────────
|
||||
model_data = panel.dropna(subset=FEATURE_COLS + [TARGET]).copy()
|
||||
print(f"\nDataset pour modélisation : {model_data.shape[0]:,} lignes")
|
||||
|
||||
# =============================================================================
|
||||
# 5. MODÈLE PRÉDICTIF — WALK-FORWARD VALIDATION
|
||||
# =============================================================================
|
||||
#
|
||||
# Validation walk-forward (expanding window) :
|
||||
# - Évite le data leakage temporel
|
||||
# - À chaque fold : train = tout le passé, test = le mois suivant
|
||||
# - On calcule MAE, R² sur la fenêtre de test
|
||||
#
|
||||
# Modèles comparés :
|
||||
# 1. Baseline : prédiction zéro (benchmark naïf)
|
||||
# 2. Ridge Regression : modèle linéaire régularisé
|
||||
# 3. Random Forest : non-linéaire, robuste aux outliers
|
||||
# 4. Gradient Boosting : state-of-the-art sur données tabulaires
|
||||
#
|
||||
# =============================================================================
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("5. WALK-FORWARD VALIDATION")
|
||||
print("=" * 60)
|
||||
|
||||
if model_data.empty:
|
||||
print("⚠ Pas assez de données (fichiers head) pour la modélisation.")
|
||||
print(" Le pipeline est prêt — relancer avec les fichiers complets.")
|
||||
RUN_MODEL = False
|
||||
else:
|
||||
RUN_MODEL = True
|
||||
dates_sorted = sorted(model_data['Centralisation Date'].unique())
|
||||
N_DATES = len(dates_sorted)
|
||||
MIN_TRAIN = max(2, N_DATES // 3) # au moins 1/3 des dates en train
|
||||
print(f"Dates disponibles : {N_DATES} | Min train : {MIN_TRAIN} snapshots")
|
||||
|
||||
if RUN_MODEL and N_DATES > MIN_TRAIN:
|
||||
|
||||
results = []
|
||||
models = {
|
||||
'Ridge': Ridge(alpha=1.0),
|
||||
'Random Forest': RandomForestRegressor(n_estimators=100, max_depth=5,
|
||||
random_state=42, n_jobs=-1),
|
||||
'Gradient Boost': GradientBoostingRegressor(n_estimators=100, max_depth=3,
|
||||
learning_rate=0.05,
|
||||
random_state=42),
|
||||
}
|
||||
scaler = StandardScaler()
|
||||
|
||||
for test_idx in range(MIN_TRAIN, N_DATES):
|
||||
train_dates = dates_sorted[:test_idx]
|
||||
test_date = dates_sorted[test_idx]
|
||||
|
||||
train = model_data[model_data['Centralisation Date'].isin(train_dates)]
|
||||
test = model_data[model_data['Centralisation Date'] == test_date]
|
||||
|
||||
X_train = train[FEATURE_COLS].fillna(0)
|
||||
y_train = train[TARGET]
|
||||
X_test = test[FEATURE_COLS].fillna(0)
|
||||
y_test = test[TARGET]
|
||||
|
||||
if len(X_test) == 0:
|
||||
continue
|
||||
|
||||
X_train_sc = scaler.fit_transform(X_train)
|
||||
X_test_sc = scaler.transform(X_test)
|
||||
|
||||
# Baseline : flux net nul prédit pour chaque ligne de test (benchmark naïf)
|
||||
baseline_pred = np.zeros(len(y_test))
|
||||
baseline_mae = mean_absolute_error(y_test, baseline_pred)
|
||||
|
||||
for model_name, model in models.items():
|
||||
X_tr = X_train_sc if model_name == 'Ridge' else X_train
|
||||
X_te = X_test_sc if model_name == 'Ridge' else X_test
|
||||
model.fit(X_tr, y_train)
|
||||
preds = model.predict(X_te)
|
||||
results.append({
|
||||
'test_date': test_date,
|
||||
'model': model_name,
|
||||
'mae': mean_absolute_error(y_test, preds),
|
||||
'r2': r2_score(y_test, preds) if len(y_test) > 1 else np.nan,
|
||||
'baseline_mae': baseline_mae,
|
||||
'n_test': len(y_test),
|
||||
})
|
||||
|
||||
results_df = pd.DataFrame(results)
|
||||
print("\nRésultats agrégés (médiane sur tous les folds) :")
|
||||
summary = (results_df.groupby('model')
|
||||
.agg(MAE_median=('mae', 'median'),
|
||||
R2_median=('r2', 'median'),
|
||||
MAE_mean=('mae', 'mean'))
|
||||
.round(4))
|
||||
print(summary)
|
||||
|
||||
baseline_mae_median = results_df['baseline_mae'].median()
|
||||
print(f"\nBaseline (zéro) MAE médiane : {baseline_mae_median:.4f}")
|
||||
|
||||
else:
|
||||
if RUN_MODEL:
|
||||
print("⚠ Pas assez de dates distinctes pour le walk-forward.")
|
||||
print(" Modélisation ignorée sur données head — OK sur données complètes.")
|
||||
results_df = pd.DataFrame()
|
||||
|
||||
# =============================================================================
|
||||
# 6. IMPORTANCE DES VARIABLES
|
||||
# =============================================================================
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("6. IMPORTANCE DES VARIABLES")
|
||||
print("=" * 60)
|
||||
|
||||
if RUN_MODEL and not model_data.empty and len(model_data) > 10:
|
||||
X_all = model_data[FEATURE_COLS].fillna(0)
|
||||
y_all = model_data[TARGET]
|
||||
|
||||
rf_final = RandomForestRegressor(n_estimators=200, max_depth=6,
|
||||
random_state=42, n_jobs=-1)
|
||||
rf_final.fit(X_all, y_all)
|
||||
|
||||
importances = pd.Series(rf_final.feature_importances_, index=FEATURE_COLS).sort_values(ascending=False)
|
||||
print("Importance des features (Random Forest) :")
|
||||
print(importances.round(4).to_string())
|
||||
|
||||
# Permutation importance (plus robuste)
|
||||
perm = permutation_importance(rf_final, X_all, y_all, n_repeats=10, random_state=42, n_jobs=-1)
|
||||
perm_imp = pd.Series(perm.importances_mean, index=FEATURE_COLS).sort_values(ascending=False)
|
||||
print("\nPermutation importance :")
|
||||
print(perm_imp.round(4).to_string())
|
||||
else:
|
||||
importances = pd.Series(dtype=float)
|
||||
perm_imp = pd.Series(dtype=float)
|
||||
print("Importance des variables : données insuffisantes (head CSV).")
|
||||
print("Simuler les noms de features attendues :")
|
||||
expected = FEATURE_COLS if FEATURE_COLS else [
|
||||
'Value - AUM €', 'aum_lag1', 'aum_lag3',
|
||||
'aum_growth_1m', 'aum_growth_3m',
|
||||
'return_6MoRet', 'return_1YrRet',
|
||||
'percentile_6MoRet', 'percentile_1YrRet'
|
||||
]
|
||||
print(" " + ", ".join(expected))
|
||||
|
||||
# =============================================================================
|
||||
# 7. VISUALISATIONS
|
||||
# =============================================================================
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("7. GÉNÉRATION DES VISUALISATIONS")
|
||||
print("=" * 60)
|
||||
|
||||
fig = plt.figure(figsize=(18, 20))
|
||||
fig.patch.set_facecolor('white')
|
||||
gs = gridspec.GridSpec(4, 2, figure=fig, hspace=0.45, wspace=0.35)
|
||||
|
||||
# ── [A] Distribution des AUM par fonds ───────────────────────────────────────
|
||||
ax1 = fig.add_subplot(gs[0, :])
|
||||
aum_by_fund = stocks.groupby('strategy_name')['Value - AUM €'].sum().sort_values(ascending=False)
|
||||
bars = ax1.bar(aum_by_fund.index, aum_by_fund.values / 1e6, color=COLORS[:len(aum_by_fund)])
|
||||
ax1.set_title('AUM total par fonds (données disponibles)', fontsize=13, fontweight='bold', pad=10)
|
||||
ax1.set_ylabel('AUM (M€)')
|
||||
ax1.tick_params(axis='x', rotation=20)
|
||||
for bar, val in zip(bars, aum_by_fund.values):
|
||||
ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5,
|
||||
f'{val/1e6:.1f}M', ha='center', va='bottom', fontsize=8)
|
||||
|
||||
# ── [B] Évolution temporelle de l'AUM ────────────────────────────────────────
|
||||
ax2 = fig.add_subplot(gs[1, 0])
|
||||
aum_time = stocks.groupby('Centralisation Date')['Value - AUM €'].sum()
|
||||
ax2.fill_between(aum_time.index, aum_time.values / 1e6, alpha=0.3, color=COLORS[0])
|
||||
ax2.plot(aum_time.index, aum_time.values / 1e6, color=COLORS[0], linewidth=2)
|
||||
ax2.set_title('AUM agrégé — évolution temporelle', fontsize=12, fontweight='bold')
|
||||
ax2.set_ylabel('AUM (M€)')
|
||||
ax2.tick_params(axis='x', rotation=20)
|
||||
|
||||
# ── [C] Distribution des performances ────────────────────────────────────────
|
||||
ax3 = fig.add_subplot(gs[1, 1])
# `perf` holds one 'return' column for all perfPeriod values (it is pivoted
# on perfPeriod elsewhere), so there is a single series to plot.
# NOTE(review): the axis label and title mention the 1-year return, but the
# histogram pools every perfPeriod present in `perf` — confirm this is intended.
if 'return' in perf.columns:
    ax3.hist(perf['return'].dropna(), bins=30, alpha=0.6, label='return')
    ax3.legend()
else:
    # Nothing to plot: show a placeholder instead of indexing a missing column.
    ax3.text(0.5, 0.5, "Colonne 'return' absente",
             transform=ax3.transAxes, ha='center', va='center')
ax3.set_xlabel('Rendement 1 an (%)')
ax3.set_title('Distribution des performances (1YrRet)', fontsize=12, fontweight='bold')
ax3.set_ylabel('Fréquence')
|
||||
|
||||
# ── [D] Scatter : performance vs percentile ──────────────────────────────────
|
||||
ax4 = fig.add_subplot(gs[2, 0])
|
||||
if 'return' in perf.columns and 'percentile' in perf.columns:
|
||||
sc = ax4.scatter(perf['return'], perf['percentile'],
|
||||
alpha=0.5, c=COLORS[0], edgecolors='none', s=25)
|
||||
ax4.set_xlabel('Rendement 1 an (%)')
|
||||
ax4.set_ylabel('Percentile dans la catégorie')
|
||||
ax4.set_title('Performance vs Rang relatif (peer percentile)', fontsize=12, fontweight='bold')
|
||||
# Ligne de référence médiane
|
||||
ax4.axhline(50, color='red', linestyle='--', alpha=0.5, label='Médiane (50e pct)')
|
||||
ax4.legend(fontsize=9)
|
||||
|
||||
# ── [E] Importance des variables (si disponible) ─────────────────────────────
|
||||
ax5 = fig.add_subplot(gs[2, 1])
|
||||
if not importances.empty:
|
||||
colors_imp = [COLORS[2] if 'perf' in f or 'return' in f or 'percentile' in f
|
||||
else COLORS[0] for f in importances.index]
|
||||
ax5.barh(importances.index[::-1], importances.values[::-1], color=colors_imp[::-1])
|
||||
ax5.set_title('Importance des features (Random Forest)', fontsize=12, fontweight='bold')
|
||||
ax5.set_xlabel('Importance (Gini impurity)')
|
||||
# Légende
|
||||
from matplotlib.patches import Patch
|
||||
legend_els = [Patch(color=COLORS[2], label='Features performance'),
|
||||
Patch(color=COLORS[0], label='Features AUM/comportement')]
|
||||
ax5.legend(handles=legend_els, fontsize=8)
|
||||
else:
|
||||
# Afficher le schéma du pipeline à la place
|
||||
ax5.axis('off')
|
||||
pipeline_text = (
|
||||
"PIPELINE — FEATURES ATTENDUES\n\n"
|
||||
"■ AUM features (comportement):\n"
|
||||
" • Value - AUM € (encours actuel)\n"
|
||||
" • aum_lag1, aum_lag3\n"
|
||||
" • aum_growth_1m, aum_growth_3m\n\n"
|
||||
"■ Performance features (moyen terme):\n"
|
||||
" • return_6MoRet\n"
|
||||
" • return_1YrRet\n\n"
|
||||
"■ Relative performance (peer):\n"
|
||||
" • percentile_6MoRet\n"
|
||||
" • percentile_1YrRet\n\n"
|
||||
"→ Relancer avec données complètes\n"
|
||||
" pour obtenir les importances réelles."
|
||||
)
|
||||
ax5.text(0.05, 0.95, pipeline_text, transform=ax5.transAxes,
|
||||
fontsize=9.5, verticalalignment='top', fontfamily='monospace',
|
||||
bbox=dict(boxstyle='round', facecolor='#eaf2fb', alpha=0.8))
|
||||
ax5.set_title('Features du modèle', fontsize=12, fontweight='bold')
|
||||
|
||||
# ── [F] Résultats walk-forward (si disponible) ───────────────────────────────
|
||||
ax6 = fig.add_subplot(gs[3, :])
|
||||
if not results_df.empty:
|
||||
for model_name, grp in results_df.groupby('model'):
|
||||
ax6.plot(grp['test_date'], grp['mae'], marker='o', label=model_name, linewidth=1.5)
|
||||
ax6.axhline(results_df['baseline_mae'].median(), color='black',
|
||||
linestyle='--', label='Baseline (zéro)', linewidth=1.5)
|
||||
ax6.set_title('Walk-Forward Validation — MAE par modèle', fontsize=12, fontweight='bold')
|
||||
ax6.set_ylabel('MAE (€)')
|
||||
ax6.legend()
|
||||
ax6.tick_params(axis='x', rotation=20)
|
||||
else:
|
||||
ax6.axis('off')
|
||||
# Schéma du walk-forward
|
||||
ax6.set_xlim(0, 10)
|
||||
ax6.set_ylim(0, 3)
|
||||
ax6.set_title('Walk-Forward Validation — Schéma', fontsize=12, fontweight='bold')
|
||||
ax6.set_facecolor('white')
|
||||
|
||||
colors_wf = [COLORS[0], COLORS[2], COLORS[3]]
|
||||
for fold_i in range(5):
|
||||
# Fenêtre train
|
||||
ax6.barh(2, fold_i + 2, left=0, height=0.35,
|
||||
color=COLORS[0], alpha=0.3 + fold_i * 0.08)
|
||||
# Fenêtre test
|
||||
ax6.barh(2, 1, left=fold_i + 2, height=0.35, color=COLORS[3], alpha=0.8)
|
||||
|
||||
ax6.text(3, 2.55, 'Train (expanding window)', fontsize=10, color=COLORS[0], fontweight='bold')
|
||||
ax6.text(5.5, 2.55, 'Test', fontsize=10, color=COLORS[3], fontweight='bold')
|
||||
ax6.text(0.2, 1.4,
|
||||
"Fold 1 : train t₁…t₂ → test t₃\n"
|
||||
"Fold 2 : train t₁…t₃ → test t₄\n"
|
||||
"Fold 3 : train t₁…t₄ → test t₅\n"
|
||||
" ...\n"
|
||||
"→ Évite tout data leakage temporel\n"
|
||||
"→ MAE et R² calculés sur chaque fenêtre de test",
|
||||
fontsize=10, fontfamily='monospace',
|
||||
bbox=dict(boxstyle='round', facecolor='#eaf2fb', alpha=0.8))
|
||||
ax6.set_yticks([])
|
||||
ax6.set_xticks([])
|
||||
|
||||
plt.suptitle('Carmignac × ENSAE — Pipeline : Performance → Flux nets',
|
||||
fontsize=15, fontweight='bold', y=1.01)
|
||||
|
||||
from pathlib import Path

output_path = '/mnt/user-data/outputs/carmignac_pipeline_viz.png'
# savefig does not create missing directories; make sure the target exists.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path, dpi=150, bbox_inches='tight', facecolor='white')
# Release the figure once it has been written to disk.
plt.close()
print(f"✅ Visualisation sauvegardée : {output_path}")
|
||||
|
||||
# =============================================================================
|
||||
# 8. RÉSUMÉ & INSTRUCTIONS POUR LES DONNÉES COMPLÈTES
|
||||
# =============================================================================
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("8. RÉSUMÉ & PROCHAINES ÉTAPES")
|
||||
print("=" * 60)
|
||||
|
||||
print("""
|
||||
PIPELINE IMPLÉMENTÉ
|
||||
───────────────────
|
||||
Étape 1 — Chargement
|
||||
• equity_stocks_head.csv : AUM mensuels par (compte, fonds, shareclass)
|
||||
• weekly_perf_head.csv : performances hebdomadaires par shareclass
|
||||
|
||||
Étape 2 — Jointure (clé construite)
|
||||
• Parsing shareClass_name → (strategy, type, currency)
|
||||
• Matching vers ISIN via stocks
|
||||
• merge_asof temporel (tolérance ±35j)
|
||||
⚠ En production : utiliser Peers.csv (Morningstar) comme table de référence
|
||||
ISIN complète pour éviter les non-matchés.
|
||||
|
||||
Étape 3 — Feature Engineering
|
||||
• AUM features : lag 1m, lag 3m, croissance 1m, croissance 3m
|
||||
• Perf absolue : return_6MoRet, return_1YrRet (lags à t)
|
||||
• Perf relative : percentile_6MoRet, percentile_1YrRet (vs peers)
|
||||
• Variable cible : ΔAum(t→t+1) [proxy flux nets]
|
||||
⚠ En production : remplacer ΔAum par flux_net = souscriptions - rachats
|
||||
|
||||
Étape 4 — Modèles
|
||||
• Baseline : prédiction zéro
|
||||
• Ridge Regression (linéaire régularisée)
|
||||
• Random Forest (non-linéaire, robuste)
|
||||
• Gradient Boosting (state-of-the-art tabulaire)
|
||||
|
||||
Étape 5 — Validation
|
||||
• Walk-forward expanding window (pas de data leakage)
|
||||
• Métriques : MAE, R²
|
||||
|
||||
POUR LANCER SUR LES DONNÉES COMPLÈTES
|
||||
──────────────────────────────────────
|
||||
1. Remplacer PATH_STOCKS et PATH_PERF par les vrais fichiers
|
||||
2. Ajouter le fichier Peers.csv dans la fonction parse_shareclass_name
|
||||
(jointure directe par ISIN si disponible dans perf complet)
|
||||
3. Remplacer flux_net_proxy par les vraies transactions brutes
|
||||
(fichier flux quotidiens → agrégation mensuelle par compte × fonds)
|
||||
4. Ajouter des features macro (€STR, indices obligataires) depuis
|
||||
market_data/esterRates.csv et Eur_Gov_Indices.xlsx
|
||||
|
||||
LECTURE DES RÉSULTATS
|
||||
──────────────────────
|
||||
La littérature (Sirri & Tufano 1998) prédit une relation CONVEXE :
|
||||
→ Les fonds en haut de percentile (top quartile) attirent des flux
|
||||
disproportionnés
|
||||
→ Les fonds en bas ne perdent pas symétriquement (« smart money »)
|
||||
→ Tester une feature non-linéaire : percentile² ou dummy top/bottom quartile
|
||||
""")
|
||||
12620
client_clusters.csv
12620
client_clusters.csv
File diff suppressed because it is too large
Load Diff
BIN
cluster_map.png
BIN
cluster_map.png
Binary file not shown.
|
Before Width: | Height: | Size: 278 KiB |
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
|
@ -1,28 +0,0 @@
|
|||
from sklearn.preprocessing import RobustScaler
|
||||
from sklearn.cluster import KMeans
|
||||
|
||||
def run_clustering_pipeline(feature_df, n_clusters=4):
    """
    Cluster clients with K-Means on robustly scaled features.

    Parameters
    ----------
    feature_df : DataFrame
        One row per client, one column per feature.
    n_clusters : int, default 4
        Number of clusters passed to KMeans.

    Returns
    -------
    tuple
        (labelled, centers, scaler): the zero-filled feature frame with an
        added 'Cluster' column, the KMeans cluster centres (fitted on the
        scaled data), and the fitted RobustScaler.
    """
    # Clients with insufficient history have missing sensitivities; 0 is used
    # as a neutral value.
    filled = feature_df.fillna(0)

    # RobustScaler rather than StandardScaler: less influenced by very large
    # ("whale") clients.
    robust = RobustScaler()
    model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    cluster_ids = model.fit_predict(robust.fit_transform(filled))

    # Labels are attached to a copy so the filled frame itself is not mutated.
    labelled = filled.copy()
    labelled['Cluster'] = cluster_ids

    return labelled, model.cluster_centers_, robust
|
||||
|
||||
def get_cluster_profiles(results_df):
    """
    Return the average profile of each cluster.

    Parameters
    ----------
    results_df : DataFrame
        Feature frame containing a 'Cluster' column.

    Returns
    -------
    DataFrame
        One row per cluster label, one column per numeric feature, holding
        the per-cluster mean.
    """
    # numeric_only=True: non-numeric columns (e.g. string identifiers) cannot
    # be averaged and would otherwise make recent pandas versions raise.
    return results_df.groupby('Cluster').mean(numeric_only=True)
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
--- Cluster Profiles (Mean Values), original regression ---
|
||||
Cluster 0 1 2
|
||||
total_subs 8.848696e+06 1.866568e+07 0.000000e+00
|
||||
total_reds -9.907930e+06 -1.866568e+07 -7.255456e+03
|
||||
net_flow_vol -1.059233e+06 0.000000e+00 -7.255456e+03
|
||||
txn_count 1.493652e+02 1.000000e+00 2.000000e+00
|
||||
tenure_days 5.199713e+02 0.000000e+00 0.000000e+00
|
||||
buy_sell_ratio 1.508150e+08 3.733136e+13 -1.000000e+00
|
||||
pct_flow_ALTERNATIVE 1.857841e-02 0.000000e+00 0.000000e+00
|
||||
pct_flow_DIVERSIFIED 1.845385e+02 0.000000e+00 1.443736e+10
|
||||
pct_flow_EQUITY 8.731666e-02 0.000000e+00 0.000000e+00
|
||||
pct_flow_FIXED INCOME -1.842562e+02 1.000000e+00 -1.443736e+10
|
||||
pct_flow_NAN 3.976915e-04 0.000000e+00 0.000000e+00
|
||||
pct_flow_PRIVATE ASSETS 9.285151e-04 0.000000e+00 0.000000e+00
|
||||
avg_aum 8.342624e+05 1.482901e+07 2.185214e+04
|
||||
aum_volatility 8.135300e+05 8.274060e+06 2.058599e+02
|
||||
Registrar Account - ID 0.000000e+00 0.000000e+00 0.000000e+00
|
||||
alpha 0.000000e+00 0.000000e+00 0.000000e+00
|
||||
beta_rate 0.000000e+00 0.000000e+00 0.000000e+00
|
||||
beta_bond 0.000000e+00 0.000000e+00 0.000000e+00
|
||||
r_squared 0.000000e+00 0.000000e+00 0.000000e+00
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
import pandas as pd
|
||||
import os
|
||||
import s3fs
|
||||
# Shared S3 filesystem handle for the MinIO endpoint. Credentials are read from
# the AWS_* environment variables when this module is imported.
fs = s3fs.S3FileSystem(
    client_kwargs={'endpoint_url': 'https://minio-simple.lab.groupe-genes.fr'},
    key=os.environ["AWS_ACCESS_KEY_ID"],
    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
    # A session token only exists for temporary credentials; .get() avoids a
    # KeyError at import time when long-lived keys are used instead.
    token=os.environ.get("AWS_SESSION_TOKEN"),
)
|
||||
|
||||
def _parse_dates_dayfirst(values):
    """Parse *values* as day-first dates, retrying with pandas' default parser."""
    try:
        return pd.to_datetime(values, dayfirst=True)
    except (ValueError, TypeError):
        # Day-first parsing rejected the column; fall back to the default rules.
        return pd.to_datetime(values)


def load_and_clean_data(rates_path, gov_path,
                        aum_uri='s3://projet-bdc-carmignac-g3/AUM_repaired.csv',
                        flows_uri='s3://projet-bdc-carmignac-g3/flows.csv'):
    """Load the flows, AUM and market datasets with parsed date columns.

    Args:
        rates_path: Path of the rates CSV (must contain a 'Date' column).
        gov_path: Path of the government bond index CSV (must contain a
            'Date' column).
        aum_uri: S3 URI of the AUM CSV, opened through the module-level ``fs``.
        flows_uri: S3 URI of the flows CSV, opened through the module-level
            ``fs``.

    Returns:
        Tuple ``(flows, aum, rates, gov)`` of DataFrames. 'Centralisation Date'
        (flows, aum) and 'Date' (rates, gov) are converted to datetimes.
    """
    # Enforce string types for IDs to prevent 'Mixed Type' warnings
    dtype_spec = {
        'Registrar Account - ID': str,
        'Company - Id': str,
        'Company - Ultimate Parent Id': str,
        'Agreement - Code': str,
    }

    with fs.open(aum_uri, 'rb') as f:
        aum = pd.read_csv(f, sep=",", dtype=dtype_spec)

    with fs.open(flows_uri, 'rb') as f:
        flows = pd.read_csv(f, sep=",", dtype=dtype_spec)

    flows['Centralisation Date'] = pd.to_datetime(flows['Centralisation Date'])
    aum['Centralisation Date'] = pd.to_datetime(aum['Centralisation Date'])

    # Market data loading (standardizing dates)
    print("Loading Market Data...")
    rates = pd.read_csv(rates_path)
    rates['Date'] = _parse_dates_dayfirst(rates['Date'])

    gov = pd.read_csv(gov_path)
    gov['Date'] = _parse_dates_dayfirst(gov['Date'])

    return flows, aum, rates, gov
|
||||
|
|
@ -1,225 +0,0 @@
|
|||
import pandas as pd
|
||||
import statsmodels.api as sm
|
||||
import numpy as np
|
||||
|
||||
def compute_static_features(flows_df, aum_df):
    """Generate descriptive per-client features from flows and AUM.

    Args:
        flows_df: Transaction-level flows with the columns
            'Registrar Account - ID', 'Value € - Subscription',
            'Value € - Redemption', 'Value € - NetFlows', 'Agreement - Code',
            'Centralisation Date' (datetime) and 'Product - Asset Type'.
        aum_df: AUM observations with 'Registrar Account - ID' and
            'Value - AUM €'.

    Returns:
        DataFrame indexed by 'Registrar Account - ID' with flow totals,
        transaction count, tenure in days, a buy/sell ratio in [-1, 1],
        one 'pct_flow_<asset type>' column per asset type, and AUM mean/std.
        Missing values are filled with 0.
    """
    client_key = 'Registrar Account - ID'

    # --- 1. Flow Dynamics ---
    flow_stats = flows_df.groupby(client_key).agg(
        total_subs=('Value € - Subscription', 'sum'),
        total_reds=('Value € - Redemption', 'sum'),
        net_flow_vol=('Value € - NetFlows', 'sum'),
        txn_count=('Agreement - Code', 'count'),
        tenure_days=('Centralisation Date', lambda x: (x.max() - x.min()).days),
    )

    # Robust Buy/Sell Ratio: +1 = only buys, -1 = only sells.
    # Magnitudes are used on both sides so the ratio is correct whether
    # redemptions are recorded as negative or positive amounts; subtracting a
    # negative redemption total would otherwise push every client towards +1.
    subs_abs = flow_stats['total_subs'].abs()
    reds_abs = flow_stats['total_reds'].abs()
    total_vol = subs_abs + reds_abs
    # The +1.0 in the denominator avoids division by zero for idle clients.
    flow_stats['buy_sell_ratio'] = ((subs_abs - reds_abs) / (total_vol + 1.0)).clip(-1, 1)

    # --- 2. Product Preferences ---
    pos_flows = flows_df[flows_df['Value € - Subscription'] > 0]
    asset_pivot = (
        pos_flows
        .groupby([client_key, 'Product - Asset Type'])['Value € - Subscription']
        .sum()
        .unstack(fill_value=0)
    )
    row_sums = asset_pivot.sum(axis=1)
    asset_pct = asset_pivot.div(row_sums + 1.0, axis=0).add_prefix('pct_flow_')

    # --- 3. AUM Stats ---
    aum_stats = aum_df.groupby(client_key).agg(
        avg_aum=('Value - AUM €', 'mean'),
        aum_volatility=('Value - AUM €', 'std'),
    )

    # Outer join keeps clients that only appear in the AUM data.
    features = flow_stats.join(asset_pct).join(aum_stats, how='outer').fillna(0)
    return features
|
||||
|
||||
def compute_shock_sensitivities(flows_df, aum_df, rates_df, gov_df, freq='ME',
                                min_txns=250, min_avg_aum=1000, min_periods=6):
    """Estimate per-client flow sensitivities to market shock regimes.

    For every eligible client an OLS regression of scaled net flows on shock
    dummies is fitted. Shock dummies flag periods in the top/bottom quartile
    of the rate change ('Rate_Spike', 'Rate_Drop') and of the 'EG04' bond
    index return ('Bond_Rally', 'Bond_Crash'). Dummies that never fire for a
    client are left out of that client's regression and reported as 0.

    Args:
        flows_df: Flows with 'Registrar Account - ID', 'Centralisation Date'
            (datetime) and 'Quantity - NetFlows'.
        aum_df: AUM observations with 'Registrar Account - ID' and
            'Value - AUM €'.
        rates_df: Rates with 'Date' (datetime) and 'Yld to Maturity'.
        gov_df: Bond index data with 'Date' (datetime), 'Bond/Index' and
            'Total Return % 1-wk-LOC'.
        freq: Resampling frequency (pandas offset alias).
        min_txns: Minimum number of flow rows for a client to be modelled.
        min_avg_aum: Clients need a mean AUM strictly above this value.
        min_periods: Minimum number of periods shared with the market data.

    Returns:
        DataFrame indexed by 'Registrar Account - ID' with 'alpha_normal',
        'beta_rate_spike', 'beta_rate_drop', 'beta_bond_rally',
        'beta_bond_crash' and 'shock_r_squared'. The frame is empty (same
        index name and columns) when no client could be modelled.

    The input frames are not modified.
    """
    client_key = 'Registrar Account - ID'
    yield_col = 'Yld to Maturity'
    return_col = 'Total Return % 1-wk-LOC'
    flow_col = 'Quantity - NetFlows'
    all_shock_cols = ['Rate_Spike', 'Rate_Drop', 'Bond_Rally', 'Bond_Crash']
    result_cols = ['alpha_normal', 'beta_rate_spike', 'beta_rate_drop',
                   'beta_bond_rally', 'beta_bond_crash', 'shock_r_squared']

    print(f"DEBUG: Computing Sensitivities (Threshold={min_txns})...")

    # Period aliases differ from the end-anchored offset aliases used for
    # resampling ('ME' -> 'M', ...); translate so to_period() accepts them.
    period_freq = {'ME': 'M', 'QE': 'Q', 'YE': 'Y'}.get(freq, freq)

    # --- 1. Prepare Market Factors (on copies, forcing numeric types) ---
    rates = rates_df[['Date', yield_col]].copy()
    rates[yield_col] = pd.to_numeric(rates[yield_col], errors='coerce')
    delta_rates = rates.set_index('Date').resample(freq)[yield_col].last().diff()

    gov_target = gov_df.loc[gov_df['Bond/Index'] == 'EG04', ['Date', return_col]].copy()
    gov_target[return_col] = pd.to_numeric(gov_target[return_col], errors='coerce')
    gov_target = gov_target.set_index('Date')
    gov_target = gov_target[~gov_target.index.duplicated(keep='first')]
    # Compound the weekly percentage returns within each period.
    gov_res = gov_target[return_col].resample(freq).apply(lambda x: (1 + x / 100).prod() - 1)

    market_df = pd.concat(
        [delta_rates.rename('Delta_Rate'), gov_res.rename('Bond_Return')], axis=1
    ).dropna()

    # String period index for robust merging with the client series
    market_df['Period_Str'] = market_df.index.to_period(period_freq).astype(str)
    market_df = market_df.set_index('Period_Str')

    # --- 2. Define Shocks (outside the interquartile range) ---
    rate_q1 = market_df['Delta_Rate'].quantile(0.25)
    rate_q3 = market_df['Delta_Rate'].quantile(0.75)
    bond_q1 = market_df['Bond_Return'].quantile(0.25)
    bond_q3 = market_df['Bond_Return'].quantile(0.75)

    market_df['Rate_Spike'] = (market_df['Delta_Rate'] > rate_q3).astype(int)
    market_df['Rate_Drop'] = (market_df['Delta_Rate'] < rate_q1).astype(int)
    market_df['Bond_Rally'] = (market_df['Bond_Return'] > bond_q3).astype(int)
    market_df['Bond_Crash'] = (market_df['Bond_Return'] < bond_q1).astype(int)

    # --- 3. Funneling ---
    aum_values = pd.to_numeric(aum_df['Value - AUM €'], errors='coerce')
    mean_aum = aum_values.groupby(aum_df[client_key]).mean()
    valid_aum_clients = mean_aum[mean_aum > min_avg_aum].index

    txn_counts = flows_df[client_key].value_counts()
    active_clients = txn_counts[txn_counts >= min_txns].index

    eligible_clients = list(set(valid_aum_clients) & set(active_clients))
    print(f"Shock Model Funnel: {len(eligible_clients)} clients eligible (Active >= {min_txns} txns).")

    # --- 4. Regression ---
    # Keep only eligible clients' rows, once, instead of scanning the full
    # flows frame for every client.
    eligible_mask = flows_df[client_key].isin(eligible_clients)
    flows = flows_df.loc[eligible_mask, [client_key, 'Centralisation Date', flow_col]].copy()
    flows['Period_Str'] = flows['Centralisation Date'].dt.to_period(period_freq).astype(str)
    flows[flow_col] = pd.to_numeric(flows[flow_col], errors='coerce')

    client_betas = []
    success_count = 0
    failure_printed = False

    for client, c_flows in flows.groupby(client_key):
        c_ts = c_flows.groupby('Period_Str')[flow_col].sum()
        merged = pd.merge(c_ts, market_df, left_index=True, right_index=True, how='inner')

        if len(merged) < min_periods:
            continue

        client_avg_wealth = mean_aum.loc[client]

        # Skip invalid AUM
        if not np.isfinite(client_avg_wealth) or client_avg_wealth == 0:
            continue

        # NOTE(review): this divides a flow *quantity* by an AUM *value* in
        # EUR — confirm the intended units with the data owners.
        Y = merged[flow_col] / client_avg_wealth

        # Dynamic feature selection: drop shock columns that are all zeros
        # (the event never happened in this client's periods).
        valid_cols = [col for col in all_shock_cols if merged[col].sum() > 0]
        X = sm.add_constant(merged[valid_cols])

        # Check data validity
        if Y.isna().any() or X.isna().any().any():
            if not failure_printed:
                print(f"DEBUG CRASH: Client {client} has NaNs.")
                failure_printed = True
            continue

        try:
            model = sm.OLS(Y, X).fit()
        except Exception as e:
            # Best effort: one failing client must not abort the whole run;
            # only the first failure is reported to keep the log readable.
            if not failure_printed:
                print(f"DEBUG CRASH: {e}")
                failure_printed = True
            continue

        result_dict = {
            client_key: client,
            'alpha_normal': model.params.get('const', 0),
            'shock_r_squared': model.rsquared,
        }
        # Fill missing betas with 0
        for col in all_shock_cols:
            result_dict[f'beta_{col.lower()}'] = model.params.get(col, 0)

        client_betas.append(result_dict)
        success_count += 1

    print(f"DEBUG: Successfully modeled {success_count} clients.")

    if not client_betas:
        # Same layout as the populated result (client ID as index) so a
        # downstream join does not pick up the ID as a feature column.
        return pd.DataFrame(
            columns=result_cols,
            index=pd.Index([], name=client_key),
            dtype=float,
        )

    return pd.DataFrame(client_betas).set_index(client_key)
|
||||
|
||||
def compute_linear_sensitivities(flows_df, aum_df, rates_df, gov_df, freq='M',
                                 min_txns=250, min_avg_aum=1000, min_periods=6):
    """Estimate per-client linear flow sensitivities to market factors.

    Fits, per eligible client, the OLS model
    ``Flow ~ Alpha + Beta_Rate * dRate + Beta_Bond * BondRet`` where ``Flow``
    is the period net-flow quantity divided by the client's mean AUM, ``dRate``
    the period change of 'Yld to Maturity' and ``BondRet`` the compounded
    period return of the 'EG04' bond index.

    Args:
        flows_df: Flows with 'Registrar Account - ID', 'Centralisation Date'
            (datetime) and 'Quantity - NetFlows'.
        aum_df: AUM observations with 'Registrar Account - ID' and
            'Value - AUM €'.
        rates_df: Rates with 'Date' (datetime) and 'Yld to Maturity'.
        gov_df: Bond index data with 'Date' (datetime), 'Bond/Index' and
            'Total Return % 1-wk-LOC'.
        freq: Resampling frequency (pandas offset alias).
        min_txns: Minimum number of flow rows for a client to be modelled.
        min_avg_aum: Clients need a mean AUM strictly above this value.
        min_periods: Minimum number of periods shared with the market data.

    Returns:
        DataFrame indexed by 'Registrar Account - ID' with 'alpha_linear',
        'beta_rate_linear', 'beta_bond_linear' and 'linear_r_squared'. The
        frame is empty (same index name and columns) when no client could be
        modelled.

    The input frames are not modified.
    """
    client_key = 'Registrar Account - ID'
    yield_col = 'Yld to Maturity'
    return_col = 'Total Return % 1-wk-LOC'
    flow_col = 'Quantity - NetFlows'
    result_cols = ['alpha_linear', 'beta_rate_linear', 'beta_bond_linear', 'linear_r_squared']

    print("DEBUG: Computing Sensitivities (Linear Model)...")

    # Period aliases differ from the end-anchored offset aliases used for
    # resampling ('ME' -> 'M', ...); translate so to_period() accepts them.
    period_freq = {'ME': 'M', 'QE': 'Q', 'YE': 'Y'}.get(freq, freq)

    # 1. Prepare Market Data (on copies, forcing numeric types)
    rates = rates_df[['Date', yield_col]].copy()
    rates[yield_col] = pd.to_numeric(rates[yield_col], errors='coerce')
    delta_rates = rates.set_index('Date').resample(freq)[yield_col].last().diff()

    gov_target = gov_df.loc[gov_df['Bond/Index'] == 'EG04', ['Date', return_col]].copy()
    gov_target[return_col] = pd.to_numeric(gov_target[return_col], errors='coerce')
    gov_target = gov_target.set_index('Date')
    gov_target = gov_target[~gov_target.index.duplicated(keep='first')]
    # Compound the weekly percentage returns within each period.
    gov_res = gov_target[return_col].resample(freq).apply(lambda x: (1 + x / 100).prod() - 1)

    market_df = pd.concat(
        [delta_rates.rename('Delta_Rate'), gov_res.rename('Bond_Return')], axis=1
    ).dropna()
    market_df['Period_Str'] = market_df.index.to_period(period_freq).astype(str)
    market_df = market_df.set_index('Period_Str')

    # 2. Funneling
    aum_values = pd.to_numeric(aum_df['Value - AUM €'], errors='coerce')
    mean_aum = aum_values.groupby(aum_df[client_key]).mean()
    valid_aum_clients = mean_aum[mean_aum > min_avg_aum].index

    txn_counts = flows_df[client_key].value_counts()
    active_clients = txn_counts[txn_counts >= min_txns].index
    eligible_clients = list(set(valid_aum_clients) & set(active_clients))

    print(f"Linear Model Funnel: {len(eligible_clients)} clients eligible.")

    # 3. Regression
    # Keep only eligible clients' rows, once, instead of scanning the full
    # flows frame for every client.
    eligible_mask = flows_df[client_key].isin(eligible_clients)
    flows = flows_df.loc[eligible_mask, [client_key, 'Centralisation Date', flow_col]].copy()
    flows['Period_Str'] = flows['Centralisation Date'].dt.to_period(period_freq).astype(str)
    flows[flow_col] = pd.to_numeric(flows[flow_col], errors='coerce')

    client_betas = []
    failure_printed = False

    for client, c_flows in flows.groupby(client_key):
        c_ts = c_flows.groupby('Period_Str')[flow_col].sum()
        merged = pd.merge(c_ts, market_df, left_index=True, right_index=True, how='inner')

        if len(merged) < min_periods:
            continue

        client_avg_wealth = mean_aum.loc[client]
        if not np.isfinite(client_avg_wealth) or client_avg_wealth == 0:
            continue

        # NOTE(review): this divides a flow *quantity* by an AUM *value* in
        # EUR — confirm the intended units with the data owners.
        Y = merged[flow_col] / client_avg_wealth
        X = sm.add_constant(merged[['Delta_Rate', 'Bond_Return']])

        try:
            model = sm.OLS(Y, X).fit()
        except Exception as e:
            # Best effort: one failing client must not abort the whole run;
            # only the first failure is reported to keep the log readable.
            if not failure_printed:
                print(f"DEBUG CRASH: {e}")
                failure_printed = True
            continue

        client_betas.append({
            client_key: client,
            'alpha_linear': model.params.get('const', 0),
            'beta_rate_linear': model.params.get('Delta_Rate', 0),
            'beta_bond_linear': model.params.get('Bond_Return', 0),
            'linear_r_squared': model.rsquared,
        })

    if not client_betas:
        # Same layout as the populated result (client ID as index) so a
        # downstream join does not pick up the ID as a feature column.
        return pd.DataFrame(
            columns=result_cols,
            index=pd.Index([], name=client_key),
            dtype=float,
        )

    return pd.DataFrame(client_betas).set_index(client_key)
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
import pandas as pd
|
||||
from data_loader import load_and_clean_data
|
||||
from features import compute_static_features, compute_shock_sensitivities
|
||||
from clustering import run_clustering_pipeline, get_cluster_profiles
|
||||
|
||||
def main():
    """Run the client clustering pipeline end to end.

    Loads the datasets, builds static and shock-sensitivity features, clusters
    the clients into 3 groups, prints the mean profile of each cluster and
    writes the labelled feature matrix to 'client_clusters.csv'.
    """
    print("--- Starting Carmignac Client Clustering Pipeline ---")

    # 1. Data loading
    print("Loading data...")
    flows, aum, rates, gov = load_and_clean_data(
        rates_path='data/str_rates.csv',
        gov_path='data/eur_gov_indices.csv',
    )

    # 2. Feature Engineering
    print("Computing static features...")
    static_feats = compute_static_features(flows, aum)

    # Shock model (default). To use the linear model instead, call
    # compute_linear_sensitivities here; it must then also be imported from
    # `features`, and `shock_cols` below adapted to its column names.
    sensitivity_feats = compute_shock_sensitivities(flows, aum, rates, gov, freq='ME')

    # An empty sensitivity result may carry the client ID as a regular column;
    # move it to the index so the join below does not add it as a feature.
    id_col = 'Registrar Account - ID'
    if id_col in sensitivity_feats.columns:
        sensitivity_feats = sensitivity_feats.set_index(id_col)

    # Merge features
    full_features = static_feats.join(sensitivity_feats, how='left')

    # Fill missing sensitivities with 0 (Passive clients)
    shock_cols = ['alpha_normal', 'beta_rate_spike', 'beta_rate_drop',
                  'beta_bond_rally', 'beta_bond_crash', 'shock_r_squared']
    # astype(float) keeps the columns numeric even when every value was missing.
    full_features[shock_cols] = full_features[shock_cols].fillna(0).astype(float)

    print(f"Final Feature Matrix: {full_features.shape}")

    # 3. Clustering
    print("Running Clustering...")
    clustered_df, centers, scaler = run_clustering_pipeline(full_features, n_clusters=3)

    # 4. Results
    print("\n--- Cluster Profiles (Mean Values) ---")
    profiles = get_cluster_profiles(clustered_df)
    print(profiles.T)

    clustered_df.to_csv('client_clusters.csv')
    print("\nResults saved to 'client_clusters.csv'")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from sklearn.decomposition import PCA
|
||||
from sklearn.preprocessing import RobustScaler
|
||||
|
||||
def plot_clusters(input_path='client_clusters.csv', output_path='cluster_map.png'):
    """Project the clustered clients onto two principal components and save the plot.

    Args:
        input_path: CSV written by main.py; the first column is used as the
            index and a 'Cluster' column must be present.
        output_path: File the scatter plot is saved to.

    Returns:
        None. A message is printed and nothing is plotted when *input_path*
        does not exist.
    """
    print("--- Generating Cluster Visualization ---")

    # 1. Load the results from main.py
    try:
        df = pd.read_csv(input_path, index_col=0)
    except FileNotFoundError:
        print(f"Error: Run main.py first to generate '{input_path}'")
        return

    # 2. Prepare data for PCA: every column except the label is a feature.
    X = df.drop(columns=['Cluster'])

    # Scale first: PCA is driven by variance, so unscaled features with large
    # magnitudes would dominate the components.
    scaler = RobustScaler()
    X_scaled = scaler.fit_transform(X)

    # 3. Run PCA (reduce to 2 dimensions)
    pca = PCA(n_components=2)
    components = pca.fit_transform(X_scaled)

    # Create plotting DataFrame
    plot_df = pd.DataFrame(data=components, columns=['PC1', 'PC2'], index=X.index)
    plot_df['Cluster'] = df['Cluster'].astype(str)  # string labels give discrete colors

    # 4. Plot
    fig = plt.figure(figsize=(12, 8))
    try:
        sns.scatterplot(
            data=plot_df,
            x='PC1',
            y='PC2',
            hue='Cluster',
            style='Cluster',
            palette='viridis',
            s=60,
            alpha=0.8,
        )

        plt.title('Client Segmentation Map (PCA Projection)', fontsize=16)
        plt.xlabel(f'Principal Component 1 ({pca.explained_variance_ratio_[0]:.1%} Variance)', fontsize=12)
        plt.ylabel(f'Principal Component 2 ({pca.explained_variance_ratio_[1]:.1%} Variance)', fontsize=12)
        plt.legend(title='Cluster ID', bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.grid(True, linestyle='--', alpha=0.3)

        plt.tight_layout()
        plt.savefig(output_path, dpi=300)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

    print(f"Visualization saved to '{output_path}'")


if __name__ == "__main__":
    plot_clusters()
|
||||
151156
data_exploration/carmignac_perf.csv
Normal file
151156
data_exploration/carmignac_perf.csv
Normal file
File diff suppressed because it is too large
Load Diff
|
|
@ -1,151 +0,0 @@
|
|||
,Agreement - Code,Company - Id,Company - Ultimate Parent Id,Registrar Account - ID,Registrar Account - Region,RegistrarAccount - Country,Product - Asset Type,Product - Strategy,Product - Legal Status,Product - Is Dedie ?,Product - Fund,Product - Shareclass Type,Product - Shareclass Currency,Product - Isin,Centralisation Date,Quantity - AUM,Value - AUM CCY,Value - AUM €
|
||||
0,1010,976.0,16697.0,416573,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2015-02-28,109.305,143606.0021,143606.0021
|
||||
1,1010,976.0,16697.0,416573,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2016-06-30,103.933,134698.2073,134698.2073
|
||||
2,1010,976.0,16697.0,416573,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2016-09-30,102.714,142998.4308,142998.4308
|
||||
3,1010,976.0,16697.0,416573,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2015-11-30,3679.082,959283.8407,959283.8407
|
||||
4,1010,976.0,16697.0,416573,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2015-12-31,3667.679,878812.5652,878812.5652
|
||||
5,1010,976.0,16697.0,416573,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2016-08-31,3662.97,963470.9991,963470.9991
|
||||
6,1010,976.0,16697.0,416573,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2016-10-31,3596.972,966362.4975,966362.4975
|
||||
7,1010,976.0,16697.0,416573,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2016-04-30,6313.81135,1749430.8489,1749430.8489
|
||||
8,1010,976.0,16697.0,416573,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2016-08-31,6031.89135,1729343.25,1729343.25
|
||||
9,1010,976.0,16697.0,416573,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2017-02-28,30.832,5696.212,5696.212
|
||||
10,1010,976.0,16697.0,416573,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2015-09-30,10681.012,11312580.2396,11312580.2396
|
||||
11,1010,976.0,16697.0,416573,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2017-02-28,9.909,1726.346,1726.346
|
||||
12,1010,976.0,16697.0,416573,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2015-06-30,15248.877,4471123.2252,4471123.2252
|
||||
13,1010,976.0,16697.0,416573,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2016-01-31,14252.798,3495641.2375,3495641.2375
|
||||
14,1010,976.0,16697.0,416573,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2017-02-28,10364.128,2718821.6982,2718821.6982
|
||||
15,1010,976.0,16697.0,416573,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2017-03-31,9979.059,2617706.7569,2617706.7569
|
||||
16,1010,976.0,16697.0,416573,France,France,Equity,Large Cap Emerging Markets Strategy,FCP,NO,Carmignac Emergents,A,EUR,FR0010149302,2015-07-31,6933.327,5740170.7566,5740170.7566
|
||||
17,1010,976.0,16697.0,416573,France,France,Equity,Large Cap Emerging Markets Strategy,FCP,NO,Carmignac Emergents,A,EUR,FR0010149302,2016-07-31,6097.532,5045402.8534,5045402.8534
|
||||
18,1010,976.0,16697.0,416573,France,France,Equity,Large Cap Emerging Markets Strategy,FCP,NO,Carmignac Emergents,A,EUR,FR0010149302,2016-09-30,6140.529,5122367.8865,5122367.8865
|
||||
19,1010,976.0,16697.0,416573,France,France,Equity,Large Cap Emerging Markets Strategy,FCP,NO,Carmignac Emergents,A,EUR,FR0010149302,2016-11-30,6201.72,4932724.0536,4932724.0536
|
||||
20,1010,976.0,16697.0,416573,France,France,Equity,Large Cap Emerging Markets Strategy,FCP,NO,Carmignac Emergents,A,EUR,FR0010149302,2016-12-31,5864.435,4711311.146,4711311.146
|
||||
21,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2015-01-31,246.0,321015.24,321015.24
|
||||
22,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2015-05-31,207.0,285173.55,285173.55
|
||||
23,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2019-07-31,0.0,0.0,0.0
|
||||
24,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2020-02-29,0.0,0.0,0.0
|
||||
25,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2020-05-31,0.0,0.0,0.0
|
||||
26,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2021-04-30,0.0,0.0,0.0
|
||||
27,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2015-03-31,18625.0,5724580.0,5724580.0
|
||||
28,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2015-10-31,17736.0,4634948.88,4634948.88
|
||||
29,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2016-03-31,18639.0,4590412.92,4590412.92
|
||||
30,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2016-04-30,19207.0,4973076.44,4973076.44
|
||||
31,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2016-08-31,18945.0,4983103.35,4983103.35
|
||||
32,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2018-07-31,16786.0,5652685.5,5652685.5
|
||||
33,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2019-02-28,0.0,0.0,0.0
|
||||
34,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2019-03-31,0.0,0.0,0.0
|
||||
35,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2019-04-30,0.0,0.0,0.0
|
||||
36,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2020-02-29,0.0,0.0,0.0
|
||||
37,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2021-01-31,0.0,0.0,0.0
|
||||
38,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2021-05-31,0.0,0.0,0.0
|
||||
39,1012,6340.0,16625.0,416580,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2016-07-31,15336.302,4317015.65,4317015.65
|
||||
40,1012,6340.0,16625.0,416580,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2016-12-31,14127.302,4242004.9715,4242004.9715
|
||||
41,1012,6340.0,16625.0,416580,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2017-11-30,18086.302,6565327.626,6565327.626
|
||||
42,1012,6340.0,16625.0,416580,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2018-02-28,19086.302,7177785.5931,7177785.5931
|
||||
43,1012,6340.0,16625.0,416580,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2018-08-31,15840.302,5931876.293,5931876.293
|
||||
44,1012,6340.0,16625.0,416580,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2018-11-30,15497.302,5283650.1439,5283650.1439
|
||||
45,1012,6340.0,16625.0,416580,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2019-05-31,0.0,0.0,0.0
|
||||
46,1012,6340.0,16625.0,416580,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2020-02-29,0.0,0.0,0.0
|
||||
47,1012,6340.0,16625.0,416580,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2021-05-31,0.0,0.0,0.0
|
||||
48,1012,6340.0,16625.0,416580,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2015-06-30,246.0,46966.32,46966.32
|
||||
49,1012,6340.0,16625.0,416580,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2016-03-31,275.0,45501.5,45501.5
|
||||
50,1012,6340.0,16625.0,416580,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2019-02-28,0.0,0.0,0.0
|
||||
51,1012,6340.0,16625.0,416580,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2019-03-31,0.0,0.0,0.0
|
||||
52,1012,6340.0,16625.0,416580,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2019-06-30,0.0,0.0,0.0
|
||||
53,1012,6340.0,16625.0,416580,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2020-09-30,0.0,0.0,0.0
|
||||
54,1012,6340.0,16625.0,416580,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2020-12-31,0.0,0.0,0.0
|
||||
55,1012,6340.0,16625.0,416580,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2021-06-30,0.0,0.0,0.0
|
||||
56,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2015-06-30,30289.0,37940304.29,37940304.29
|
||||
57,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2016-01-31,27726.0,29364052.08,29364052.08
|
||||
58,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2017-02-28,23750.0,28166075.0,28166075.0
|
||||
59,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2017-03-31,23296.0,27853396.48,27853396.48
|
||||
60,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2017-04-30,23445.0,28341957.15,28341957.15
|
||||
61,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2017-08-31,22567.0,27121697.61,27121697.61
|
||||
62,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2017-10-31,22215.0,26971231.5,26971231.5
|
||||
63,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2018-09-30,18587.0,22442873.15,22442873.15
|
||||
64,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2019-09-30,0.0,0.0,0.0
|
||||
65,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2019-10-31,0.0,0.0,0.0
|
||||
66,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2020-03-31,0.0,0.0,0.0
|
||||
67,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2020-10-31,0.0,0.0,0.0
|
||||
68,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2021-07-31,0.0,0.0,0.0
|
||||
69,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2021-08-31,0.0,0.0,0.0
|
||||
70,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2015-06-30,4363.0,813132.31,813132.31
|
||||
71,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2015-12-31,3792.0,633643.2,633643.2
|
||||
72,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2016-01-31,3678.0,576820.74,576820.74
|
||||
73,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2017-03-31,2878.0,505204.12,505204.12
|
||||
74,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2017-04-30,2816.0,499558.4,499558.4
|
||||
75,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2017-05-31,2754.0,492966.0,492966.0
|
||||
76,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2017-10-31,2602.0,461724.9,461724.9
|
||||
77,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2019-06-30,0.0,0.0,0.0
|
||||
78,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2019-08-31,0.0,0.0,0.0
|
||||
79,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2020-03-31,0.0,0.0,0.0
|
||||
80,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2020-08-31,0.0,0.0,0.0
|
||||
81,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2020-09-30,0.0,0.0,0.0
|
||||
82,1012,6340.0,16625.0,416580,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2020-12-31,0.0,0.0,0.0
|
||||
83,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2015-08-31,19515.0,5133225.6,5133225.6
|
||||
84,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2017-02-28,13631.0,3575820.23,3575820.23
|
||||
85,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2017-05-31,12783.0,3382637.46,3382637.46
|
||||
86,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2017-07-31,12444.0,3323543.52,3323543.52
|
||||
87,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2018-03-31,11425.0,2950963.25,2950963.25
|
||||
88,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2018-09-30,10269.0,2634922.71,2634922.71
|
||||
89,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2019-04-30,0.0,0.0,0.0
|
||||
90,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2019-06-30,0.0,0.0,0.0
|
||||
91,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2019-10-31,0.0,0.0,0.0
|
||||
92,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2020-03-31,0.0,0.0,0.0
|
||||
93,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2020-12-31,0.0,0.0,0.0
|
||||
94,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2021-03-31,0.0,0.0,0.0
|
||||
95,1012,6340.0,16625.0,416580,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2021-06-30,0.0,0.0,0.0
|
||||
96,1012,6340.0,16625.0,416580,France,France,Equity,Large Cap Emerging Markets Strategy,FCP,NO,Carmignac Emergents,A,EUR,FR0010149302,2015-05-31,16477.0,14574895.12,14574895.12
|
||||
97,1012,6340.0,16625.0,416580,France,France,Equity,Large Cap Emerging Markets Strategy,FCP,NO,Carmignac Emergents,A,EUR,FR0010149302,2016-02-29,15566.0,11488797.62,11488797.62
|
||||
98,1012,6340.0,16625.0,416580,France,France,Equity,Large Cap Emerging Markets Strategy,FCP,NO,Carmignac Emergents,A,EUR,FR0010149302,2017-07-31,12331.0,11094077.39,11094077.39
|
||||
99,1012,6340.0,16625.0,416580,France,France,Equity,Large Cap Emerging Markets Strategy,FCP,NO,Carmignac Emergents,A,EUR,FR0010149302,2018-03-31,11342.0,9959296.78,9959296.78
|
||||
100,1012,6340.0,16625.0,416580,France,France,Equity,Large Cap Emerging Markets Strategy,FCP,NO,Carmignac Emergents,A,EUR,FR0010149302,2020-05-31,0.0,0.0,0.0
|
||||
101,1010,976.0,16697.0,416573,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2015-01-31,88.451,115423.2479,115423.2479
|
||||
102,1010,976.0,16697.0,416573,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2015-03-31,135.771,179295.1095,179295.1095
|
||||
103,1010,976.0,16697.0,416573,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2015-10-31,108.467,137327.8994,137327.8994
|
||||
104,1010,976.0,16697.0,416573,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2016-04-30,103.974,131679.9518,131679.9518
|
||||
105,1010,976.0,16697.0,416573,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2016-05-31,103.952,132387.0301,132387.0301
|
||||
106,1010,976.0,16697.0,416573,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2016-08-31,103.882,145367.2767,145367.2767
|
||||
107,1010,976.0,16697.0,416573,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2015-03-31,3916.458,1203762.5309,1203762.5309
|
||||
108,1010,976.0,16697.0,416573,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2015-10-31,3728.367,974334.1481,974334.1481
|
||||
109,1010,976.0,16697.0,416573,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2016-03-31,3700.605,911384.9994,911384.9994
|
||||
110,1010,976.0,16697.0,416573,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2016-05-31,3684.652,940875.8882,940875.8882
|
||||
111,1010,976.0,16697.0,416573,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2015-02-28,4824.05935,1397674.7155,1397674.7155
|
||||
112,1010,976.0,16697.0,416573,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2015-10-31,6215.34535,1813327.0059,1813327.0059
|
||||
113,1010,976.0,16697.0,416573,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2016-06-30,6185.10035,1664101.2492,1664101.2492
|
||||
114,1010,976.0,16697.0,416573,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2016-07-31,6119.52035,1722583.7833,1722583.7833
|
||||
115,1010,976.0,16697.0,416573,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2016-09-30,6030.01935,1765408.7651,1765408.7651
|
||||
116,1010,976.0,16697.0,416573,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2015-08-31,18.237,3290.3195,3290.3195
|
||||
117,1010,976.0,16697.0,416573,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2015-11-30,18.237,3222.4779,3222.4779
|
||||
118,1010,976.0,16697.0,416573,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2015-12-31,19.375,3309.0563,3309.0563
|
||||
119,1010,976.0,16697.0,416573,France,France,Equity,Grande Europe,SICAV,NO,Carmignac Portfolio Grande Europe,A,EUR,LU0099161993,2016-02-29,19.354,3080.9633,3080.9633
|
||||
120,1010,976.0,16697.0,416573,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2015-04-30,10632.546,13714070.4817,13714070.4817
|
||||
121,1010,976.0,16697.0,416573,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2016-01-31,10191.753,10793881.7672,10793881.7672
|
||||
122,1010,976.0,16697.0,416573,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2016-11-30,9371.763,10476600.1401,10476600.1401
|
||||
123,1010,976.0,16697.0,416573,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,A,EUR,FR0010148981,2017-01-31,9000.004,10709824.7599,10709824.7599
|
||||
124,1010,976.0,16697.0,416573,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2017-01-31,9.909,1733.0841,1733.0841
|
||||
125,1010,976.0,16697.0,416573,France,France,Equity,Investissement,FCP,NO,Carmignac Investissement,E,EUR,FR0010312660,2017-03-31,9.908,1739.2503,1739.2503
|
||||
126,1010,976.0,16697.0,416573,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2015-08-31,15389.867,4048150.6157,4048150.6157
|
||||
127,1010,976.0,16697.0,416573,France,France,Equity,Investissement Latitude,FCP,NO,Carmignac Investissement Latitude,A,EUR,FR0010147603,2016-02-29,13674.192,3309838.1736,3309838.1736
|
||||
128,1010,976.0,16697.0,416573,France,France,Equity,Large Cap Emerging Markets Strategy,FCP,NO,Carmignac Emergents,A,EUR,FR0010149302,2015-05-31,7022.043,6211418.3561,6211418.3561
|
||||
129,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2015-07-31,195.0,256183.2,256183.2
|
||||
130,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2016-07-31,180.0,245372.4,245372.4
|
||||
131,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2016-09-30,238.0,331343.6,331343.6
|
||||
132,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2016-12-31,250.0,324742.5,324742.5
|
||||
133,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2017-11-30,229.0,346623.56,346623.56
|
||||
134,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2018-02-28,220.0,339028.8,339028.8
|
||||
135,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2018-07-31,181.0,264808.43,264808.43
|
||||
136,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2018-11-30,157.0,224888.37,224888.37
|
||||
137,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2019-05-31,0.0,0.0,0.0
|
||||
138,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2019-12-31,0.0,0.0,0.0
|
||||
139,1012,6340.0,16625.0,416580,France,France,Equity,Asia Discovery,SICAV,NO,Carmignac Portfolio Asia Discovery,A,EUR,LU0336083810,2021-05-31,0.0,0.0,0.0
|
||||
140,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2015-02-28,18779.0,5723651.41,5723651.41
|
||||
141,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2016-09-30,18716.0,4984819.44,4984819.44
|
||||
142,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2018-12-31,0.0,0.0,0.0
|
||||
143,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2019-06-30,0.0,0.0,0.0
|
||||
144,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2019-07-31,0.0,0.0,0.0
|
||||
145,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2019-08-31,0.0,0.0,0.0
|
||||
146,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2020-01-31,0.0,0.0,0.0
|
||||
147,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2020-08-31,0.0,0.0,0.0
|
||||
148,1012,6340.0,16625.0,416580,France,France,Equity,Climate Transition,SICAV,NO,Carmignac Portfolio Climate Transition,A,EUR,LU0164455502,2020-09-30,0.0,0.0,0.0
|
||||
149,1012,6340.0,16625.0,416580,France,France,Equity,Euro-Entrepreneurs,FCP,NO,Carmignac Euro-Entrepreneurs,A,EUR,FR0010149112,2015-01-31,16119.302,4429422.9966,4429422.9966
|
||||
|
|
|
@ -1,151 +0,0 @@
|
|||
,Date,perfPeriod,shareClass_name,return,percentile
|
||||
4,2012-12-29,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,15.3705089425925,2.0
|
||||
11,2012-12-29,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,15.3644241298917,2.0
|
||||
16,2013-01-05,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,15.4876066401655,2.0
|
||||
26,2013-01-05,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,15.4876127738795,2.0
|
||||
32,2013-01-12,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,10.6132138957134,6.0
|
||||
39,2013-01-12,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,10.6132051959445,6.0
|
||||
60,2013-01-19,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,11.4180826933026,4.0
|
||||
62,2013-01-19,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,11.4180849650982,4.0
|
||||
65,2013-01-26,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,9.21618698751112,6.0
|
||||
71,2013-01-26,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,9.21619293712317,6.0
|
||||
84,2013-02-02,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,5.54171915417381,13.0
|
||||
88,2013-02-02,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,5.54171545781454,13.0
|
||||
104,2013-02-09,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,9.22498074879858,4.0
|
||||
110,2013-02-09,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,9.22498227410478,4.0
|
||||
112,2013-02-16,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,7.91648516360137,6.0
|
||||
116,2013-02-16,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,7.91648750656917,6.0
|
||||
138,2013-02-23,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,10.1947819005052,2.0
|
||||
140,2013-02-23,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,10.1947812395711,2.0
|
||||
148,2013-03-02,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,10.0817196532974,2.0
|
||||
159,2013-03-02,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,10.0817180679311,2.0
|
||||
160,2013-03-09,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,11.2873859692377,1.0
|
||||
171,2013-03-09,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,11.2873772339695,1.0
|
||||
185,2013-03-16,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,11.132071839689,1.0
|
||||
187,2013-03-16,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,11.1320689796046,1.0
|
||||
198,2013-03-23,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,8.918741621745,2.0
|
||||
201,2013-03-23,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,8.91874219488516,2.0
|
||||
212,2013-03-30,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,10.1075104158491,2.0
|
||||
217,2013-03-30,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,10.1075087561574,2.0
|
||||
226,2013-04-06,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,7.59430397267171,5.0
|
||||
231,2013-04-06,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,7.59430638417808,5.0
|
||||
245,2013-04-13,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,8.3141020993174,4.0
|
||||
254,2013-04-13,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,8.31410741301051,4.0
|
||||
266,2013-04-20,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,8.26698692090677,4.0
|
||||
268,2013-04-20,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,8.26698224852072,4.0
|
||||
275,2013-04-27,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,8.82522569131341,2.0
|
||||
284,2013-04-27,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,8.82523148148153,2.0
|
||||
295,2013-05-04,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,13.1702748392498,1.0
|
||||
298,2013-05-04,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,13.1702755060169,1.0
|
||||
310,2013-05-11,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,19.3950513214657,1.0
|
||||
313,2013-05-11,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,19.3950597607513,1.0
|
||||
320,2013-05-18,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,19.3165442942429,1.0
|
||||
326,2013-05-18,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,19.3165502625538,1.0
|
||||
344,2013-05-25,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,17.2435790486097,1.0
|
||||
348,2013-05-25,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,17.2435719958093,1.0
|
||||
352,2013-06-01,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,15.4324926617122,1.0
|
||||
364,2013-06-01,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,15.432496307551,1.0
|
||||
369,2013-06-08,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,10.2828973888771,4.0
|
||||
379,2013-06-08,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,10.2829001690717,4.0
|
||||
393,2013-06-15,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,6.85898897299544,12.0
|
||||
395,2013-06-15,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,6.85899380547428,12.0
|
||||
403,2013-06-22,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,4.33306259846757,27.0
|
||||
410,2013-06-22,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,4.33306463826724,27.0
|
||||
424,2013-06-29,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,2.1393665833551,65.0
|
||||
425,2013-06-29,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,2.13936109553532,65.0
|
||||
441,2013-07-06,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,2.79474994342612,46.0
|
||||
447,2013-07-06,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,2.7947573616782,46.0
|
||||
448,2013-07-13,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,2.66481220091013,49.0
|
||||
463,2013-07-13,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,2.66481636020959,49.0
|
||||
478,2013-07-20,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,4.67427803895228,24.0
|
||||
479,2013-07-20,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,4.6742844566247,24.0
|
||||
485,2013-07-27,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,2.96150989391888,38.0
|
||||
488,2013-07-27,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,2.96151723544964,38.0
|
||||
502,2013-08-03,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,0.378928943840329,89.0
|
||||
503,2013-08-03,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,0.378931055242138,89.0
|
||||
516,2013-08-10,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-0.409156970016766,100.0
|
||||
525,2013-08-10,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-0.409162848313327,100.0
|
||||
529,2013-08-17,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,1.6325323366156,67.0
|
||||
539,2013-08-17,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,1.63252937890017,67.0
|
||||
555,2013-08-24,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-0.918505268755287,99.0
|
||||
559,2013-08-24,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-0.9184987362297,99.0
|
||||
571,2013-08-31,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-1.62263504511009,100.0
|
||||
572,2013-08-31,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-1.62263993078768,100.0
|
||||
577,2013-09-07,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,0.55568471495766,83.0
|
||||
581,2013-09-07,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,0.555687259968685,83.0
|
||||
592,2013-09-14,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,0.521128988730579,84.0
|
||||
605,2013-09-14,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,0.521129672440046,84.0
|
||||
609,2013-09-21,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,0.397126184144359,88.0
|
||||
610,2013-09-21,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,0.397129584306888,88.0
|
||||
624,2013-09-28,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,0.930223870142233,75.0
|
||||
630,2013-09-28,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,0.930225849019251,75.0
|
||||
649,2013-10-05,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,1.45211017035969,64.0
|
||||
653,2013-10-05,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,1.45211516765063,64.0
|
||||
668,2013-10-12,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,1.70067838512971,52.0
|
||||
669,2013-10-12,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,1.70067709549377,52.0
|
||||
674,2013-10-19,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,2.32924486190917,37.0
|
||||
678,2013-10-19,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,2.32924792131974,37.0
|
||||
700,2013-10-26,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,0.437101792858163,85.0
|
||||
702,2013-10-26,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,0.437100951096125,85.0
|
||||
708,2013-11-02,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,0.60566080382416,84.0
|
||||
716,2013-11-02,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,0.605655225494206,84.0
|
||||
729,2013-11-09,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,1.31248488456455,69.0
|
||||
731,2013-11-09,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,1.31248172197183,69.0
|
||||
741,2013-11-16,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,1.20409597265383,70.0
|
||||
745,2013-11-16,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,1.2040967400369,70.0
|
||||
766,2013-11-23,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-0.561222135044104,99.0
|
||||
771,2013-11-23,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-0.524861457092485,99.0
|
||||
776,2013-11-30,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-1.37245752784392,99.0
|
||||
781,2013-11-30,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-1.43462607665321,99.0
|
||||
791,2013-12-07,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-3.22887163088167,100.0
|
||||
800,2013-12-07,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-3.15657846488079,100.0
|
||||
807,2013-12-14,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-2.98614825608208,100.0
|
||||
810,2013-12-14,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-2.88663743039842,100.0
|
||||
832,2013-12-21,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-2.95705555098802,100.0
|
||||
835,2013-12-21,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-2.8361619178927,100.0
|
||||
841,2013-12-28,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-5.35592499756432,100.0
|
||||
845,2013-12-28,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-5.21920020999477,100.0
|
||||
859,2014-01-04,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-2.41628948088483,99.0
|
||||
871,2014-01-04,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-2.57401649317295,99.0
|
||||
876,2014-01-11,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-3.91864938726083,100.0
|
||||
879,2014-01-11,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-3.93238863143234,100.0
|
||||
903,2014-01-18,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-1.99789192668233,99.0
|
||||
905,2014-01-18,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-1.99245802966026,99.0
|
||||
916,2014-01-25,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-4.13735923320824,100.0
|
||||
923,2014-01-25,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-4.14492272157965,100.0
|
||||
931,2014-02-01,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-6.28505093792537,100.0
|
||||
941,2014-02-01,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-6.29780018740799,100.0
|
||||
947,2014-02-08,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-8.44607756330799,100.0
|
||||
959,2014-02-08,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-8.41559067497243,100.0
|
||||
963,2014-02-15,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-8.95453656519236,100.0
|
||||
976,2014-02-15,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-9.0017245020061,100.0
|
||||
981,2014-02-22,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-10.9427369974399,100.0
|
||||
993,2014-02-22,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-11.0069330947752,100.0
|
||||
1002,2014-03-01,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-10.5005288981599,100.0
|
||||
1004,2014-03-01,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-10.5856489174059,100.0
|
||||
1013,2014-03-08,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-8.5403242569113,100.0
|
||||
1024,2014-03-08,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-8.44391605238755,100.0
|
||||
1032,2014-03-15,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-8.68625962006662,100.0
|
||||
1038,2014-03-15,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-8.80381271603646,100.0
|
||||
1057,2014-03-22,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-10.1067403784819,100.0
|
||||
1058,2014-03-22,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-9.97753020782063,100.0
|
||||
1065,2014-03-29,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-5.5467262788383,100.0
|
||||
1073,2014-03-29,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-5.38639265439276,100.0
|
||||
1079,2014-04-05,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-3.77729086099889,100.0
|
||||
1093,2014-04-05,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-3.59563303774362,100.0
|
||||
1100,2014-04-12,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-3.87429651766745,100.0
|
||||
1105,2014-04-12,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-4.06708815376412,100.0
|
||||
1117,2014-04-19,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-4.9077544578913,100.0
|
||||
1120,2014-04-19,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-4.69717499564392,100.0
|
||||
1130,2014-04-26,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-7.06804958259555,100.0
|
||||
1131,2014-04-26,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-6.85918742997025,100.0
|
||||
1157,2014-05-03,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-8.12180211309723,100.0
|
||||
1165,2014-05-03,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-7.90343047610594,100.0
|
||||
1178,2014-05-10,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-8.23972812481176,100.0
|
||||
1182,2014-05-10,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-8.47546748167871,100.0
|
||||
1191,2014-05-17,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-5.34726233227029,100.0
|
||||
1201,2014-05-17,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-5.08977550721322,100.0
|
||||
1205,2014-05-24,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,-1.64336774284573,100.0
|
||||
1216,2014-05-24,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,-1.92459794358029,100.0
|
||||
1234,2014-05-31,1YrRet,Carmignac Pf Asia Discovery F EUR Acc,3.70763707159587,18.0
|
||||
1236,2014-05-31,1YrRet,Carmignac Pf Asia Discovery A EUR Acc,3.41910814400184,21.0
|
||||
|
424822
dataset_features.csv
Normal file
424822
dataset_features.csv
Normal file
File diff suppressed because it is too large
Load Diff
|
|
@ -35,7 +35,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
|
@ -48,7 +48,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
|
@ -58,13 +58,22 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"weekly_perf[\"Date\"] = pd.to_datetime(weekly_perf['Date'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"weekly_perf.to_csv('weekly_perf_full.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
|
@ -109,7 +118,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
|
@ -119,12 +128,11 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"carmignac_weekly_perf_head = carmignac_funds.head(150)\n",
|
||||
"carmignac_weekly_perf_head.to_csv(\"weekly_perf_head.csv\")"
|
||||
"carmignac_funds.to_csv(\"carmignac_perf.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
28
peers_summary.csv
Normal file
28
peers_summary.csv
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
strategy,n_carmignac_sc,n_competitors,n_index_funds,ms_categories,broad_category
|
||||
CAD,2,27,2,"EAA Fund Asia ex-Japan Equity, EAA Fund Asia ex-Japan Small/Mid-Cap Equity, EAA Fund Asia-Pacific Equity, EAA Fund Asia-Pacific ex-Japan Equity, EAA Fund Global Emerging Markets ex-China Equity",Equity
|
||||
CARE,2,22,0,"EAA Fund Equity Market Neutral EUR, EAA Fund Long/Short Equity - Global, EAA Fund Long/Short Equity - Europe, EAA Fund Macro Trading EUR",Alternative
|
||||
CCNE,1,28,0,"EAA Fund Greater China Equity, EAA Fund China Equity, EAA Fund China Equity - A Shares",Equity
|
||||
CCR,3,36,1,"EAA Fund EUR Corporate Bond, EAA Fund EUR Flexible Bond, EAA Fund Global Flexible Bond - EUR Hedged, EAA Fund EUR High Yield Bond, EAA Fund Global Corporate Bond - EUR Hedged",Fixed Income
|
||||
CEMD,1,34,0,"EAA Fund Global Emerging Markets Bond, EAA Fund Global Emerging Markets Bond - EUR Hedged, EAA Fund Other Bond, EAA Fund Global Emerging Markets Bond - Local Currency, Global Emerging Markets Bond, Global Emerging Markets Bond - EUR Hedged, Global Emerging Markets Bond - Local Currency",Fixed Income
|
||||
CEMP,2,11,0,"EAA Fund Global Emerging Markets Allocation, EAA Fund Other Allocation, EAA Fund Asia Allocation, EAA Fund Greater China Allocation, Global Emerging Markets Allocation",Allocation
|
||||
CE,3,40,1,"EAA Fund Global Emerging Markets Equity, Global Emerging Markets Equity",Equity
|
||||
CFB,2,20,1,"EAA Fund EUR Flexible Bond, EAA Fund EUR Diversified Bond, EAA Fund Global Flexible Bond - EUR Hedged, EAA Fund Global Diversified Bond - EUR Hedged, EUR Flexible Bond",Fixed Income
|
||||
CFG,1,10,0,"EAA Fund Europe ex-UK Small/Mid-Cap Equity, EAA Fund Europe Flex-Cap Equity, EAA Fund Europe Mid-Cap Equity, EAA Fund Europe Small-Cap Equity, EAA Fund Eurozone Large-Cap Equity, EAA Fund Eurozone Mid-Cap Equity, EAA Fund Global Flex-Cap Equity, EAA Fund Global Large-Cap Growth Equity",Equity
|
||||
CGB,2,35,2,"EAA Fund Global Diversified Bond, EAA Fund Global Flexible Bond - EUR Hedged, Global Diversified Bond, EAA Fund Global Flexible Bond, EAA Fund Other Bond, EAA Fund EUR Diversified Bond - Short Term, EAA Fund EUR Flexible Bond, EAA Fund Global Government Bond, EAA Fund Global Corporate Bond - EUR Hedged, EAA Fund Global Diversified Bond - EUR Hedged, EAA Fund Global Government Bond - EUR Hedged",Fixed Income
|
||||
CGC,1,22,0,"EAA Fund Global Large-Cap Growth Equity, EAA Fund Other Equity, EAA Fund Global Large-Cap Blend Equity",Equity
|
||||
CGE,2,52,0,"EAA Fund Europe Large-Cap Blend Equity, EAA Fund Europe Large-Cap Growth Equity, EAA Fund Europe Large-Cap Value Equity, EAA Fund Eurozone Large-Cap Equity, EAA Fund Europe Flex-Cap Equity, EAA Fund Europe Equity Income, Europe Large-Cap Growth Equity",Equity
|
||||
CHX,1,10,0,"EAA Fund Europe Large-Cap Blend Equity, EAA Fund Europe Mid-Cap Equity, EAA Fund Eurozone Flex-Cap Equity, EAA Fund Eurozone Large-Cap Equity, EAA Fund Global Large-Cap Blend Equity, EAA Fund Global Large-Cap Growth Equity, EAA Fund Other Equity, EAA Fund Sector Equity Consumer Goods & Services, EAA Fund Sector Equity Ecology",Equity
|
||||
CIL,2,12,0,"EAA Fund EUR Flexible Allocation - Global, EAA Fund EUR Flexible Allocation, EAA Fund EUR Moderate Allocation - Global, EAA Fund EUR Cautious Allocation - Global, EUR Flexible Allocation - Global",Allocation
|
||||
CI,3,28,0,"EAA Fund Global Large-Cap Growth Equity, EAA Fund Global Large-Cap Value Equity, EAA Fund Global Large-Cap Blend Equity, EAA Fund Other Equity, EAA Fund Global Equity Income, EAA Fund Global Flex-Cap Equity, EAA Fund Europe Flex-Cap Equity",Equity
|
||||
CMAP,1,21,0,"EAA Fund Event Driven, EAA Fund Relative Value Arbitrage",Alternative
|
||||
CMA,1,4,0,EAA Fund Event Driven,Alternative
|
||||
CPE,2,19,0,"EAA Fund EUR Moderate Allocation, EAA Fund EUR Cautious Allocation, EAA Fund EUR Flexible Allocation, EAA Fund EUR Aggressive Allocation, EAA Fund EUR Moderate Allocation - Global, EUR Moderate Allocation",Allocation
|
||||
CPI,2,18,0,"EAA Fund EUR Flexible Allocation - Global, EAA Fund EUR Moderate Allocation - Global, EAA Fund EUR Flexible Allocation, EAA Fund EUR Cautious Allocation - Global, EAA Fund Other Allocation, EAA Fund USD Moderate Allocation, EAA Fund EUR Cautious Allocation, EAA Fund Macro Trading EUR, EAA Fund GBP Flexible Allocation, EAA Fund Global Inflation-Linked Bond - EUR Hedged, EAA Fund Commodities - Broad Basket",Allocation
|
||||
CP,2,34,0,"EAA Fund EUR Moderate Allocation - Global, EAA Fund USD Moderate Allocation, EAA Fund EUR Flexible Allocation - Global, EAA Fund EUR Cautious Allocation - Global, EAA Fund EUR Aggressive Allocation - Global, EAA Fund EUR Cautious Allocation, EAA Fund EUR Flexible Allocation, EAA Fund EUR Diversified Bond, EAA Fund EUR Moderate Allocation, EUR Moderate Allocation - Global",Allocation
|
||||
CS,2,27,2,"EAA Fund EUR Diversified Bond - Short Term, EAA Fund EUR Government Bond - Short Term, EAA Fund Global Flexible Bond - EUR Hedged, EAA Fund EUR Ultra Short-Term Bond, EAA Fund EUR Flexible Bond, EAA Fund EUR Corporate Bond - Short Term, EAA Fund EUR Diversified Bond, EAA Fund EUR Corporate Bond",Fixed Income
|
||||
CTS,2,24,0,"EAA Fund Sector Equity Technology, EAA Fund US Flex-Cap Equity, Sector Equity Technology",Equity
|
||||
PLSEE,2,27,0,"EAA Fund Long/Short Equity - Global, EAA Fund Equity Market Neutral EUR, EAA Fund Long/Short Equity - Europe, EAA Fund Long/Short Equity - Other, EAA Fund Europe Large-Cap Blend Equity",Equity
|
||||
UKCEL,2,27,0,"EAA Fund Europe ex-UK Equity, EAA Fund Europe ex-UK Small/Mid-Cap Equity, EAA Fund Other Equity, EAA Fund Europe Large-Cap Blend Equity",Equity
|
||||
UKCE,2,21,0,EAA Fund Global Emerging Markets Equity,Equity
|
||||
UKCGB,5,26,0,"EAA Fund Global Flexible Bond - GBP Hedged, EAA Fund Global Flexible Bond, EAA Fund Global Diversified Bond, EAA Fund Global Diversified Bond - GBP Hedged, EAA Fund GBP Allocation 0-20% Equity",Fixed Income
|
||||
UKCGEC,3,17,0,"EAA Fund Global Large-Cap Growth Equity, EAA Fund Global Large-Cap Blend Equity",Equity
|
||||
|
Binary file not shown.
|
|
@ -1,19 +0,0 @@
|
|||
import os
|
||||
import s3fs
|
||||
import pandas as pd
|
||||
|
||||
def load():
    """Load the Carmignac AUM and Flows CSV exports from S3 as DataFrames.

    Credentials are read from the ``AWS_ACCESS_KEY_ID``,
    ``AWS_SECRET_ACCESS_KEY`` and ``AWS_SESSION_TOKEN`` environment
    variables; a missing variable raises ``KeyError``.

    Returns:
        tuple: ``(df_aum, df_flows)``, each read with ``;`` as the separator.
    """
    fs = s3fs.S3FileSystem(
        client_kwargs={'endpoint_url': 'https://minio-simple.lab.groupe-genes.fr'},
        key=os.environ["AWS_ACCESS_KEY_ID"],
        secret=os.environ["AWS_SECRET_ACCESS_KEY"],
        token=os.environ["AWS_SESSION_TOKEN"],
    )

    # Both files sit under the same bucket/prefix. The Flows path was
    # previously spelled 'projet-bdc-data//carmignac/...': the doubled slash
    # yields an object key with a leading '/', which only resolves on stores
    # that normalise paths. Build both paths from one prefix instead.
    base = 's3://projet-bdc-data/carmignac'

    with fs.open(f'{base}/AUM ENSAE V2 -20251105.csv', 'rb') as aum:
        df_aum = pd.read_csv(aum, sep=";")

    with fs.open(f'{base}/Flows ENSAE V2 -20251105.csv', 'rb') as flows:
        df_flows = pd.read_csv(flows, sep=";")

    return df_aum, df_flows
|
||||
|
|
@ -1,210 +0,0 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
from datetime import datetime, timedelta
|
||||
from load import load
|
||||
|
||||
class AssetReconciler:
    """
    Link account IDs between two AUM snapshots by comparing holdings.

    For every account present at the past date, the holdings expected at the
    current date (past quantities + net flows over the period) are compared
    with the actual holdings of every current account of the same country,
    using cosine similarity plus a total-quantity sanity check.
    """

    def __init__(self, df_aum, df_flows):
        """
        Initialize with the raw AUM and Flows dataframes.

        Both frames are copied and their 'Centralisation Date' column is
        parsed to datetime; values that cannot be parsed become NaT.
        """
        self.df_aum = df_aum.copy()
        self.df_flows = df_flows.copy()

        # Basic cleaning
        print("Parsing dates...")
        self.df_aum['Centralisation Date'] = pd.to_datetime(
            self.df_aum['Centralisation Date'], errors='coerce')
        self.df_flows['Centralisation Date'] = pd.to_datetime(
            self.df_flows['Centralisation Date'], errors='coerce')

        # Standardize column names for internal use
        self.col_id = 'Registrar Account - ID'
        self.col_isin = 'Product - Isin'
        self.col_qty_aum = 'Quantity - AUM'
        self.col_qty_flow = 'Quantity - NetFlows'
        self.col_block = 'Registrar Account - Country'

    def _get_portfolio_matrix(self, df, date_filter):
        """
        Pivot the rows of `df` dated `date_filter` into a holdings matrix.

        Returns a DataFrame with Index=AccountID, Columns=ISINs and
        Values=summed AUM quantity (0 where an account does not hold an
        ISIN), or an empty DataFrame when no row matches the date.
        """
        subset = df[df['Centralisation Date'] == date_filter]
        if subset.empty:
            print(f"Warning: No data found for date {date_filter}")
            return pd.DataFrame()

        # Pivot: Rows are Clients, Columns are ISINs
        matrix = subset.pivot_table(
            index=self.col_id,
            columns=self.col_isin,
            values=self.col_qty_aum,
            aggfunc='sum'
        ).fillna(0)

        return matrix

    def _get_aggregated_flows(self, start_date, end_date):
        """
        Sum net flow quantities per Account/ISIN over (start_date, end_date].

        Returns an empty DataFrame when no flow falls in the window.
        """
        dates = self.df_flows['Centralisation Date']
        subset = self.df_flows[(dates > start_date) & (dates <= end_date)]

        if subset.empty:
            return pd.DataFrame()

        flow_matrix = subset.pivot_table(
            index=self.col_id,
            columns=self.col_isin,
            values=self.col_qty_flow,
            aggfunc='sum'
        ).fillna(0)

        return flow_matrix

    def _country_by_account(self, date_filter):
        """Return {account ID: country} for the AUM rows dated `date_filter`."""
        snapshot = self.df_aum[self.df_aum['Centralisation Date'] == date_filter]
        return snapshot.set_index(self.col_id)[self.col_block].to_dict()

    def _build_expected_state(self, block_past, mat_flows):
        """
        Return past holdings plus the period's net flows for the same accounts.

        Flows are aligned on the accounts of `block_past`. ISINs that appear
        only in the flows (nobody held them at the past date) are kept as
        new columns instead of being discarded.
        """
        if mat_flows.empty:
            return block_past.copy()
        # Accounts without any flow get a row of zeros
        block_flows = mat_flows.reindex(index=block_past.index).fillna(0)
        return block_past.add(block_flows, fill_value=0)

    def match_accounts(self, date_past, date_current, similarity_threshold=0.95, magnitude_tolerance=0.2):
        """
        Main logic to link Past Accounts to Current Accounts.

        Parameters
        ----------
        date_past, date_current : datetime-like
            Snapshot dates to compare; converted with ``pd.Timestamp``.
        similarity_threshold : float
            Minimum cosine similarity for a "High Confidence Match".
        magnitude_tolerance : float
            Allowed relative gap between expected and actual total
            quantity (0.2 accepts ratios from 0.8 to 1.2).

        Returns
        -------
        pandas.DataFrame
            One row per past account that has at least one current account
            in its country, with columns Past_ID, Predicted_Current_ID,
            Similarity_Score, Magnitude_Ratio, Match_Status and Country.
            Empty when either snapshot has no data.
        """
        # Accept numpy.datetime64 / strings as well as Timestamps
        date_past = pd.Timestamp(date_past)
        date_current = pd.Timestamp(date_current)

        print(f"--- Running Matching Algorithm ---")
        print(f"Comparing State: {date_past.date()} -> {date_current.date()}")

        # 1. Get AUM Snapshots
        mat_past = self._get_portfolio_matrix(self.df_aum, date_past)
        mat_curr = self._get_portfolio_matrix(self.df_aum, date_current)

        if mat_past.empty or mat_curr.empty:
            return pd.DataFrame()

        # 2. Get Flows
        mat_flows = self._get_aggregated_flows(date_past, date_current)

        # 3. Blocking Strategy
        # Accounts are only compared within the same country. Both ID -> country
        # maps are built once, outside the loop.
        past_countries = self._country_by_account(date_past)
        curr_countries = self._country_by_account(date_current)

        results = []

        # Iterate through unique countries to reduce matrix size (Blocking)
        for country in set(past_countries.values()):
            ids_in_country_past = [i for i in mat_past.index if past_countries.get(i) == country]
            ids_in_country_curr = [i for i in mat_curr.index if curr_countries.get(i) == country]

            if not ids_in_country_past or not ids_in_country_curr:
                continue

            # Slice the matrices
            block_past = mat_past.loc[ids_in_country_past]
            block_curr = mat_curr.loc[ids_in_country_curr]

            # 4. Reconstruction: Expected State = Past + Flows
            expected_state = self._build_expected_state(block_past, mat_flows)

            # 5. Align Dimensions (Union of ISINs)
            all_isins = expected_state.columns.union(block_curr.columns)
            vec_expected = expected_state.reindex(columns=all_isins, fill_value=0)
            vec_actual = block_curr.reindex(columns=all_isins, fill_value=0)

            # 6. Calculate Cosine Similarity
            # Result is a matrix: Rows=PastIDs, Cols=CurrentIDs
            sim_matrix = cosine_similarity(vec_expected, vec_actual)

            # 7. Find Best Matches
            for idx, past_id in enumerate(vec_expected.index):
                # Find best score in the row
                best_idx = np.argmax(sim_matrix[idx])
                best_score = sim_matrix[idx][best_idx]
                curr_id = vec_actual.index[best_idx]

                # 8. Magnitude Check
                # Ensure we don't match a small retail client to a huge institutional one
                total_shares_exp = vec_expected.loc[past_id].sum()
                total_shares_act = vec_actual.loc[curr_id].sum()

                # Calculate ratio (handle div by zero)
                if total_shares_act == 0:
                    ratio = 0
                else:
                    ratio = total_shares_exp / total_shares_act

                # Check if ratio is within tolerance (e.g. 0.8 to 1.2)
                is_magnitude_ok = (1 - magnitude_tolerance) <= ratio <= (1 + magnitude_tolerance)

                match_status = "Unmatched"
                if best_score >= similarity_threshold and is_magnitude_ok:
                    match_status = "High Confidence Match"
                elif past_id == curr_id:
                    # Reached only when the best-scoring current account has
                    # the same ID but failed the score or magnitude test.
                    match_status = "Same ID (Retained)"

                results.append({
                    'Past_ID': past_id,
                    'Predicted_Current_ID': curr_id,
                    'Similarity_Score': round(best_score, 4),
                    'Magnitude_Ratio': round(ratio, 4),
                    'Match_Status': match_status,
                    'Country': country
                })

        return pd.DataFrame(results)
|
||||
|
||||
# ==========================================
|
||||
# MAIN EXECUTION
|
||||
# ==========================================
|
||||
if __name__ == "__main__":
    # 1. Load Data
    df_aum, df_flows = load()

    # 2. Initialize Logic
    reconciler = AssetReconciler(df_aum, df_flows)

    # 3. AUTO-DETECT DATES
    # Unparseable dates were coerced to NaT by AssetReconciler; NaT does not
    # sort reliably, so drop it. pd.to_datetime guarantees the elements are
    # pandas Timestamps (which expose .date()) on every pandas version.
    available_dates = sorted(
        pd.to_datetime(reconciler.df_aum['Centralisation Date'].dropna().unique())
    )

    if len(available_dates) < 2:
        print("\nERROR: Not enough dates found in data to perform comparison.")
        print(f"Dates found: {[str(d) for d in available_dates]}")
    else:
        # Automatically pick the First and Last date found in the file
        date_past = available_dates[0]      # First date
        date_current = available_dates[-1]  # Last date

        print(f"\nAuto-Detected Analysis Period:")
        print(f"Start (Past): {date_past}")
        print(f"End (Current): {date_current}")

        # 4. Run Linkage
        results = reconciler.match_accounts(date_past, date_current)

        # 5. Calculate KPI: share of past accounts given a linked status
        if not results.empty:
            total_past = len(results)
            matches = len(results[results['Match_Status'].isin(['High Confidence Match', 'Same ID (Retained)'])])
            kpi_percentage = (matches / total_past) * 100

            print("\n" + "="*40)
            print("FINAL RESULTS")
            print("="*40)
            print(results[['Past_ID', 'Predicted_Current_ID', 'Similarity_Score', 'Magnitude_Ratio', 'Match_Status']])
            print("-" * 40)
            print(f"KPI: {kpi_percentage:.1f}% of Past Client Codes successfully linked to Current Codes.")
            print("="*40)
        else:
            print("No matches found.")
|
||||
199946
relative_performance.csv
Normal file
199946
relative_performance.csv
Normal file
File diff suppressed because it is too large
Load Diff
BIN
src/__pycache__/feature_engineering.cpython-313.pyc
Normal file
BIN
src/__pycache__/feature_engineering.cpython-313.pyc
Normal file
Binary file not shown.
BIN
src/__pycache__/peers_loader.cpython-313.pyc
Normal file
BIN
src/__pycache__/peers_loader.cpython-313.pyc
Normal file
Binary file not shown.
BIN
src/__pycache__/predictive_model.cpython-313.pyc
Normal file
BIN
src/__pycache__/predictive_model.cpython-313.pyc
Normal file
Binary file not shown.
BIN
src/__pycache__/relative_performance.cpython-313.pyc
Normal file
BIN
src/__pycache__/relative_performance.cpython-313.pyc
Normal file
Binary file not shown.
480
src/feature_engineering.py
Normal file
480
src/feature_engineering.py
Normal file
|
|
@ -0,0 +1,480 @@
|
|||
"""
|
||||
feature_engineering.py
|
||||
───────────────────────
|
||||
Construction du dataset de features pour la modélisation prédictive.
|
||||
|
||||
Ce module assemble trois familles de features :
|
||||
|
||||
[A] Features comportementales client (depuis stocks/AUM)
|
||||
- Encours actuel et lags (1m, 3m, 6m)
|
||||
- Croissance de l'AUM sur différentes fenêtres
|
||||
- Concentration du portefeuille client (part du fonds dans son total)
|
||||
|
||||
[B] Features de performance absolue (depuis weekly_perf)
|
||||
- Rendements 6Mo et 1Yr du fonds détenu
|
||||
- Percentile Morningstar (rang brut dans la catégorie)
|
||||
|
||||
[C] Features de performance relative (depuis relative_performance.py)
|
||||
- Spread vs médiane des vrais peers
|
||||
- Rang dans le groupe de peers restreint
|
||||
- Momentum de rang, ratio d'outperformance
|
||||
- Dummies top/bottom quartile (lien avec la relation convexe de Sirri & Tufano)
|
||||
|
||||
Variable cible :
|
||||
flux_net_proxy = ΔAum(t → t+1)
|
||||
→ À remplacer par les flux transactionnels bruts dès que disponibles.
|
||||
|
||||
Usage :
|
||||
from peers_loader import PeersLoader
|
||||
from relative_performance import RelativePerformanceCalculator
|
||||
from feature_engineering import FeatureBuilder
|
||||
|
||||
loader = PeersLoader("peers/").load()
|
||||
calc = RelativePerformanceCalculator(loader)
|
||||
builder = FeatureBuilder(loader, calc)
|
||||
|
||||
dataset = builder.build(
|
||||
stocks_df = stocks,
|
||||
perf_df = weekly_perf,
|
||||
target_lag = 1 # prédire les flux à t+1 mois
|
||||
)
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# ── Constantes ────────────────────────────────────────────────────────────────
|
||||
|
||||
# Performance periods included as features (values of the 'perfPeriod' column)
PERF_PERIODS_TO_USE = ["6MoRet", "1YrRet"]

# AUM lags (in months)
AUM_LAGS = [1, 3, 6]

# Tolerance window for merge_asof (in days)
MERGE_ASOF_TOLERANCE_DAYS = 35
|
||||
|
||||
|
||||
# ── Classe principale ─────────────────────────────────────────────────────────
|
||||
|
||||
class FeatureBuilder:
|
||||
"""
|
||||
Construit le dataset de modélisation en assemblant les trois familles
|
||||
de features.
|
||||
|
||||
Paramètres
|
||||
----------
|
||||
loader : PeersLoader (déjà chargé)
|
||||
rel_calc : RelativePerformanceCalculator
|
||||
"""
|
||||
|
||||
def __init__(self, loader, rel_calc):
|
||||
self.loader = loader
|
||||
self.rel_calc = rel_calc
|
||||
|
||||
# ── Point d'entrée principal ──────────────────────────────────────────────
|
||||
|
||||
def build(self,
|
||||
stocks_df: pd.DataFrame,
|
||||
perf_df: pd.DataFrame,
|
||||
target_lag: int = 1,
|
||||
perf_periods: list[str] | None = None,
|
||||
verbose: bool = True) -> pd.DataFrame:
|
||||
"""
|
||||
Construit le dataset final avec features et variable cible.
|
||||
|
||||
Paramètres
|
||||
----------
|
||||
stocks_df : AUM mensuels (equity_stocks_full.csv ou similaire)
|
||||
perf_df : performances hebdomadaires (weekly_perf_full.csv)
|
||||
target_lag : horizon de prédiction en mois (1 = flux du mois suivant)
|
||||
perf_periods: périodes de perf à utiliser (défaut : PERF_PERIODS_TO_USE)
|
||||
|
||||
Retourne
|
||||
--------
|
||||
DataFrame avec une ligne par (compte × fonds × mois)
|
||||
contenant toutes les features et la variable cible.
|
||||
"""
|
||||
if perf_periods is None:
|
||||
perf_periods = [p for p in PERF_PERIODS_TO_USE
|
||||
if p in perf_df["perfPeriod"].unique()]
|
||||
if not perf_periods:
|
||||
# Fallback : utiliser toutes les périodes disponibles
|
||||
perf_periods = perf_df["perfPeriod"].unique().tolist()
|
||||
|
||||
if verbose:
|
||||
print("── FeatureBuilder ──────────────────────────────────────")
|
||||
print(f"Périodes de performance utilisées : {perf_periods}")
|
||||
|
||||
# Étape 1 : résolution ISIN dans perf via peers
|
||||
perf_df = self._resolve_perf_isin(perf_df)
|
||||
|
||||
# Étape 2 : calcul des métriques relatives
|
||||
if verbose:
|
||||
print("\nCalcul des performances relatives...")
|
||||
rel_df = self.rel_calc.compute(perf_df, perf_periods=perf_periods,
|
||||
verbose=verbose)
|
||||
|
||||
# Étape 3 : features client (AUM)
|
||||
if verbose:
|
||||
print("\nConstruction des features AUM...")
|
||||
stocks_feat = self._build_aum_features(stocks_df)
|
||||
|
||||
# Étape 4 : features de performance absolue
|
||||
if verbose:
|
||||
print("\nJointure des performances absolues...")
|
||||
perf_abs = self._build_absolute_perf_features(perf_df, perf_periods)
|
||||
|
||||
# Étape 5 : jointure AUM × perf absolue
|
||||
dataset = self._merge_aum_and_perf(stocks_feat, perf_abs, verbose)
|
||||
|
||||
# Étape 6 : jointure avec les métriques relatives
|
||||
if not rel_df.empty:
|
||||
if verbose:
|
||||
print("\nJointure des métriques relatives...")
|
||||
dataset = self._merge_relative_perf(dataset, rel_df, verbose)
|
||||
|
||||
# Étape 7 : variable cible
|
||||
if verbose:
|
||||
print(f"\nConstruction de la variable cible (lag = {target_lag} mois)...")
|
||||
dataset = self._build_target(dataset, lag=target_lag)
|
||||
|
||||
# Étape 8 : nettoyage final
|
||||
dataset = self._final_cleanup(dataset, verbose)
|
||||
|
||||
if verbose:
|
||||
self._print_dataset_summary(dataset)
|
||||
|
||||
return dataset
|
||||
|
||||
# ── Étape 1 : résolution ISIN dans perf ──────────────────────────────────
|
||||
|
||||
def _resolve_perf_isin(self, perf_df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Ajoute la colonne 'isin' dans perf_df via PeersLoader."""
|
||||
perf_df = perf_df.copy()
|
||||
perf_df["Date"] = pd.to_datetime(perf_df["Date"])
|
||||
perf_df["isin"] = perf_df["shareClass_name"].apply(
|
||||
self.loader.resolve_shareclass_name
|
||||
)
|
||||
|
||||
# Ajouter la stratégie Carmignac si disponible
|
||||
isin_to_strategy = dict(
|
||||
zip(self.loader.carmignac_df["ISIN"],
|
||||
self.loader.carmignac_df["carmignac_strategy"])
|
||||
)
|
||||
perf_df["carmignac_strategy"] = perf_df["isin"].map(isin_to_strategy)
|
||||
return perf_df
|
||||
|
||||
# ── Étape 2 : features AUM ────────────────────────────────────────────────
|
||||
|
||||
def _build_aum_features(self, stocks_df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""
|
||||
Construit les features comportementales depuis les snapshots AUM mensuels.
|
||||
|
||||
Features produites (par compte × fonds × date) :
|
||||
aum_t : encours à t
|
||||
aum_lag{n} : encours à t-n mois (n ∈ AUM_LAGS)
|
||||
aum_growth_{n}m : croissance relative sur n mois
|
||||
aum_share_wallet : part du fonds dans le portefeuille total du compte
|
||||
"""
|
||||
df = stocks_df.copy()
|
||||
df["Centralisation Date"] = pd.to_datetime(df["Centralisation Date"])
|
||||
|
||||
# Tri pour les lags
|
||||
df = df.sort_values(["Registrar Account - ID", "Product - Isin",
|
||||
"Centralisation Date"])
|
||||
|
||||
grp = df.groupby(["Registrar Account - ID", "Product - Isin"])
|
||||
|
||||
# Lags AUM
|
||||
for lag in AUM_LAGS:
|
||||
df[f"aum_lag{lag}"] = grp["Value - AUM €"].shift(lag)
|
||||
|
||||
# Croissances
|
||||
for lag in AUM_LAGS:
|
||||
df[f"aum_growth_{lag}m"] = (
|
||||
(df["Value - AUM €"] - df[f"aum_lag{lag}"])
|
||||
/ (df[f"aum_lag{lag}"].abs() + 1.0)
|
||||
)
|
||||
|
||||
# Part dans le portefeuille total du compte (concentration)
|
||||
total_aum_by_account = (
|
||||
df.groupby(["Registrar Account - ID", "Centralisation Date"])["Value - AUM €"]
|
||||
.transform("sum")
|
||||
)
|
||||
df["aum_share_wallet"] = df["Value - AUM €"] / (total_aum_by_account + 1.0)
|
||||
|
||||
# Renommage pour clarté
|
||||
df = df.rename(columns={"Value - AUM €": "aum_t"})
|
||||
|
||||
# Colonnes à conserver
|
||||
keep = (
|
||||
["Registrar Account - ID", "Product - Isin", "Centralisation Date",
|
||||
"Registrar Account - Region", "RegistrarAccount - Country",
|
||||
"Product - Asset Type", "Product - Strategy", "Product - Fund",
|
||||
"aum_t", "aum_share_wallet"]
|
||||
+ [f"aum_lag{lag}" for lag in AUM_LAGS]
|
||||
+ [f"aum_growth_{lag}m" for lag in AUM_LAGS]
|
||||
)
|
||||
keep = [c for c in keep if c in df.columns]
|
||||
return df[keep]
|
||||
|
||||
# ── Étape 3 : features de performance absolue ─────────────────────────────
|
||||
|
||||
def _build_absolute_perf_features(self, perf_df: pd.DataFrame,
|
||||
perf_periods: list[str]) -> pd.DataFrame:
|
||||
"""
|
||||
Pivote weekly_perf pour obtenir une ligne par (isin, date)
|
||||
avec une colonne par (période × métrique).
|
||||
|
||||
Colonnes produites : perf_return_6MoRet, perf_pct_1YrRet, etc.
|
||||
"""
|
||||
relevant = perf_df[perf_df["perfPeriod"].isin(perf_periods)].copy()
|
||||
if relevant.empty:
|
||||
return pd.DataFrame(columns=["isin", "Date"])
|
||||
|
||||
pivoted = relevant.pivot_table(
|
||||
index=["isin", "Date"],
|
||||
columns="perfPeriod",
|
||||
values=["return", "percentile"],
|
||||
aggfunc="mean"
|
||||
)
|
||||
# Aplatir les colonnes multi-index
|
||||
pivoted.columns = [
|
||||
f"perf_{metric}_{period}"
|
||||
for metric, period in pivoted.columns
|
||||
]
|
||||
return pivoted.reset_index()
|
||||
|
||||
# ── Étape 4 : jointure AUM × perf absolue ────────────────────────────────
|
||||
|
||||
def _merge_aum_and_perf(self, stocks_feat: pd.DataFrame,
|
||||
perf_abs: pd.DataFrame,
|
||||
verbose: bool) -> pd.DataFrame:
|
||||
"""
|
||||
merge_asof temporel : pour chaque snapshot mensuel AUM,
|
||||
trouve la performance hebdomadaire la plus récente ≤ date snapshot.
|
||||
"""
|
||||
if perf_abs.empty:
|
||||
if verbose:
|
||||
print(" ⚠ Aucune performance absolue à joindre.")
|
||||
return stocks_feat
|
||||
|
||||
merged_parts = []
|
||||
for isin_val in stocks_feat["Product - Isin"].unique():
|
||||
s = stocks_feat[stocks_feat["Product - Isin"] == isin_val].sort_values(
|
||||
"Centralisation Date")
|
||||
p = perf_abs[perf_abs["isin"] == isin_val].sort_values("Date")
|
||||
|
||||
if p.empty:
|
||||
merged_parts.append(s)
|
||||
continue
|
||||
|
||||
m = pd.merge_asof(
|
||||
s, p,
|
||||
left_on="Centralisation Date",
|
||||
right_on="Date",
|
||||
direction="backward",
|
||||
tolerance=pd.Timedelta(f"{MERGE_ASOF_TOLERANCE_DAYS}d")
|
||||
)
|
||||
merged_parts.append(m)
|
||||
|
||||
result = pd.concat(merged_parts, ignore_index=True)
|
||||
perf_cols_joined = [c for c in result.columns if c.startswith("perf_")]
|
||||
|
||||
if verbose:
|
||||
n_matched = result[perf_cols_joined[0]].notna().sum() if perf_cols_joined else 0
|
||||
print(f" {n_matched}/{len(result)} lignes avec performance jointe "
|
||||
f"({len(perf_cols_joined)} colonnes)")
|
||||
|
||||
return result
|
||||
|
||||
# ── Étape 5 : jointure métriques relatives ────────────────────────────────
|
||||
|
||||
def _merge_relative_perf(self, dataset: pd.DataFrame,
|
||||
rel_df: pd.DataFrame,
|
||||
verbose: bool) -> pd.DataFrame:
|
||||
"""
|
||||
Joint les métriques relatives sur (carmignac_strategy, date).
|
||||
|
||||
Stratégie de jointure :
|
||||
- La stratégie Carmignac d'un compte est déduite depuis
|
||||
Product - Strategy (nom court) ou ISIN via peers_loader.
|
||||
- merge_asof temporel avec tolérance ±35j.
|
||||
"""
|
||||
# Récupérer la stratégie Carmignac depuis l'ISIN
|
||||
isin_to_strategy = dict(
|
||||
zip(self.loader.carmignac_df["ISIN"],
|
||||
self.loader.carmignac_df["carmignac_strategy"])
|
||||
)
|
||||
dataset["carmignac_strategy"] = dataset["Product - Isin"].map(isin_to_strategy)
|
||||
|
||||
# Agréger rel_df sur toutes périodes (moyenne par stratégie × date)
|
||||
rel_cols = [c for c in rel_df.columns if c.startswith("rel_")]
|
||||
if not rel_cols:
|
||||
return dataset
|
||||
|
||||
rel_agg = (rel_df
|
||||
.groupby(["carmignac_strategy", "Date"])[rel_cols]
|
||||
.mean()
|
||||
.reset_index()
|
||||
.sort_values(["carmignac_strategy", "Date"]))
|
||||
|
||||
merged_parts = []
|
||||
for strat in dataset["carmignac_strategy"].dropna().unique():
|
||||
d_strat = dataset[dataset["carmignac_strategy"] == strat].sort_values(
|
||||
"Centralisation Date")
|
||||
r_strat = rel_agg[rel_agg["carmignac_strategy"] == strat].sort_values("Date")
|
||||
|
||||
if r_strat.empty:
|
||||
merged_parts.append(d_strat)
|
||||
continue
|
||||
|
||||
m = pd.merge_asof(
|
||||
d_strat, r_strat,
|
||||
left_on="Centralisation Date",
|
||||
right_on="Date",
|
||||
direction="backward",
|
||||
tolerance=pd.Timedelta(f"{MERGE_ASOF_TOLERANCE_DAYS}d")
|
||||
)
|
||||
merged_parts.append(m)
|
||||
|
||||
# Ajouter les comptes sans stratégie identifiée
|
||||
no_strat = dataset[dataset["carmignac_strategy"].isna()]
|
||||
if not no_strat.empty:
|
||||
merged_parts.append(no_strat)
|
||||
|
||||
result = pd.concat(merged_parts, ignore_index=True)
|
||||
|
||||
if verbose:
|
||||
n_rel = result[rel_cols[0]].notna().sum() if rel_cols else 0
|
||||
print(f" {n_rel}/{len(result)} lignes avec métriques relatives jointes "
|
||||
f"({len(rel_cols)} colonnes)")
|
||||
|
||||
return result
|
||||
|
||||
# ── Étape 6 : variable cible ──────────────────────────────────────────────
|
||||
|
||||
def _build_target(self, dataset: pd.DataFrame, lag: int) -> pd.DataFrame:
|
||||
"""
|
||||
Construit la variable cible : ΔAum(t → t+lag).
|
||||
|
||||
flux_net_proxy = aum(t+lag) - aum(t)
|
||||
|
||||
Note : Dans un contexte de production, remplacer par :
|
||||
flux_net = sum(souscriptions[t:t+lag]) - sum(rachats[t:t+lag])
|
||||
depuis le fichier de transactions quotidiennes.
|
||||
"""
|
||||
dataset = dataset.sort_values(
|
||||
["Registrar Account - ID", "Product - Isin", "Centralisation Date"]
|
||||
)
|
||||
grp = dataset.groupby(["Registrar Account - ID", "Product - Isin"])
|
||||
|
||||
dataset["aum_next"] = grp["aum_t"].shift(-lag)
|
||||
dataset["flux_net_proxy"] = dataset["aum_next"] - dataset["aum_t"]
|
||||
|
||||
# Feature supplémentaire : flux relatif (normalisé par l'AUM)
|
||||
dataset["flux_net_relative"] = (
|
||||
dataset["flux_net_proxy"] / (dataset["aum_t"].abs() + 1.0)
|
||||
)
|
||||
|
||||
return dataset
|
||||
|
||||
# ── Étape 7 : nettoyage final ─────────────────────────────────────────────
|
||||
|
||||
def _final_cleanup(self, dataset: pd.DataFrame,
|
||||
verbose: bool) -> pd.DataFrame:
|
||||
"""
|
||||
Supprime les doublons de colonnes, retire les lignes sans cible,
|
||||
log les taux de remplissage.
|
||||
"""
|
||||
# Supprimer les colonnes en double (artefacts du merge)
|
||||
dataset = dataset.loc[:, ~dataset.columns.duplicated()]
|
||||
|
||||
# Retirer les lignes sans variable cible
|
||||
n_before = len(dataset)
|
||||
dataset = dataset.dropna(subset=["flux_net_proxy"])
|
||||
n_after = len(dataset)
|
||||
|
||||
if verbose and n_before > n_after:
|
||||
print(f" Lignes supprimées (cible manquante) : {n_before - n_after}")
|
||||
|
||||
return dataset.reset_index(drop=True)
|
||||
|
||||
# ── Résumé ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _print_dataset_summary(self, dataset: pd.DataFrame):
|
||||
feature_cols = self.get_feature_columns(dataset)
|
||||
print("\n── Dataset final ───────────────────────────────────────")
|
||||
print(f"Lignes : {len(dataset):,}")
|
||||
print(f"Colonnes totales : {len(dataset.columns)}")
|
||||
print(f"Features : {len(feature_cols)}")
|
||||
print(f"\nTaux de remplissage des features :")
|
||||
|
||||
families = {
|
||||
"AUM": [c for c in feature_cols if c.startswith("aum_")],
|
||||
"Perf abs": [c for c in feature_cols if c.startswith("perf_")],
|
||||
"Perf rel": [c for c in feature_cols if c.startswith("rel_")],
|
||||
}
|
||||
for family, cols in families.items():
|
||||
if cols:
|
||||
fill_rates = dataset[cols].notna().mean()
|
||||
avg_fill = fill_rates.mean()
|
||||
print(f" {family:<12} ({len(cols):2d} cols) : {avg_fill:.1%} en moyenne")
|
||||
|
||||
print(f"\nVariable cible 'flux_net_proxy' :")
|
||||
t = dataset["flux_net_proxy"]
|
||||
print(f" Médiane : {t.median():+,.0f} €")
|
||||
print(f" Std : {t.std():,.0f} €")
|
||||
print(f" % positif (souscription nette) : {(t > 0).mean():.1%}")
|
||||
print(f" % négatif (rachat net) : {(t < 0).mean():.1%}")
|
||||
print("─────────────────────────────────────────────────────────")
|
||||
|
||||
# ── API publique ──────────────────────────────────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
def get_feature_columns(dataset: pd.DataFrame) -> list[str]:
|
||||
"""Retourne la liste des colonnes de features (exclut les métadonnées et la cible)."""
|
||||
exclude = {
|
||||
"Registrar Account - ID", "Product - Isin", "Centralisation Date",
|
||||
"Product - Fund", "Product - Strategy", "Product - Asset Type",
|
||||
"Registrar Account - Region", "RegistrarAccount - Country",
|
||||
"carmignac_strategy", "isin", "Date",
|
||||
"aum_next", "flux_net_proxy", "flux_net_relative", "shareClass_name",
|
||||
}
|
||||
return [c for c in dataset.columns if c not in exclude
|
||||
and dataset[c].dtype in [np.float64, np.int64, float, int]]
|
||||
|
||||
@staticmethod
|
||||
def get_target_column() -> str:
|
||||
return "flux_net_proxy"
|
||||
|
||||
|
||||
# ── Usage autonome ────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    import sys

    from peers_loader import PeersLoader
    from relative_performance import RelativePerformanceCalculator

    # Up to three positional arguments (peers directory, stocks CSV, perf
    # CSV); each missing one falls back to its default.
    cli_defaults = (".", "equity_stocks_head.csv", "weekly_perf_head.csv")
    given = sys.argv[1:4]
    peers_root, stocks_csv, perf_csv = (*given, *cli_defaults[len(given):])

    peers = PeersLoader(peers_dir=peers_root).load()
    feature_builder = FeatureBuilder(peers, RelativePerformanceCalculator(peers))

    features = feature_builder.build(pd.read_csv(stocks_csv), pd.read_csv(perf_csv))

    # Descriptive statistics of the feature columns only
    print("\nAperçu du dataset :")
    numeric_features = FeatureBuilder.get_feature_columns(features)
    overview = features[numeric_features].describe().round(3)
    print(overview.to_string())

    features.to_csv("dataset_features.csv", index=False)
    print("\nDataset sauvegardé dans dataset_features.csv")
|
||||
399
src/peers_loader.py
Normal file
399
src/peers_loader.py
Normal file
|
|
@ -0,0 +1,399 @@
|
|||
"""
|
||||
peers_loader.py
|
||||
---------------
|
||||
Chargement et consolidation de tous les fichiers *_peers.csv.
|
||||
|
||||
Rôles :
|
||||
1. Parser tous les fichiers <STRATEGY>_peers.csv depuis un dossier local
|
||||
ou depuis un bucket S3 (via s3fs)
|
||||
2. Identifier automatiquement les lignes Carmignac vs concurrents
|
||||
3. Construire une table de référence ISIN ↔ shareClass_name ↔ stratégie Carmignac
|
||||
→ résout le problème de jointure avec weekly_perf (qui n'a que les noms)
|
||||
4. Exposer des helpers utilisés par les autres modules
|
||||
|
||||
Usage (local) :
|
||||
from peers_loader import PeersLoader
|
||||
loader = PeersLoader("path/to/peers/")
|
||||
loader.load()
|
||||
|
||||
Usage (S3) :
|
||||
from peers_loader import PeersLoader
|
||||
loader = PeersLoader(
|
||||
"s3://my-bucket/peers/",
|
||||
s3_options={"key": "ACCESS_KEY", "secret": "SECRET_KEY"},
|
||||
# ou laisser s3_options=None pour utiliser les credentials AWS standard
|
||||
# (variables d'environnement, ~/.aws/credentials, IAM role...)
|
||||
)
|
||||
loader.load()
|
||||
|
||||
# Table de référence complète
|
||||
loader.peers_df # tous les fonds (Carmignac + concurrents)
|
||||
loader.isin_map # dict shareClass_name → ISIN
|
||||
loader.carmignac_df # uniquement les shareclasses Carmignac
|
||||
loader.competitors_df # uniquement les concurrents
|
||||
"""
|
||||
|
||||
import re
|
||||
import io
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# ── Constantes ────────────────────────────────────────────────────────────────
|
||||
|
||||
# Field separator of the peers files
PEERS_SEPARATOR = "|"
CARMIGNAC_IDENTIFIERS = ["carmignac"] # lowercase — used to detect the Carmignac rows

# Column name -> type; every listed column is typed as str.
# NOTE(review): presumably passed as dtype= when reading the peers CSVs — confirm.
COLUMNS_DTYPES = {
    "Name": str,
    "ISIN": str,
    "SecId_MS": str,
    "FundId": str,
    "Global Broad Category Group": str,
    "Global Category": str,
    "Morningstar Category": str,
    "Index Fund": str,
    "Enhanced Index": str,
    "Domicile": str,
}

# Columns holding dates
DATE_COLUMNS = ["Inception Date", "Inception Date of Fund's Oldest Share Class"]

# URL scheme that marks a path as an S3 location
S3_PREFIX = "s3://"
|
||||
|
||||
|
||||
# ── Backends d'accès aux fichiers ─────────────────────────────────────────────
|
||||
|
||||
class _LocalBackend:
    """File access on the local filesystem, built on pathlib."""

    def __init__(self, root: str):
        # Directory whose direct children are listed by list_files()
        self.root = Path(root)

    def list_files(self, pattern: re.Pattern) -> list[str]:
        """Return the sorted paths of the regular files directly under the
        root whose file name matches `pattern` from its start."""
        matches = []
        for entry in self.root.iterdir():
            if not entry.is_file():
                continue
            if pattern.match(entry.name):
                matches.append(str(entry))
        matches.sort()
        return matches

    def read_csv(self, path: str, **kwargs) -> pd.DataFrame:
        """Read `path` with pandas, forwarding every keyword argument."""
        frame = pd.read_csv(path, **kwargs)
        return frame

    @staticmethod
    def filename(path: str) -> str:
        """Return the final component of `path`."""
        as_path = Path(path)
        return as_path.name
|
||||
|
||||
|
||||
class _S3Backend:
    """
    File access on an S3 bucket through s3fs.

    Exposes the same three operations as _LocalBackend (list_files,
    read_csv, filename) so the two backends are interchangeable.

    Authentication: `s3_options` is forwarded as keyword arguments to
    s3fs.S3FileSystem (e.g. {"key": ..., "secret": ..., "token": ...}).
    When it is None, s3fs is created without arguments and resolves
    credentials on its own (environment variables, ~/.aws/credentials,
    IAM role, ... — see the s3fs documentation).
    """

    def __init__(self, root: str, s3_options: Optional[dict] = None):
        """
        Parameters
        ----------
        root : folder to read, with or without the "s3://" prefix
        s3_options : optional keyword arguments for s3fs.S3FileSystem

        Raises
        ------
        ImportError
            If the s3fs package is not installed.
        """
        try:
            import s3fs
        except ImportError as exc:
            # Chain the original error so the real cause stays visible
            raise ImportError(
                "Le package s3fs est requis pour lire depuis S3.\n"
                "Installer avec : pip install s3fs"
            ) from exc

        # Path normalisation: drop the s3:// prefix first, then any trailing
        # slash (the reverse order would turn a bare "s3://" into "s3:")
        self.root = root.removeprefix(S3_PREFIX).rstrip("/")

        # s3_options=None -> s3fs falls back to its default credentials
        self.fs = s3fs.S3FileSystem(**(s3_options or {}))

    def list_files(self, pattern: re.Pattern) -> list[str]:
        """Return the sorted "s3://..." paths of the entries listed under the
        root whose file name matches `pattern` from its start."""
        all_files = self.fs.ls(self.root, detail=False)
        matched = sorted(
            f"s3://{p}" for p in all_files
            if pattern.match(self.filename(p))
        )
        return matched

    def read_csv(self, path: str, **kwargs) -> pd.DataFrame:
        """Read a CSV from S3 through an in-memory buffer, forwarding every
        keyword argument to pandas.read_csv."""
        # The path is opened without its "s3://" prefix
        s3_path = path.removeprefix(S3_PREFIX)
        with self.fs.open(s3_path, "rb") as f:
            return pd.read_csv(io.BytesIO(f.read()), **kwargs)

    @staticmethod
    def filename(path: str) -> str:
        """Return the file name (text after the last "/") of an S3 path."""
        return path.split("/")[-1]
|
||||
|
||||
|
||||
# ── Classe principale ─────────────────────────────────────────────────────────
|
||||
|
||||
class PeersLoader:
|
||||
"""
|
||||
Charge et consolide l'ensemble des fichiers *_peers.csv.
|
||||
Supporte les sources locales et S3 de manière transparente.
|
||||
|
||||
Paramètres
|
||||
----------
|
||||
peers_dir : str
|
||||
Dossier local (ex: "data/peers/") ou chemin S3 (ex: "s3://bucket/peers/").
|
||||
|
||||
s3_options : dict, optionnel
|
||||
Options d'authentification S3 passées à s3fs.S3FileSystem.
|
||||
Exemples :
|
||||
{"key": "AKID...", "secret": "secret..."} # clés explicites
|
||||
{"profile": "my-aws-profile"} # profil AWS CLI
|
||||
{"anon": True} # bucket public
|
||||
Si None (défaut), s3fs utilise la chaîne de credentials AWS standard.
|
||||
Ignoré si peers_dir est un chemin local.
|
||||
"""
|
||||
|
||||
def __init__(self, peers_dir: str, s3_options: Optional[dict] = None):
|
||||
self._backend = (
|
||||
_S3Backend(peers_dir, s3_options)
|
||||
if peers_dir.startswith(S3_PREFIX)
|
||||
else _LocalBackend(peers_dir)
|
||||
)
|
||||
self.peers_dir = peers_dir
|
||||
self.peers_df = pd.DataFrame()
|
||||
self.carmignac_df = pd.DataFrame()
|
||||
self.competitors_df = pd.DataFrame()
|
||||
self.isin_map = {}
|
||||
self._loaded = False
|
||||
|
||||
# ── Chargement ────────────────────────────────────────────────────────────
|
||||
|
||||
def load(self, verbose: bool = True) -> "PeersLoader":
|
||||
"""Charge tous les fichiers peers et construit les tables dérivées."""
|
||||
files = self._discover_files()
|
||||
if not files:
|
||||
raise FileNotFoundError(
|
||||
f"Aucun fichier *_peers.csv trouvé dans {self.peers_dir}"
|
||||
)
|
||||
|
||||
parts = []
|
||||
for f in files:
|
||||
strategy_code = self._extract_strategy_code(f)
|
||||
df = self._load_single_file(f, strategy_code)
|
||||
parts.append(df)
|
||||
if verbose:
|
||||
n_carm = df["is_carmignac"].sum()
|
||||
n_comp = (~df["is_carmignac"]).sum()
|
||||
print(f" [{strategy_code}] {len(df)} fonds chargés "
|
||||
f"({n_carm} Carmignac, {n_comp} concurrents)")
|
||||
|
||||
self.peers_df = pd.concat(parts, ignore_index=True)
|
||||
self._build_derived_tables()
|
||||
self._loaded = True
|
||||
|
||||
if verbose:
|
||||
self._print_summary()
|
||||
|
||||
return self
|
||||
|
||||
def _discover_files(self) -> list[str]:
|
||||
"""Retourne les chemins des fichiers *_peers.csv via le backend actif."""
|
||||
pattern = re.compile(r"^[A-Z]+_peers\.csv$")
|
||||
return self._backend.list_files(pattern)
|
||||
|
||||
def _extract_strategy_code(self, filepath: str) -> str:
|
||||
"""Extrait le code stratégie depuis le nom de fichier (ex: CAD, CARE)."""
|
||||
filename = self._backend.filename(filepath)
|
||||
return Path(filename).stem.replace("_peers", "")
|
||||
|
||||
def _load_single_file(self, filepath: str, strategy_code: str) -> pd.DataFrame:
|
||||
"""Charge un fichier peers (local ou S3) et l'enrichit avec les colonnes de contexte."""
|
||||
df = self._backend.read_csv(filepath, sep=PEERS_SEPARATOR, dtype=str)
|
||||
|
||||
# Parsing des dates
|
||||
for col in DATE_COLUMNS:
|
||||
if col in df.columns:
|
||||
df[col] = pd.to_datetime(df[col], errors="coerce")
|
||||
|
||||
# Nettoyage des colonnes texte
|
||||
for col in COLUMNS_DTYPES:
|
||||
if col in df.columns:
|
||||
df[col] = df[col].str.strip()
|
||||
|
||||
# Colonnes de contexte
|
||||
df["carmignac_strategy"] = strategy_code
|
||||
df["is_carmignac"] = df["Name"].str.lower().str.contains(
|
||||
"|".join(CARMIGNAC_IDENTIFIERS), na=False
|
||||
)
|
||||
df["is_index_fund"] = df["Index Fund"].str.lower().eq("yes")
|
||||
|
||||
return df
|
||||
|
||||
# ── Construction des tables dérivées ─────────────────────────────────────
|
||||
|
||||
def _build_derived_tables(self):
|
||||
"""Construit carmignac_df, competitors_df et isin_map."""
|
||||
|
||||
# Tables Carmignac / concurrents
|
||||
self.carmignac_df = self.peers_df[self.peers_df["is_carmignac"]].copy()
|
||||
self.competitors_df = self.peers_df[~self.peers_df["is_carmignac"]].copy()
|
||||
|
||||
# Table de correspondance Name → ISIN (utile pour joindre weekly_perf)
|
||||
valid = self.peers_df[self.peers_df["ISIN"].notna() & (self.peers_df["ISIN"] != "")]
|
||||
self.isin_map = dict(zip(valid["Name"], valid["ISIN"]))
|
||||
|
||||
# Table de correspondance enrichie : shareClass_name (format Morningstar)
|
||||
# → ISIN + stratégie Carmignac. Utilisée par feature_engineering.py.
|
||||
self.name_to_strategy = dict(
|
||||
zip(
|
||||
self.carmignac_df["Name"],
|
||||
self.carmignac_df["carmignac_strategy"]
|
||||
)
|
||||
)
|
||||
|
||||
# ── API publique ──────────────────────────────────────────────────────────
|
||||
|
||||
def get_peers_for_strategy(self, strategy_code: str,
|
||||
exclude_index: bool = False,
|
||||
exclude_carmignac: bool = False) -> pd.DataFrame:
|
||||
"""
|
||||
Retourne les fonds peers d'une stratégie Carmignac donnée.
|
||||
|
||||
Paramètres
|
||||
----------
|
||||
strategy_code : ex. "CAD", "CARE"
|
||||
exclude_index : si True, exclut les fonds indiciels (ETF)
|
||||
exclude_carmignac : si True, exclut les fonds Carmignac eux-mêmes
|
||||
"""
|
||||
self._check_loaded()
|
||||
mask = self.peers_df["carmignac_strategy"] == strategy_code
|
||||
df = self.peers_df[mask].copy()
|
||||
|
||||
if exclude_index:
|
||||
df = df[~df["is_index_fund"]]
|
||||
if exclude_carmignac:
|
||||
df = df[~df["is_carmignac"]]
|
||||
|
||||
return df
|
||||
|
||||
def get_carmignac_isin_for_strategy(self, strategy_code: str) -> list[str]:
|
||||
"""Retourne la liste des ISIN Carmignac pour une stratégie donnée."""
|
||||
self._check_loaded()
|
||||
mask = (
|
||||
(self.carmignac_df["carmignac_strategy"] == strategy_code)
|
||||
& self.carmignac_df["ISIN"].notna()
|
||||
)
|
||||
return self.carmignac_df[mask]["ISIN"].tolist()
|
||||
|
||||
def get_competitor_isin_for_strategy(self, strategy_code: str,
|
||||
exclude_index: bool = True) -> list[str]:
|
||||
"""Retourne la liste des ISIN concurrents pour une stratégie donnée."""
|
||||
self._check_loaded()
|
||||
peers = self.get_peers_for_strategy(strategy_code,
|
||||
exclude_index=exclude_index,
|
||||
exclude_carmignac=True)
|
||||
return peers["ISIN"].dropna().tolist()
|
||||
|
||||
def get_strategy_universe(self) -> pd.DataFrame:
|
||||
"""
|
||||
Retourne un résumé par stratégie Carmignac :
|
||||
nombre de shareclasses Carmignac, nombre de concurrents,
|
||||
catégories Morningstar couvertes.
|
||||
"""
|
||||
self._check_loaded()
|
||||
rows = []
|
||||
for strat in self.peers_df["carmignac_strategy"].unique():
|
||||
sub = self.peers_df[self.peers_df["carmignac_strategy"] == strat]
|
||||
rows.append({
|
||||
"strategy": strat,
|
||||
"n_carmignac_sc": sub["is_carmignac"].sum(),
|
||||
"n_competitors": (~sub["is_carmignac"] & ~sub["is_index_fund"]).sum(),
|
||||
"n_index_funds": sub["is_index_fund"].sum(),
|
||||
"ms_categories": ", ".join(sub["Morningstar Category"].dropna().unique()),
|
||||
"broad_category": sub["Global Broad Category Group"].dropna().iloc[0]
|
||||
if len(sub) > 0 else "",
|
||||
})
|
||||
return pd.DataFrame(rows)
|
||||
|
||||
def resolve_shareclass_name(self, name: str) -> str | None:
|
||||
"""
|
||||
Tente de retrouver l'ISIN depuis un nom de shareclass Morningstar.
|
||||
Matching exact d'abord, puis matching partiel (substring).
|
||||
Utilisé pour joindre weekly_perf avec la table peers.
|
||||
"""
|
||||
self._check_loaded()
|
||||
# 1. Exact match
|
||||
if name in self.isin_map:
|
||||
return self.isin_map[name]
|
||||
# 2. Substring match (le nom dans weekly_perf peut légèrement différer)
|
||||
name_lower = name.lower()
|
||||
for ref_name, isin in self.isin_map.items():
|
||||
if ref_name.lower() in name_lower or name_lower in ref_name.lower():
|
||||
return isin
|
||||
return None
|
||||
|
||||
# ── Helpers internes ──────────────────────────────────────────────────────
|
||||
|
||||
def _check_loaded(self):
|
||||
if not self._loaded:
|
||||
raise RuntimeError("Appeler .load() avant d'utiliser PeersLoader.")
|
||||
|
||||
def _print_summary(self):
|
||||
print("\n── Résumé PeersLoader ───────────────────────────────────")
|
||||
universe = self.get_strategy_universe()
|
||||
print(universe.to_string(index=False))
|
||||
print(f"\nTotal fonds uniques (ISIN) : {self.peers_df['ISIN'].nunique()}")
|
||||
print(f" dont Carmignac : {self.carmignac_df['ISIN'].nunique()}")
|
||||
print(f" dont concurrents actifs : "
|
||||
f"{self.competitors_df[~self.competitors_df['is_index_fund']]['ISIN'].nunique()}")
|
||||
print("─────────────────────────────────────────────────────────\n")
|
||||
|
||||
|
||||
# ── Usage autonome ────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    # Stand-alone smoke test: load a peers directory and print what was found.
    import sys
    import argparse

    parser = argparse.ArgumentParser(description="PeersLoader — test autonome")
    parser.add_argument("peers_dir", nargs="?", default=".",
                        help="Chemin local ou s3://bucket/prefix/")
    credential_flags = (
        ("--aws-key", "AWS Access Key ID"),
        ("--aws-secret", "AWS Secret Access Key"),
        ("--aws-token", "AWS Session Token (optionnel)"),
        ("--aws-profile", "Profil AWS CLI (~/.aws/credentials)"),
    )
    for flag, help_text in credential_flags:
        parser.add_argument(flag, default=None, help=help_text)
    args = parser.parse_args()

    # s3_options stays None unless credentials were given on the command line;
    # an explicit key/secret pair takes precedence over a profile.
    if args.aws_key and args.aws_secret:
        s3_options = {"key": args.aws_key, "secret": args.aws_secret}
        if args.aws_token:
            s3_options["token"] = args.aws_token
    elif args.aws_profile:
        s3_options = {"profile": args.aws_profile}
    else:
        s3_options = None

    loader = PeersLoader(args.peers_dir, s3_options=s3_options)
    loader.load()

    print("\nShareclasses Carmignac identifiées :")
    shown_columns = ["carmignac_strategy", "Name", "ISIN", "Morningstar Category"]
    print(loader.carmignac_df[shown_columns].to_string(index=False))

    print("\nTest resolve_shareclass_name :")
    test_names = [
        "Carmignac Pf Asia Discovery A EUR Acc",
        "Carmignac Absolute Ret Eur A EUR Acc",
        "MS INVF Asia Opportunity A",
    ]
    for n in test_names:
        isin = loader.resolve_shareclass_name(n)
        print(f" {n!r:50s} → {isin}")
|
||||
415
src/pipeline.py
Normal file
415
src/pipeline.py
Normal file
|
|
@ -0,0 +1,415 @@
|
|||
"""
|
||||
pipeline.py
|
||||
------------
|
||||
Orchestrateur du pipeline complet.
|
||||
|
||||
Ce script assemble les quatre modules dans l'ordre et produit :
|
||||
- dataset_features.csv : dataset de modelisation complet
|
||||
- model_results.png : graphiques walk-forward + importances
|
||||
- peers_summary.csv : resume de l'univers concurrentiel
|
||||
|
||||
Modules appeles :
|
||||
peers_loader.py -> PeersLoader
|
||||
relative_performance.py -> RelativePerformanceCalculator
|
||||
feature_engineering.py -> FeatureBuilder
|
||||
predictive_model.py -> WalkForwardModel
|
||||
|
||||
Configuration :
|
||||
Tous les chemins sont centralises dans la section CONFIG ci-dessous.
|
||||
Les chemins s3:// sont supportes nativement pour les inputs ET les outputs.
|
||||
|
||||
Usage (local) :
|
||||
python pipeline.py
|
||||
|
||||
Usage (S3 / MinIO) :
|
||||
python pipeline.py \\
|
||||
--peers-dir s3://projet-bdc-carmignac-g3/peers/ \\
|
||||
--stocks s3://projet-bdc-carmignac-g3/aum_equity_95pct.csv \\
|
||||
--perf "s3://projet-bdc-data/carmignac/Data Modélisation/competitors/weekly_perf_full.csv" \\
|
||||
--out-dir s3://projet-bdc-carmignac-g3/outputs/
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import io
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import pandas as pd
|
||||
import s3fs
|
||||
|
||||
# -- Imports des modules locaux -----------------------------------------------
|
||||
from peers_loader import PeersLoader
|
||||
from relative_performance import RelativePerformanceCalculator, summarize_relative_performance
|
||||
from feature_engineering import FeatureBuilder
|
||||
from predictive_model import WalkForwardModel
|
||||
|
||||
|
||||
S3_PREFIX = "s3://"


# =============================================================================
# CONFIG — default locations of the full datasets, outputs and model settings
# =============================================================================

DEFAULT_CONFIG = dict(
    # Folder holding every *_peers.csv file (local or s3://)
    peers_dir="s3://projet-bdc-carmignac-g3/peers/",

    # Main data files (local or s3://)
    stocks_path="s3://projet-bdc-carmignac-g3/stock_repaired.csv",
    perf_path="s3://projet-bdc-data/carmignac/Data Modélisation/competitors/weekly_perf_full.csv",

    # Outputs (local or s3://)
    out_dataset="dataset_features.csv",
    out_plot="model_results.png",
    out_peers="peers_summary.csv",
    out_rel_perf="relative_performance.csv",

    # Model parameters
    target_lag=1,          # prediction horizon (months)
    min_train_frac=0.4,    # minimum fraction of dates used for training
    perf_periods=None,     # None = every available period
)
|
||||
|
||||
# =============================================================================
|
||||
|
||||
|
||||
# -- Helpers S3 ---------------------------------------------------------------
|
||||
|
||||
def build_s3_filesystem() -> s3fs.S3FileSystem:
    """
    Build the S3 filesystem used for the GENES / MinIO storage.

    The endpoint and credentials come from ``build_s3_options()`` so that
    this filesystem and the one PeersLoader creates from the same options
    can never drift apart. Credentials are read from the environment:
        AWS_ACCESS_KEY_ID
        AWS_SECRET_ACCESS_KEY
        AWS_SESSION_TOKEN (optional; passed as None when absent)

    Raises
    ------
    KeyError : if AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is not set.
    """
    return s3fs.S3FileSystem(**build_s3_options())
|
||||
|
||||
|
||||
def build_s3_options() -> dict:
    """
    Return the keyword arguments describing the MinIO connection: the fixed
    endpoint plus the credentials found in the environment.

    ``key`` and ``secret`` are mandatory (KeyError when the variable is
    missing); ``token`` is None when AWS_SESSION_TOKEN is not set.
    """
    env = os.environ
    options = {
        "client_kwargs": {
            "endpoint_url": "https://minio-simple.lab.groupe-genes.fr"
        },
    }
    options["key"] = env["AWS_ACCESS_KEY_ID"]
    options["secret"] = env["AWS_SECRET_ACCESS_KEY"]
    options["token"] = env.get("AWS_SESSION_TOKEN")
    return options
|
||||
|
||||
|
||||
def _is_s3(path: str) -> bool:
    """Tell whether *path* (converted to text) starts with the S3 scheme prefix."""
    text = str(path)
    return text.startswith(S3_PREFIX)
|
||||
|
||||
|
||||
def _s3_key(path: str) -> str:
    """
    Strip the ``s3://`` prefix to obtain the raw ``bucket/key`` form.

    A path that does not carry the prefix is returned unchanged, instead of
    silently losing its first characters.
    """
    return path.removeprefix(S3_PREFIX)
|
||||
|
||||
|
||||
def read_csv_any(path: str, fs: Optional[s3fs.S3FileSystem] = None,
                 sep=",", **kwargs) -> pd.DataFrame:
    """
    Read a CSV from a local path or from S3, transparently.

    Parameters
    ----------
    path : local path or s3://bucket/key
    fs : s3fs filesystem (mandatory when path is on S3)
    sep : column separator handed to pd.read_csv
    kwargs : any other keyword argument, forwarded to pd.read_csv

    Raises
    ------
    ValueError : if path is on S3 and no filesystem was given.
    """
    if _is_s3(path):
        if fs is None:
            raise ValueError("Un filesystem s3fs est requis pour lire depuis S3.")
        # The object is downloaded into memory, then parsed from the buffer.
        with fs.open(_s3_key(path), "rb") as f:
            return pd.read_csv(io.BytesIO(f.read()), sep=sep, **kwargs)
    return pd.read_csv(path, sep=sep, **kwargs)
|
||||
|
||||
|
||||
def write_csv_any(df: pd.DataFrame, path: str,
                  fs: Optional[s3fs.S3FileSystem] = None, **kwargs):
    """
    Write a DataFrame as CSV to a local path or to S3.

    Parameters
    ----------
    df : DataFrame to write
    path : local path or s3://bucket/key
    fs : s3fs filesystem (mandatory when path is on S3)
    kwargs : forwarded to df.to_csv (index=False unless overridden)

    Raises
    ------
    ValueError : if path is on S3 and no filesystem was given.
    """
    kwargs.setdefault("index", False)

    if not _is_s3(path):
        # Local target: make sure the destination folder exists first.
        Path(path).parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(path, **kwargs)
        return

    if fs is None:
        raise ValueError("Un filesystem s3fs est requis pour ecrire sur S3.")
    payload = df.to_csv(**kwargs).encode("utf-8")
    with fs.open(_s3_key(path), "wb") as sink:
        sink.write(payload)
|
||||
|
||||
|
||||
def write_bytes_any(data: bytes, path: str,
                    fs: Optional[s3fs.S3FileSystem] = None):
    """
    Write raw bytes (e.g. a PNG image) to a local path or to S3.

    Raises ValueError if path is on S3 and no filesystem was given.
    """
    if not _is_s3(path):
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(data)
        return

    if fs is None:
        raise ValueError("Un filesystem s3fs est requis pour ecrire sur S3.")
    with fs.open(_s3_key(path), "wb") as sink:
        sink.write(data)
|
||||
|
||||
|
||||
def path_exists(path: str, fs: Optional[s3fs.S3FileSystem] = None) -> bool:
    """Check whether a local or S3 file exists (an S3 path without filesystem counts as missing)."""
    if not _is_s3(path):
        return Path(path).exists()
    if fs is None:
        return False
    return fs.exists(_s3_key(path))
|
||||
|
||||
|
||||
def file_size_kb(path: str, fs: Optional[s3fs.S3FileSystem] = None) -> int:
    """Return the size of a local or S3 file in whole KiB (0 for an S3 path without filesystem)."""
    if not _is_s3(path):
        return Path(path).stat().st_size // 1024
    if fs is None:
        return 0
    n_bytes = fs.info(_s3_key(path)).get("size", 0)
    return n_bytes // 1024
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _export_plot(model, out_plot: str, fs=None) -> bool:
    """Render the model figure to *out_plot* (local or S3); return True if a file was produced."""
    import tempfile

    if not _is_s3(out_plot):
        Path(out_plot).parent.mkdir(parents=True, exist_ok=True)
        model.plot_results(output_path=out_plot)
        return Path(out_plot).exists()

    # plot_results() is given a local path, so for an S3 target the figure is
    # rendered in a private temporary folder and then uploaded.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_plot = Path(tmp_dir) / "model_results.png"
        model.plot_results(output_path=str(tmp_plot))
        if not tmp_plot.exists():
            return False
        write_bytes_any(tmp_plot.read_bytes(), out_plot, fs=fs)
    return True


def run_pipeline(config: dict, verbose: bool = True) -> dict:
    """
    Run the whole pipeline and return a dictionary of its outputs.

    Steps: load the peers, load the stocks/performance CSVs, build the
    feature dataset, run the walk-forward models, export the figure, then
    print a summary. Every path of *config* may be local or ``s3://``.

    Parameters
    ----------
    config : dict with the keys of DEFAULT_CONFIG
    verbose : forwarded to the loader, the feature builder and the model

    Returns
    -------
    dict with the keys:
        loader : PeersLoader
        dataset : DataFrame features + target
        model : WalkForwardModel (fitted when features were available)
        results_df : walk-forward metrics (empty when modelling was skipped)
    """
    print("\n" + "=" * 60)
    print(" PIPELINE CARMIGNAC x ENSAE")
    print(" Performance -> Flux nets")
    print("=" * 60)

    # Build the S3 filesystem only if at least one path is on S3.
    any_s3 = any(
        _is_s3(str(config.get(k, "")))
        for k in ["peers_dir", "stocks_path", "perf_path",
                  "out_dataset", "out_plot", "out_peers", "out_rel_perf"]
    )
    fs: Optional[s3fs.S3FileSystem] = build_s3_filesystem() if any_s3 else None
    if fs is not None:
        print(" Mode S3/MinIO actif (minio-simple.lab.groupe-genes.fr)")

    # -------------------------------------------------------------------------
    # STEP 1 — Peers
    # -------------------------------------------------------------------------
    print("\n[1/5] Chargement des peers...")

    s3_opts = build_s3_options() if _is_s3(config["peers_dir"]) else None
    loader = PeersLoader(config["peers_dir"], s3_options=s3_opts)
    loader.load(verbose=verbose)

    write_csv_any(loader.get_strategy_universe(), config["out_peers"], fs=fs)
    print(f" -> {config['out_peers']} sauvegarde")

    # -------------------------------------------------------------------------
    # STEP 2 — Market data
    # -------------------------------------------------------------------------
    print("\n[2/5] Chargement des donnees de marche...")

    stocks_df = read_csv_any(config["stocks_path"], fs=fs, sep=";")
    perf_df = read_csv_any(config["perf_path"], fs=fs, sep=";")
    perf_df["Date"] = pd.to_datetime(perf_df["Date"])

    print(f" stocks : {stocks_df.shape[0]:,} lignes")
    print(f" perf : {perf_df.shape[0]:,} lignes | "
          f"periodes : {sorted(perf_df['perfPeriod'].unique())}")

    # -------------------------------------------------------------------------
    # STEP 3 — Feature engineering (incl. relative performance)
    # -------------------------------------------------------------------------
    print("\n[3/5] Feature engineering...")
    rel_calc = RelativePerformanceCalculator(loader)
    builder = FeatureBuilder(loader, rel_calc)

    dataset = builder.build(
        stocks_df=stocks_df,
        perf_df=perf_df,
        target_lag=config["target_lag"],
        perf_periods=config["perf_periods"],
        verbose=verbose,
    )

    if not dataset.empty:
        # Relative-performance export: identifiers + every "rel_" column,
        # keeping only rows where at least one of them is filled.
        rel_cols = [c for c in dataset.columns if c.startswith("rel_")]
        id_cols = [c for c in ["Registrar Account - ID", "Product - Isin",
                               "Centralisation Date", "carmignac_strategy"]
                   if c in dataset.columns]
        if rel_cols:
            rel_export = dataset[id_cols + rel_cols].dropna(subset=rel_cols, how="all")
            write_csv_any(rel_export, config["out_rel_perf"], fs=fs)
            print(f" -> {config['out_rel_perf']} sauvegarde ({len(rel_export):,} lignes)")

        # Full dataset export
        write_csv_any(dataset, config["out_dataset"], fs=fs)
        print(f" -> {config['out_dataset']} sauvegarde ({len(dataset):,} lignes)")

    # -------------------------------------------------------------------------
    # STEP 4 — Modelling
    # -------------------------------------------------------------------------
    print("\n[4/5] Modelisation (walk-forward)...")
    feature_cols = FeatureBuilder.get_feature_columns(dataset) if not dataset.empty else []
    model = WalkForwardModel(min_train_frac=config["min_train_frac"])

    if feature_cols and not dataset.empty:
        results_df = model.fit_evaluate(dataset, feature_cols, verbose=verbose)
    else:
        print(" Pas de features disponibles — modelisation ignoree.")
        results_df = pd.DataFrame()

    # -------------------------------------------------------------------------
    # STEP 5 — Figures
    # -------------------------------------------------------------------------
    print("\n[5/5] Generation des visualisations...")

    if not _export_plot(model, config["out_plot"], fs=fs):
        # No figure was written: report it instead of crashing on the copy.
        print(f" -- {config['out_plot']} (non genere)")
    elif _is_s3(config["out_plot"]):
        print(f" -> {config['out_plot']} transfere sur S3")
    else:
        print(f" -> {config['out_plot']} sauvegarde en local")

    # -------------------------------------------------------------------------
    # FINAL SUMMARY
    # -------------------------------------------------------------------------
    print("\n" + "=" * 60)
    print(" RESUME DU PIPELINE")
    print("=" * 60)

    print(f"\nStrategies Carmignac identifiees : "
          f"{loader.peers_df['carmignac_strategy'].nunique()}")
    print(f"Shareclasses Carmignac : {len(loader.carmignac_df)}")
    print(f"Fonds concurrents (actifs) : "
          f"{len(loader.competitors_df[~loader.competitors_df['is_index_fund']])}")

    if not dataset.empty:
        rel_cols_present = [c for c in dataset.columns if c.startswith("rel_")]
        perf_cols_present = [c for c in dataset.columns if c.startswith("perf_")]
        aum_cols_present = [c for c in feature_cols if c.startswith("aum_")]
        print(f"\nFeatures AUM : {len(aum_cols_present)}")
        print(f"Features perf absolue : {len(perf_cols_present)}")
        print(f"Features perf relative (peers) : {len(rel_cols_present)}")
        print(f"Lignes dans le dataset : {len(dataset):,}")

    if not results_df.empty:
        best = model.get_best_model()
        best_mae = results_df[results_df["model"] == best]["mae"].median()
        # The model labels its benchmark "Baseline (zéro)" (accented); match
        # on the prefix so a spelling difference cannot yield a NaN baseline
        # and a gain that is always reported as 0.
        is_baseline = results_df["model"].astype(str).str.startswith("Baseline")
        baseline_mae = results_df.loc[is_baseline, "mae"].median()
        gain = (1 - best_mae / baseline_mae) * 100 if baseline_mae > 0 else 0
        print(f"\nMeilleur modele : {best}")
        print(f"MAE mediane : {best_mae:,.0f} EUR")
        print(f"Gain vs baseline : {gain:.1f}%")
        if model.importances_:
            top = model.get_top_features(3)
            print(f"Top 3 features : {top}")

    print("\nOutputs generes :")
    for key in ["out_dataset", "out_plot", "out_peers", "out_rel_perf"]:
        p = config.get(key, "")
        if p and path_exists(p, fs=fs):
            size = file_size_kb(p, fs=fs)
            print(f" OK {p:<50} ({size} Ko)")
        elif p:
            print(f" -- {p:<50} (non genere)")

    print("\n" + "=" * 60)

    return {
        "loader": loader,
        "dataset": dataset,
        "model": model,
        "results_df": results_df,
    }
|
||||
|
||||
|
||||
# -- CLI -----------------------------------------------------------------------
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the pipeline and return the argparse namespace."""
    cli = argparse.ArgumentParser(
        description="Pipeline Carmignac — Performance -> Flux nets"
    )

    # Plain string options: (flag, default value, help text).
    path_options = (
        ("--peers-dir", DEFAULT_CONFIG["peers_dir"],
         "Dossier contenant les *_peers.csv (local ou s3://)"),
        ("--stocks", DEFAULT_CONFIG["stocks_path"],
         "Fichier AUM mensuel (local ou s3://)"),
        ("--perf", DEFAULT_CONFIG["perf_path"],
         "Fichier performances hebdomadaires (local ou s3://)"),
        ("--out-dir", ".",
         "Dossier de sortie des outputs (local ou s3://)"),
    )
    for flag, default, help_text in path_options:
        cli.add_argument(flag, default=default, help=help_text)

    cli.add_argument("--target-lag", type=int,
                     default=DEFAULT_CONFIG["target_lag"],
                     help="Horizon de prediction (mois)")
    cli.add_argument("--quiet", action="store_true", help="Mode silencieux")
    return cli.parse_args()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    args = parse_args()
    out_dir = args.out_dir.rstrip("/")

    def out(filename: str) -> str:
        """Build an output path inside the target folder (local or S3)."""
        if not _is_s3(out_dir):
            # Local folder: create it on demand, then join with pathlib.
            local_dir = Path(out_dir)
            local_dir.mkdir(parents=True, exist_ok=True)
            return str(local_dir / filename)
        return f"{out_dir}/{filename}"

    # Command-line values override the defaults; outputs all go to out_dir.
    cli_overrides = {
        "peers_dir": args.peers_dir,
        "stocks_path": args.stocks,
        "perf_path": args.perf,
        "target_lag": args.target_lag,
        "out_dataset": out("dataset_features.csv"),
        "out_plot": out("model_results.png"),
        "out_peers": out("peers_summary.csv"),
        "out_rel_perf": out("relative_performance.csv"),
    }
    config = {**DEFAULT_CONFIG, **cli_overrides}

    run_pipeline(config, verbose=not args.quiet)
|
||||
422
src/predictive_model.py
Normal file
422
src/predictive_model.py
Normal file
|
|
@ -0,0 +1,422 @@
|
|||
"""
|
||||
predictive_model.py
|
||||
────────────────────
|
||||
Modélisation prédictive des flux nets avec walk-forward validation.
|
||||
|
||||
Ce module est intentionnellement séparé du feature engineering :
|
||||
il prend en entrée le dataset produit par FeatureBuilder et se concentre
|
||||
sur l'entraînement, la validation et l'interprétation des modèles.
|
||||
|
||||
Modèles implémentés :
|
||||
- Baseline : prédiction zéro (benchmark naïf)
|
||||
- Ridge : régression linéaire régularisée (interprétable)
|
||||
- RandomForest : non-linéaire, robuste aux outliers
|
||||
- GradientBoosting : état de l'art sur données tabulaires
|
||||
|
||||
Validation : walk-forward expanding window (pas de data leakage).
|
||||
|
||||
Usage :
|
||||
from feature_engineering import FeatureBuilder
|
||||
from predictive_model import WalkForwardModel
|
||||
|
||||
feature_cols = FeatureBuilder.get_feature_columns(dataset)
|
||||
model = WalkForwardModel()
|
||||
results = model.fit_evaluate(dataset, feature_cols)
|
||||
model.plot_results(results, output_path="results.png")
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.gridspec as gridspec
|
||||
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
|
||||
from sklearn.linear_model import Ridge
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.metrics import mean_absolute_error, r2_score
|
||||
from sklearn.inspection import permutation_importance
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
|
||||
# ── Constantes ────────────────────────────────────────────────────────────────
|
||||
|
||||
COLORS = ["#1f4e79", "#2e75b6", "#70ad47", "#ed7d31", "#a50026"]

# One entry per model: the estimator class, its constructor arguments and
# whether the features must be standardised before fitting.
MODEL_CONFIGS = {
    "Ridge": dict(
        cls=Ridge,
        kwargs=dict(alpha=1.0),
        scale=True,  # needs standardised inputs
    ),
    "Random Forest": dict(
        cls=RandomForestRegressor,
        kwargs=dict(
            n_estimators=200,
            max_depth=6,
            min_samples_leaf=3,
            random_state=42,
            n_jobs=-1,
        ),
        scale=False,
    ),
    "Gradient Boosting": dict(
        cls=GradientBoostingRegressor,
        kwargs=dict(
            n_estimators=200,
            max_depth=4,
            learning_rate=0.05,
            subsample=0.8,
            random_state=42,
        ),
        scale=False,
    ),
}
|
||||
|
||||
|
||||
# ── Classe principale ─────────────────────────────────────────────────────────
|
||||
|
||||
class WalkForwardModel:
|
||||
"""
|
||||
Entraîne et évalue plusieurs modèles via walk-forward validation.
|
||||
|
||||
Paramètres
|
||||
----------
|
||||
date_col : colonne de date dans le dataset (snapshots mensuels)
|
||||
target_col : colonne de la variable cible
|
||||
min_train_frac : fraction minimale de dates en train (défaut 0.4)
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
date_col: str = "Centralisation Date",
|
||||
target_col: str = "flux_net_proxy",
|
||||
min_train_frac: float = 0.4):
|
||||
self.date_col = date_col
|
||||
self.target_col = target_col
|
||||
self.min_train_frac = min_train_frac
|
||||
|
||||
# Stockage post-fit
|
||||
self.results_df_ = pd.DataFrame()
|
||||
self.importances_ = {}
|
||||
self.final_models_ = {}
|
||||
|
||||
# ── Walk-forward ──────────────────────────────────────────────────────────
|
||||
|
||||
def fit_evaluate(self,
|
||||
dataset: pd.DataFrame,
|
||||
feature_cols: list[str],
|
||||
verbose: bool = True) -> pd.DataFrame:
|
||||
"""
|
||||
Exécute le walk-forward expanding window sur tous les modèles.
|
||||
|
||||
Retourne un DataFrame avec les métriques par (modèle, date de test).
|
||||
"""
|
||||
dataset = dataset.copy()
|
||||
dataset[self.date_col] = pd.to_datetime(dataset[self.date_col])
|
||||
|
||||
dates_sorted = sorted(dataset[self.date_col].unique())
|
||||
n_dates = len(dates_sorted)
|
||||
min_train = max(3, int(n_dates * self.min_train_frac))
|
||||
|
||||
if verbose:
|
||||
print(f"Walk-forward : {n_dates} dates | min train = {min_train}")
|
||||
|
||||
if n_dates <= min_train:
|
||||
print("⚠ Pas assez de dates pour le walk-forward. "
|
||||
"Augmenter la taille du dataset.")
|
||||
return pd.DataFrame()
|
||||
|
||||
records = []
|
||||
for test_idx in range(min_train, n_dates):
|
||||
train_dates = dates_sorted[:test_idx]
|
||||
test_date = dates_sorted[test_idx]
|
||||
|
||||
train = dataset[dataset[self.date_col].isin(train_dates)]
|
||||
test = dataset[dataset[self.date_col] == test_date]
|
||||
|
||||
X_train, y_train = self._prepare(train, feature_cols, fit=True)
|
||||
X_test, y_test = self._prepare(test, feature_cols, fit=False)
|
||||
|
||||
if len(X_test) == 0 or y_test.std() == 0:
|
||||
continue
|
||||
|
||||
# Baseline
|
||||
records.append({
|
||||
"test_date": test_date,
|
||||
"model": "Baseline (zéro)",
|
||||
"mae": mean_absolute_error(y_test, np.zeros(len(y_test))),
|
||||
"r2": r2_score(y_test, np.zeros(len(y_test))),
|
||||
"n_test": len(y_test),
|
||||
"n_train": len(X_train),
|
||||
})
|
||||
|
||||
for model_name, cfg in MODEL_CONFIGS.items():
|
||||
model = cfg["cls"](**cfg["kwargs"])
|
||||
scaler = StandardScaler() if cfg["scale"] else None
|
||||
|
||||
X_tr = scaler.fit_transform(X_train) if scaler else X_train
|
||||
X_te = scaler.transform(X_test) if scaler else X_test
|
||||
|
||||
model.fit(X_tr, y_train)
|
||||
preds = model.predict(X_te)
|
||||
|
||||
records.append({
|
||||
"test_date": test_date,
|
||||
"model": model_name,
|
||||
"mae": mean_absolute_error(y_test, preds),
|
||||
"r2": r2_score(y_test, preds),
|
||||
"n_test": len(y_test),
|
||||
"n_train": len(X_train),
|
||||
})
|
||||
|
||||
self.results_df_ = pd.DataFrame(records)
|
||||
|
||||
if verbose:
|
||||
self._print_results_summary()
|
||||
|
||||
# Entraîner les modèles finaux sur toutes les données
|
||||
self._fit_final_models(dataset, feature_cols)
|
||||
|
||||
return self.results_df_
|
||||
|
||||
# ── Modèles finaux (pour importance des variables) ────────────────────────
|
||||
|
||||
def _fit_final_models(self, dataset: pd.DataFrame, feature_cols: list[str]):
    """Fit each configured model on the full dataset, then derive importances.

    Fitted models are stored in ``final_models_`` as
    ``(model, scaler_or_None, feature_cols)``. When a Random Forest is
    available, its impurity-based and permutation importances are stored
    in ``importances_`` under "gini" and "permutation".
    """
    features, target = self._prepare(dataset, feature_cols, fit=True)
    if len(features) == 0:
        return

    for name, spec in MODEL_CONFIGS.items():
        estimator = spec["cls"](**spec["kwargs"])
        if spec["scale"]:
            scaler = StandardScaler()
            fit_matrix = scaler.fit_transform(features)
        else:
            scaler = None
            fit_matrix = features
        estimator.fit(fit_matrix, target)
        self.final_models_[name] = (estimator, scaler, feature_cols)

    # Variable importance: Random Forest (Gini) + permutation.
    forest = self.final_models_.get("Random Forest", (None, None, None))[0]
    if forest is None:
        return

    gini = pd.Series(forest.feature_importances_, index=feature_cols)
    self.importances_["gini"] = gini.sort_values(ascending=False)

    shuffled = permutation_importance(
        forest, features, target, n_repeats=10, random_state=42, n_jobs=-1
    )
    permuted = pd.Series(shuffled.importances_mean, index=feature_cols)
    self.importances_["permutation"] = permuted.sort_values(ascending=False)
|
||||
|
||||
# ── Prédiction ────────────────────────────────────────────────────────────
|
||||
|
||||
def predict(self, new_data: pd.DataFrame,
|
||||
model_name: str = "Random Forest") -> np.ndarray:
|
||||
"""Prédit les flux nets sur de nouvelles données."""
|
||||
if model_name not in self.final_models_:
|
||||
raise ValueError(f"Modèle '{model_name}' non disponible. "
|
||||
f"Disponibles : {list(self.final_models_.keys())}")
|
||||
|
||||
model, scaler, feature_cols = self.final_models_[model_name]
|
||||
X, _ = self._prepare(new_data, feature_cols, fit=False)
|
||||
X_pred = scaler.transform(X) if scaler else X
|
||||
return model.predict(X_pred)
|
||||
|
||||
# ── Visualisation ─────────────────────────────────────────────────────────
|
||||
|
||||
def plot_results(self, output_path: str = "model_results.png"):
    """Render the walk-forward results and save them to ``output_path``.

    Panels: [A] MAE per model and test date, [B] R² per model,
    [C] MAE distribution across folds, [D] Random Forest impurity
    importances, [E] permutation importances. When no results are
    available, the explanatory schema is drawn instead.
    """
    if self.results_df_.empty:
        print("⚠ Aucun résultat à visualiser (walk-forward non exécuté).")
        self._plot_schema(output_path)
        return

    from matplotlib.patches import Patch

    # Feature families are recognised by their column-name prefix.
    family_colors = {"rel_": "#70ad47", "perf_": "#ed7d31"}
    default_family_color = "#1f4e79"

    def feature_color(name):
        for prefix, color in family_colors.items():
            if name.startswith(prefix):
                return color
        return default_family_color

    results = self.results_df_
    by_model = list(results.groupby("model"))
    # One colour per model, shared by every panel so that a model keeps
    # the same colour in the line plots and in the box plot.
    palette = {
        name: COLORS[i % len(COLORS)]
        for i, (name, _) in enumerate(by_model)
    }

    fig = plt.figure(figsize=(16, 14))
    fig.patch.set_facecolor("white")
    gs = gridspec.GridSpec(3, 2, figure=fig, hspace=0.45, wspace=0.35)

    # ── [A] MAE per model and date ────────────────────────────────────────
    ax1 = fig.add_subplot(gs[0, :])
    for model_name, grp in by_model:
        style = "--" if "Baseline" in model_name else "-"
        ax1.plot(grp["test_date"], grp["mae"],
                 label=model_name, linewidth=1.8,
                 color=palette[model_name], linestyle=style)
    ax1.set_title("Walk-Forward Validation — MAE par modèle", fontsize=13, fontweight="bold")
    ax1.set_ylabel("MAE (€)")
    ax1.legend(fontsize=9)
    ax1.tick_params(axis="x", rotation=20)

    # ── [B] R² per model (baseline excluded, clipped for readability) ─────
    ax2 = fig.add_subplot(gs[1, 0])
    for model_name, grp in by_model:
        if "Baseline" not in model_name:
            ax2.plot(grp["test_date"], grp["r2"].clip(-1, 1),
                     label=model_name, linewidth=1.5,
                     color=palette[model_name])
    ax2.axhline(0, color="black", linestyle="--", linewidth=1, alpha=0.5)
    ax2.set_title("R² par modèle", fontsize=12, fontweight="bold")
    ax2.set_ylabel("R²")
    ax2.legend(fontsize=9)
    ax2.tick_params(axis="x", rotation=20)

    # ── [C] Aggregated MAE (box plot) ─────────────────────────────────────
    ax3 = fig.add_subplot(gs[1, 1])
    model_names = results["model"].unique().tolist()
    mae_by_model = [
        results.loc[results["model"] == m, "mae"].dropna().values
        for m in model_names
    ]
    bp = ax3.boxplot(mae_by_model, labels=model_names, patch_artist=True,
                     medianprops=dict(color="black", linewidth=2))
    for patch, model_name in zip(bp["boxes"], model_names):
        patch.set_facecolor(palette[model_name])
        patch.set_alpha(0.7)
    ax3.set_title("Distribution de MAE (tous folds)", fontsize=12, fontweight="bold")
    ax3.set_ylabel("MAE (€)")
    ax3.tick_params(axis="x", rotation=20)

    # ── [D] Variable importance (Gini) ────────────────────────────────────
    ax4 = fig.add_subplot(gs[2, 0])
    if "gini" in self.importances_:
        imp = self.importances_["gini"].head(15)
        colors_imp = [feature_color(c) for c in imp.index]
        # Reverse so that the most important feature is drawn at the top.
        ax4.barh(imp.index[::-1], imp.values[::-1], color=colors_imp[::-1])
        ax4.set_title("Importance (Gini) — Top 15 features", fontsize=12, fontweight="bold")
        ax4.set_xlabel("Importance relative")
        legend_elements = [
            Patch(color="#70ad47", label="Perf relative (peers)"),
            Patch(color="#ed7d31", label="Perf absolue"),
            Patch(color="#1f4e79", label="Comportement AUM"),
        ]
        ax4.legend(handles=legend_elements, fontsize=8, loc="lower right")
    else:
        ax4.axis("off")
        ax4.text(0.5, 0.5, "Importance des variables\nnon disponible",
                 ha="center", va="center", fontsize=12)

    # ── [E] Permutation importance ────────────────────────────────────────
    ax5 = fig.add_subplot(gs[2, 1])
    if "permutation" in self.importances_:
        pimp = self.importances_["permutation"].head(15)
        pimp = pimp[pimp > 0]  # keep only features that actually help
        colors_pimp = [feature_color(c) for c in pimp.index]
        ax5.barh(pimp.index[::-1], pimp.values[::-1], color=colors_pimp[::-1])
        ax5.set_title("Permutation Importance — Top 15", fontsize=12, fontweight="bold")
        # The importances are computed with the estimator's default
        # scorer, which is R² for regressors, so the axis is a drop in R².
        ax5.set_xlabel("Δ R² moyen (permutation)")
    else:
        ax5.axis("off")

    fig.suptitle("Carmignac × ENSAE — Résultats du modèle prédictif",
                 fontsize=14, fontweight="bold", y=1.01)

    fig.savefig(output_path, dpi=150, bbox_inches="tight", facecolor="white")
    plt.close(fig)
    print(f"✅ Graphiques sauvegardés : {output_path}")
|
||||
|
||||
def _plot_schema(self, output_path: str):
    """Draw a textual schema of the walk-forward procedure and save it.

    Used as a placeholder figure when there are no results to plot.
    """
    schema_parts = [
        "WALK-FORWARD VALIDATION — SCHÉMA\n\n",
        " t₁ t₂ t₃ t₄ t₅ t₆ t₇ ...\n",
        " ─────────────────────────────────\n",
        " TRAIN ████████ │TEST│\n",
        " TRAIN ███████████│TEST│\n",
        " TRAIN ██████████████│TEST│\n",
        " ...\n\n",
        "Principe :\n",
        " → Expanding window : la fenêtre de train s'agrandit à chaque fold\n",
        " → Test = 1 date future (mois suivant)\n",
        " → Aucune information future dans le train → pas de data leakage\n\n",
        "Métriques calculées à chaque fold :\n",
        " → MAE (Mean Absolute Error) en € de flux\n",
        " → R² (coefficient de détermination)\n\n",
        "Relancer avec les données complètes pour obtenir les résultats réels.",
    ]
    schema = "".join(schema_parts)

    fig, ax = plt.subplots(figsize=(12, 6))
    fig.patch.set_facecolor("white")

    # Blank canvas: no axes, fixed coordinate system for the text box.
    ax.axis("off")
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 5)

    box_style = dict(boxstyle="round", facecolor="#eaf2fb", alpha=0.8)
    ax.text(0.5, 4.8, schema, va="top", fontsize=11, fontfamily="monospace",
            bbox=box_style)
    ax.set_title("Modèle prédictif — En attente de données complètes",
                 fontsize=13, fontweight="bold")

    plt.savefig(output_path, dpi=150, bbox_inches="tight", facecolor="white")
    plt.close()
    print(f"✅ Schéma sauvegardé : {output_path}")
|
||||
|
||||
# ── Helpers internes ──────────────────────────────────────────────────────
|
||||
|
||||
def _prepare(self, df: pd.DataFrame,
|
||||
feature_cols: list[str],
|
||||
fit: bool) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Extrait X et y depuis le DataFrame, gère les NaN."""
|
||||
available = [c for c in feature_cols if c in df.columns]
|
||||
X = df[available].fillna(0).values
|
||||
y = df[self.target_col].values if self.target_col in df.columns else np.array([])
|
||||
return X, y
|
||||
|
||||
def _print_results_summary(self):
    """Print per-model MAE/R² statistics, best median MAE first."""
    print("\n── Résultats walk-forward (médiane sur tous les folds) ──")

    # Output column -> (source column, aggregation function).
    aggregations = {
        "MAE_median": ("mae", "median"),
        "MAE_mean": ("mae", "mean"),
        "R2_median": ("r2", "median"),
        "n_folds": ("mae", "count"),
    }
    per_model = self.results_df_.groupby("model").agg(**aggregations)
    ranked = per_model.round(4).sort_values("MAE_median")

    print(ranked.to_string())
    print("─────────────────────────────────────────────────────────")
|
||||
|
||||
# ── API publique ──────────────────────────────────────────────────────────
|
||||
|
||||
def get_best_model(self) -> str:
|
||||
"""Retourne le nom du modèle avec la MAE médiane la plus faible."""
|
||||
if self.results_df_.empty:
|
||||
return "Random Forest"
|
||||
summary = (self.results_df_
|
||||
[self.results_df_["model"] != "Baseline (zéro)"]
|
||||
.groupby("model")["mae"]
|
||||
.median())
|
||||
return summary.idxmin()
|
||||
|
||||
def get_top_features(self, n: int = 10,
|
||||
method: str = "permutation") -> list[str]:
|
||||
"""Retourne les n features les plus importantes."""
|
||||
if method not in self.importances_:
|
||||
method = list(self.importances_.keys())[0] if self.importances_ else None
|
||||
if method is None:
|
||||
return []
|
||||
return self.importances_[method].head(n).index.tolist()
|
||||
|
||||
|
||||
# ── Usage autonome ────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    # Stand-alone run: python <this file> [dataset_features.csv]
    import sys
    from feature_engineering import FeatureBuilder

    cli_args = sys.argv[1:]
    dataset_path = cli_args[0] if cli_args else "dataset_features.csv"

    dataset = pd.read_csv(dataset_path)
    feature_cols = FeatureBuilder.get_feature_columns(dataset)
    print(f"Dataset : {dataset.shape} | {len(feature_cols)} features")

    model = WalkForwardModel()
    results = model.fit_evaluate(dataset, feature_cols)
    model.plot_results("model_results.png")

    # The best model and top features only exist when folds were evaluated.
    has_results = not results.empty
    if has_results:
        print(f"\nMeilleur modèle : {model.get_best_model()}")
        print(f"Top features : {model.get_top_features(5)}")
|
||||
329
src/relative_performance.py
Normal file
329
src/relative_performance.py
Normal file
|
|
@ -0,0 +1,329 @@
|
|||
"""
|
||||
relative_performance.py
|
||||
────────────────────────
|
||||
Calcul de métriques de performance relative de Carmignac vs ses vrais peers.
|
||||
|
||||
Contexte :
|
||||
weekly_perf contient déjà un percentile Morningstar (rang brut dans la
|
||||
catégorie). Ce module construit des métriques plus fines en s'appuyant
|
||||
sur la liste explicite de peers issue des fichiers *_peers.csv :
|
||||
|
||||
- Spread de performance Carmignac vs médiane des peers (par période)
|
||||
- Rang Carmignac dans son groupe de peers restreint (ex: top/bottom quartile)
|
||||
- Stabilité du rang sur fenêtres glissantes (volatilité du rang)
|
||||
- Ratio d'outperformance : % de semaines où Carmignac > médiane peers
|
||||
|
||||
Ces métriques sont ensuite utilisées comme features dans feature_engineering.py.
|
||||
|
||||
Usage :
|
||||
from peers_loader import PeersLoader
|
||||
from relative_performance import RelativePerformanceCalculator
|
||||
|
||||
loader = PeersLoader("peers/").load()
|
||||
calc = RelativePerformanceCalculator(loader)
|
||||
rel_df = calc.compute(weekly_perf_df)
|
||||
# → DataFrame avec colonnes rel_* par stratégie Carmignac
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy import stats
|
||||
|
||||
|
||||
# ── Classe principale ─────────────────────────────────────────────────────────
|
||||
|
||||
class RelativePerformanceCalculator:
|
||||
"""
|
||||
Calcule les métriques de performance relative Carmignac vs peers.
|
||||
|
||||
Paramètres
|
||||
----------
|
||||
loader : PeersLoader (déjà chargé)
|
||||
"""
|
||||
|
||||
def __init__(self, loader):
|
||||
self.loader = loader
|
||||
|
||||
# ── Point d'entrée principal ──────────────────────────────────────────────
|
||||
|
||||
def compute(self, perf_df: pd.DataFrame,
            perf_periods: list[str] | None = None,
            verbose: bool = True) -> pd.DataFrame:
    """
    Compute every relative-performance metric.

    Parameters
    ----------
    perf_df : performance frame; the columns read here and in the
              helpers are Date, perfPeriod, shareClass_name and return.
    perf_periods : periods to process; None means every period found
                   in ``perf_df``.
    verbose : print a summary of what was computed.

    Returns
    -------
    One row per (strategy, period, date) with context columns and the
    ``rel_*`` metrics; an empty DataFrame when nothing could be computed.

    Raises
    ------
    ValueError : no Carmignac row is left after ISIN resolution.
    """
    perf = perf_df.copy()
    perf["Date"] = pd.to_datetime(perf["Date"])

    if perf_periods is None:
        perf_periods = perf["perfPeriod"].dropna().unique().tolist()

    # Resolve share-class names to ISINs and flag the Carmignac rows.
    perf = self._resolve_isin(perf)

    carmignac_mask = perf["is_carmignac"]
    carm_perf = perf[carmignac_mask].copy()
    peers_perf = perf[~carmignac_mask].copy()

    if carm_perf.empty:
        raise ValueError(
            "Aucune ligne Carmignac trouvée dans perf_df après résolution ISIN. "
            "Vérifier que les noms de shareclass correspondent aux fichiers peers."
        )

    frames = []
    for strategy in self.loader.peers_df["carmignac_strategy"].unique():
        carm_isins = self.loader.get_carmignac_isin_for_strategy(strategy)
        peer_isins = self.loader.get_competitor_isin_for_strategy(
            strategy, exclude_index=True
        )
        if not carm_isins:
            continue

        per_period = (
            self._compute_period(
                carm_perf, peers_perf,
                carm_isins, peer_isins,
                strategy, period
            )
            for period in perf_periods
        )
        frames.extend(frame for frame in per_period if frame is not None)

    if not frames:
        if verbose:
            print("⚠ Aucune métrique calculée. Vérifier le chevauchement "
                  "temporel entre perf_df et les peers.")
        return pd.DataFrame()

    out = pd.concat(frames, ignore_index=True)

    if verbose:
        rel_cols = [name for name in out.columns if name.startswith("rel_")]
        print(f"✅ Métriques relatives calculées : {out.shape[0]} lignes")
        print(f" Stratégies : {out['carmignac_strategy'].unique().tolist()}")
        print(f" Périodes : {out['perfPeriod'].unique().tolist()}")
        print(f" Features : {rel_cols}")

    return out
|
||||
|
||||
# ── Calcul par (stratégie, période) ──────────────────────────────────────
|
||||
|
||||
def _compute_period(self,
|
||||
carm_perf: pd.DataFrame,
|
||||
peers_perf: pd.DataFrame,
|
||||
carm_isin_list: list[str],
|
||||
peers_isin_list: list[str],
|
||||
strategy_code: str,
|
||||
period: str) -> pd.DataFrame | None:
|
||||
|
||||
# Filtrer sur la stratégie et la période
|
||||
c = carm_perf[
|
||||
carm_perf["isin"].isin(carm_isin_list) &
|
||||
(carm_perf["perfPeriod"] == period)
|
||||
].copy()
|
||||
|
||||
p = peers_perf[
|
||||
peers_perf["isin"].isin(peers_isin_list) &
|
||||
(peers_perf["perfPeriod"] == period)
|
||||
].copy()
|
||||
|
||||
if c.empty or p.empty:
|
||||
return None
|
||||
|
||||
# Agrégation Carmignac : moyenne des shareclasses (si plusieurs)
|
||||
c_agg = (c.groupby("Date")["return"]
|
||||
.mean()
|
||||
.reset_index()
|
||||
.rename(columns={"return": "carm_return"}))
|
||||
|
||||
# Stats des peers par date
|
||||
p_stats = (p.groupby("Date")["return"]
|
||||
.agg(
|
||||
peers_median="median",
|
||||
peers_mean="mean",
|
||||
peers_q25=lambda x: x.quantile(0.25),
|
||||
peers_q75=lambda x: x.quantile(0.75),
|
||||
peers_std="std",
|
||||
peers_n="count",
|
||||
)
|
||||
.reset_index())
|
||||
|
||||
# Jointure
|
||||
merged = c_agg.merge(p_stats, on="Date", how="inner")
|
||||
if merged.empty:
|
||||
return None
|
||||
|
||||
# ── Métriques relatives ───────────────────────────────────────────────
|
||||
|
||||
# 1. Spread vs médiane (en points de %)
|
||||
merged["rel_spread_vs_median"] = merged["carm_return"] - merged["peers_median"]
|
||||
|
||||
# 2. Spread vs moyenne
|
||||
merged["rel_spread_vs_mean"] = merged["carm_return"] - merged["peers_mean"]
|
||||
|
||||
# 3. Rang dans le groupe (0 = meilleur, 1 = pire)
|
||||
# Recalculé proprement depuis les données brutes
|
||||
merged["rel_rank_in_peers"] = merged.apply(
|
||||
lambda row: self._compute_rank(
|
||||
row["carm_return"],
|
||||
p[p["Date"] == row["Date"]]["return"].values
|
||||
),
|
||||
axis=1
|
||||
)
|
||||
|
||||
# 4. Quartile (1=top, 4=bottom)
|
||||
merged["rel_quartile"] = (merged["rel_rank_in_peers"] * 4).apply(
|
||||
lambda x: min(int(x) + 1, 4) if pd.notna(x) else np.nan
|
||||
)
|
||||
|
||||
# 5. Dummy top quartile / bottom quartile
|
||||
merged["rel_is_top_quartile"] = (merged["rel_quartile"] == 1).astype(float)
|
||||
merged["rel_is_bottom_quartile"] = (merged["rel_quartile"] == 4).astype(float)
|
||||
|
||||
# 6. Z-score dans la distribution des peers
|
||||
merged["rel_zscore"] = (
|
||||
(merged["carm_return"] - merged["peers_mean"]) /
|
||||
merged["peers_std"].replace(0, np.nan)
|
||||
)
|
||||
|
||||
# 7. Volatilité du rang sur fenêtre glissante (12 semaines)
|
||||
merged = merged.sort_values("Date")
|
||||
merged["rel_rank_volatility_12w"] = (
|
||||
merged["rel_rank_in_peers"]
|
||||
.rolling(12, min_periods=4)
|
||||
.std()
|
||||
)
|
||||
|
||||
# 8. Ratio d'outperformance glissant (26 semaines = ~6 mois)
|
||||
merged["rel_outperf_ratio_26w"] = (
|
||||
(merged["rel_spread_vs_median"] > 0)
|
||||
.astype(float)
|
||||
.rolling(26, min_periods=8)
|
||||
.mean()
|
||||
)
|
||||
|
||||
# 9. Momentum de rang (amélioration du rang sur 4 semaines)
|
||||
merged["rel_rank_momentum_4w"] = (
|
||||
merged["rel_rank_in_peers"]
|
||||
.diff(-4) # négatif = amélioration (rang plus bas = mieux)
|
||||
)
|
||||
|
||||
# Colonnes de contexte
|
||||
merged["carmignac_strategy"] = strategy_code
|
||||
merged["perfPeriod"] = period
|
||||
merged["peers_n_funds"] = merged["peers_n"]
|
||||
|
||||
cols_out = (
|
||||
["Date", "carmignac_strategy", "perfPeriod",
|
||||
"carm_return", "peers_median", "peers_q25", "peers_q75",
|
||||
"peers_n_funds"]
|
||||
+ [c for c in merged.columns if c.startswith("rel_")]
|
||||
)
|
||||
return merged[[c for c in cols_out if c in merged.columns]]
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
def _resolve_isin(self, perf_df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""
|
||||
Ajoute une colonne 'isin' et 'is_carmignac' à perf_df
|
||||
en résolvant les noms de shareclass via PeersLoader.
|
||||
"""
|
||||
perf_df = perf_df.copy()
|
||||
|
||||
# Mapping direct depuis peers
|
||||
all_isin_map = self.loader.isin_map # Name → ISIN
|
||||
|
||||
def resolve(name):
|
||||
return self.loader.resolve_shareclass_name(name)
|
||||
|
||||
perf_df["isin"] = perf_df["shareClass_name"].map(resolve)
|
||||
|
||||
# Identifier les fonds Carmignac
|
||||
carm_isins = set(self.loader.carmignac_df["ISIN"].dropna())
|
||||
perf_df["is_carmignac"] = perf_df["isin"].isin(carm_isins)
|
||||
|
||||
n_resolved = perf_df["isin"].notna().sum()
|
||||
n_total = len(perf_df)
|
||||
n_carmignac = perf_df["is_carmignac"].sum()
|
||||
print(f"Résolution ISIN : {n_resolved}/{n_total} lignes résolues "
|
||||
f"({n_carmignac} Carmignac, {n_resolved - n_carmignac} peers)")
|
||||
|
||||
return perf_df
|
||||
|
||||
@staticmethod
|
||||
def _compute_rank(carm_value: float, peers_values: np.ndarray) -> float:
|
||||
"""
|
||||
Rang de carm_value dans peers_values (percentile, 0=meilleur, 1=pire).
|
||||
Un rang faible = Carmignac surperforme ses peers.
|
||||
"""
|
||||
if len(peers_values) == 0 or np.isnan(carm_value):
|
||||
return np.nan
|
||||
all_values = np.append(peers_values, carm_value)
|
||||
all_values = all_values[~np.isnan(all_values)]
|
||||
# Percentile de rang (inversé : 0 = meilleur)
|
||||
rank = 1.0 - stats.percentileofscore(all_values, carm_value, kind="rank") / 100.0
|
||||
return rank
|
||||
|
||||
|
||||
# ── Fonctions utilitaires standalone ─────────────────────────────────────────
|
||||
|
||||
def summarize_relative_performance(rel_df: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate the relative metrics per (strategy, period).

    Returns one row per group with median/mean summaries rounded to
    three decimals, or an empty DataFrame when ``rel_df`` is empty.
    """
    if rel_df.empty:
        return pd.DataFrame()

    # Output column -> (source column, aggregation function).
    summary_spec = {
        "median_spread": ("rel_spread_vs_median", "median"),
        "pct_outperform": ("rel_outperf_ratio_26w", "mean"),
        "avg_quartile": ("rel_quartile", "mean"),
        "pct_top_quartile": ("rel_is_top_quartile", "mean"),
        "pct_bottom_quartile": ("rel_is_bottom_quartile", "mean"),
        "avg_zscore": ("rel_zscore", "mean"),
        "avg_rank_vol": ("rel_rank_volatility_12w", "mean"),
        "n_obs": ("carm_return", "count"),
    }
    grouped = rel_df.groupby(["carmignac_strategy", "perfPeriod"])
    summary = grouped.agg(**summary_spec).round(3)
    return summary.reset_index()
|
||||
|
||||
|
||||
# ── Usage autonome ────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    # Stand-alone run: python relative_performance.py [peers_dir] [perf_csv]
    import sys
    from peers_loader import PeersLoader

    cli_args = sys.argv[1:]
    peers_dir = cli_args[0] if len(cli_args) >= 1 else "."
    perf_path = cli_args[1] if len(cli_args) >= 2 else "weekly_perf_full.csv"

    print("Chargement des peers...")
    loader = PeersLoader(peers_dir=peers_dir).load()

    print("\nChargement des performances...")
    perf_df = pd.read_csv(perf_path)
    perf_df["Date"] = pd.to_datetime(perf_df["Date"])

    print("\nCalcul des métriques relatives...")
    calc = RelativePerformanceCalculator(loader)
    rel_df = calc.compute(perf_df)

    print("\nRésumé des performances relatives :")
    summary = summarize_relative_performance(rel_df)
    print(summary.to_string(index=False))
|
||||
Loading…
Reference in New Issue
Block a user