Project_Carmignac/clustering/main.py

48 lines
1.7 KiB
Python
Raw Normal View History

2026-02-02 11:37:16 +01:00
import pandas as pd
from data_loader import load_and_clean_data
2026-02-08 17:06:15 +01:00
from features import compute_static_features, compute_shock_sensitivities
2026-02-02 11:37:16 +01:00
from clustering import run_clustering_pipeline, get_cluster_profiles
def main(n_clusters=3, output_path='client_clusters.csv'):
    """Run the client clustering pipeline end to end.

    Loads the input data, builds the static and shock-sensitivity feature
    tables, joins them into one feature matrix, clusters the clients,
    prints the per-cluster profile table and writes the clustered frame
    to a CSV file.

    Args:
        n_clusters: Number of clusters handed to ``run_clustering_pipeline``.
        output_path: Path of the CSV file the clustered frame is written to.
    """
    print("--- Starting Carmignac Client Clustering Pipeline ---")

    # 1. Data Loading
    print("Loading data...")
    flows, aum, rates, gov = load_and_clean_data(
        rates_path='data/str_rates.csv',
        gov_path='data/eur_gov_indices.csv'
    )

    # 2. Feature Engineering
    print("Computing static features...")
    static_feats = compute_static_features(flows, aum)

    # Option 1: Run Shock Model (Default)
    sensitivity_feats = compute_shock_sensitivities(flows, aum, rates, gov, freq='ME')
    # Option 2: Run Linear Model (Uncomment to use)
    # NOTE(review): compute_linear_sensitivities is not imported by this
    # file; import it (presumably from features) before enabling this.
    # sensitivity_feats = compute_linear_sensitivities(flows, aum, rates, gov, freq='ME')

    # Merge features. The left join keeps every row of static_feats; rows
    # without a match in sensitivity_feats get NaN in the joined columns.
    full_features = static_feats.join(sensitivity_feats, how='left')

    # Fill missing sensitivities with 0 (Passive clients)
    shock_cols = ['alpha_normal', 'beta_rate_spike', 'beta_rate_drop',
                  'beta_bond_rally', 'beta_bond_crash', 'shock_r_squared']
    # Only touch the columns that actually exist: selecting an absent column
    # would raise KeyError and abort the run before clustering.
    present_cols = [col for col in shock_cols if col in full_features.columns]
    missing_cols = [col for col in shock_cols if col not in full_features.columns]
    if missing_cols:
        print(f"Warning: sensitivity columns not found, skipped: {missing_cols}")
    if present_cols:
        full_features[present_cols] = full_features[present_cols].fillna(0)
    print(f"Final Feature Matrix: {full_features.shape}")

    # 3. Clustering
    print("Running Clustering...")
    # The second and third return values are not used by this script.
    clustered_df, _centers, _scaler = run_clustering_pipeline(
        full_features, n_clusters=n_clusters
    )

    # 4. Results
    print("\n--- Cluster Profiles (Mean Values) ---")
    profiles = get_cluster_profiles(clustered_df)
    # Printed transposed, i.e. with the profile table's columns as rows.
    print(profiles.T)

    clustered_df.to_csv(output_path)
    print(f"\nResults saved to '{output_path}'")
2026-02-02 11:37:16 +01:00
if __name__ == "__main__":
    # Run the pipeline only when this file is executed as a script,
    # not when it is imported as a module.
    main()