# Project_Carmignac/clustering/main.py

import pandas as pd
from data_loader import load_and_clean_data
from features import compute_static_features, compute_shock_sensitivities
from clustering import run_clustering_pipeline, get_cluster_profiles

def main():
    """Run the client clustering pipeline from data loading to CSV export.

    The steps are executed in order:

    1. Load the flow, AUM, rate and government-index inputs through
       ``load_and_clean_data``.
    2. Build the static features and the shock-sensitivity features, then
       left-join the sensitivities onto the static features.
    3. Cluster the merged feature matrix into three clusters.
    4. Print the transposed cluster profiles and write the clustered frame
       to ``client_clusters.csv`` in the current working directory.

    Progress messages are written to stdout. Nothing is returned.
    """
    print("--- Starting Carmignac Client Clustering Pipeline ---")

    # 1. Data Loading
    print("Loading data...")
    flows, aum, rates, gov = load_and_clean_data(
        rates_path='data/str_rates.csv',
        gov_path='data/eur_gov_indices.csv'
    )

    # 2. Feature Engineering
    print("Computing static features...")
    static_feats = compute_static_features(flows, aum)

    # Option 1: Run Shock Model (Default)
    sensitivity_feats = compute_shock_sensitivities(flows, aum, rates, gov, freq='ME')

    # Option 2: Run Linear Model (Uncomment to use)
    # NOTE: compute_linear_sensitivities is not imported at the top of this
    # file; add the import before enabling this option.
    # sensitivity_feats = compute_linear_sensitivities(flows, aum, rates, gov, freq='ME')

    # Merge features. The left join keeps every row of static_feats; rows
    # without a match in sensitivity_feats get NaN in the sensitivity columns.
    full_features = static_feats.join(sensitivity_feats, how='left')

    # Fill missing sensitivities with 0 (Passive clients)
    shock_cols = ['alpha_normal', 'beta_rate_spike', 'beta_rate_drop',
                  'beta_bond_rally', 'beta_bond_crash', 'shock_r_squared']
    # Only touch columns that actually exist: indexing with an absent label
    # raises KeyError, which would abort the run whenever the sensitivity
    # model does not emit the full set of shock columns.
    present_cols = [col for col in shock_cols if col in full_features.columns]
    missing_cols = [col for col in shock_cols if col not in full_features.columns]
    if missing_cols:
        print(f"Warning: sensitivity columns missing from feature matrix: {missing_cols}")
    if present_cols:
        full_features[present_cols] = full_features[present_cols].fillna(0)

    print(f"Final Feature Matrix: {full_features.shape}")

    # 3. Clustering
    print("Running Clustering...")
    # centers and scaler are returned by the pipeline but not used here.
    clustered_df, centers, scaler = run_clustering_pipeline(full_features, n_clusters=3)

    # 4. Results
    print("\n--- Cluster Profiles (Mean Values) ---")
    profiles = get_cluster_profiles(clustered_df)
    # Transposed for printing.
    print(profiles.T)

    clustered_df.to_csv('client_clusters.csv')
    print("\nResults saved to 'client_clusters.csv'")

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()