import pandas as pd
from data_loader import load_and_clean_data
from features import compute_static_features, compute_shock_sensitivities
from clustering import run_clustering_pipeline, get_cluster_profiles

def main(
    rates_path: str = 'data/str_rates.csv',
    gov_path: str = 'data/eur_gov_indices.csv',
    n_clusters: int = 3,
    output_path: str = 'client_clusters.csv',
    freq: str = 'ME',
) -> None:
    """Run the client clustering pipeline end to end.

    Loads the input data, builds the static and shock-sensitivity feature
    sets, joins them into one feature matrix, clusters it, prints the
    per-cluster profiles and writes the clustered frame to CSV.

    All parameters default to the values that were previously hard-coded,
    so calling ``main()`` with no arguments behaves as before.

    Args:
        rates_path: Path forwarded to ``load_and_clean_data`` as
            ``rates_path``.
        gov_path: Path forwarded to ``load_and_clean_data`` as ``gov_path``.
        n_clusters: Number of clusters forwarded to
            ``run_clustering_pipeline``.
        output_path: Destination of the CSV written from the clustered
            frame.
        freq: Frequency string forwarded to ``compute_shock_sensitivities``.
    """
    print("--- Starting Carmignac Client Clustering Pipeline ---")

    # 1. Data Loading
    print("Loading data...")
    flows, aum, rates, gov = load_and_clean_data(
        rates_path=rates_path,
        gov_path=gov_path,
    )

    # 2. Feature Engineering
    print("Computing static features...")
    static_feats = compute_static_features(flows, aum)

    # Sensitivities come from the shock model.
    # NOTE(review): a linear alternative (`compute_linear_sensitivities`) was
    # referenced here in a commented-out call, but it is not imported in this
    # file. Switching to it would require importing it and, presumably,
    # adjusting `shock_cols` below to match its output columns -- verify.
    sensitivity_feats = compute_shock_sensitivities(
        flows, aum, rates, gov, freq=freq
    )

    # Left join keeps every row of the static features; rows without a match
    # in the sensitivity frame end up with NaN in the sensitivity columns.
    full_features = static_feats.join(sensitivity_feats, how='left')

    # Fill missing sensitivities with 0 (Passive clients).
    # The selection is deliberately strict: a missing column raises KeyError
    # instead of silently dropping a feature from the clustering input.
    shock_cols = [
        'alpha_normal',
        'beta_rate_spike',
        'beta_rate_drop',
        'beta_bond_rally',
        'beta_bond_crash',
        'shock_r_squared',
    ]
    full_features[shock_cols] = full_features[shock_cols].fillna(0)

    print(f"Final Feature Matrix: {full_features.shape}")

    # 3. Clustering
    print("Running Clustering...")
    # The pipeline returns three values; only the clustered frame is used here.
    clustered_df, _centers, _scaler = run_clustering_pipeline(
        full_features, n_clusters=n_clusters
    )

    # 4. Results
    print("\n--- Cluster Profiles (Mean Values) ---")
    profiles = get_cluster_profiles(clustered_df)
    # Transposed for printing.
    print(profiles.T)

    clustered_df.to_csv(output_path)
    print(f"\nResults saved to '{output_path}'")

# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()