"""Entry point for the Carmignac client clustering pipeline.

Loads the cleaned inputs, builds the per-client feature matrix (static
features joined with shock sensitivities), runs the clustering step and
writes the labelled result to a CSV file.
"""

import pandas as pd

from clustering import get_cluster_profiles, run_clustering_pipeline
from data_loader import load_and_clean_data
from features import compute_shock_sensitivities, compute_static_features


def main(
    *,
    rates_path: str = 'data/str_rates.csv',
    gov_path: str = 'data/eur_gov_indices.csv',
    n_clusters: int = 3,
    output_path: str = 'client_clusters.csv',
) -> None:
    """Run the full clustering pipeline and save the clustered clients.

    Args:
        rates_path: Path forwarded to ``load_and_clean_data`` as ``rates_path``.
        gov_path: Path forwarded to ``load_and_clean_data`` as ``gov_path``.
        n_clusters: Number of clusters requested from
            ``run_clustering_pipeline``.
        output_path: Destination of the CSV written with the clustered frame.

    The defaults reproduce the previously hard-coded values, so calling
    ``main()`` with no arguments behaves exactly as before.
    """
    print("--- Starting Carmignac Client Clustering Pipeline ---")

    # 1. Data Loading
    print("Loading data...")
    flows, aum, rates, gov = load_and_clean_data(
        rates_path=rates_path,
        gov_path=gov_path,
    )

    # 2. Feature Engineering
    print("Computing static features...")
    static_feats = compute_static_features(flows, aum)

    # Option 1: Run Shock Model (Default)
    # NOTE(review): 'ME' looks like the pandas month-end offset alias, which
    # only exists in recent pandas releases -- confirm how the helper uses it.
    sensitivity_feats = compute_shock_sensitivities(
        flows, aum, rates, gov, freq='ME'
    )

    # Option 2: Run Linear Model (Uncomment to use)
    # NOTE: compute_linear_sensitivities is not imported in this file; import
    # it first (presumably from `features` -- confirm) or this raises NameError.
    # sensitivity_feats = compute_linear_sensitivities(
    #     flows, aum, rates, gov, freq='ME'
    # )

    # Merge features: a left join keeps every row of the static features,
    # leaving NaN where no sensitivity row exists for that index.
    full_features = static_feats.join(sensitivity_feats, how='left')

    # Fill missing sensitivities with 0 (Passive clients)
    shock_cols = [
        'alpha_normal',
        'beta_rate_spike',
        'beta_rate_drop',
        'beta_bond_rally',
        'beta_bond_crash',
        'shock_r_squared',
    ]
    full_features[shock_cols] = full_features[shock_cols].fillna(0)

    print(f"Final Feature Matrix: {full_features.shape}")

    # 3. Clustering
    print("Running Clustering...")
    # The second and third return values are not needed in this script.
    clustered_df, _centers, _scaler = run_clustering_pipeline(
        full_features, n_clusters=n_clusters
    )

    # 4. Results
    print("\n--- Cluster Profiles (Mean Values) ---")
    profiles = get_cluster_profiles(clustered_df)
    # Transposed before printing.
    print(profiles.T)

    clustered_df.to_csv(output_path)
    print(f"\nResults saved to '{output_path}'")


if __name__ == "__main__":
    main()