from pathlib import Path

import pandas as pd

from data_loader import load_and_clean_data
from features import compute_static_features, compute_market_sensitivities
from clustering import run_clustering_pipeline, get_cluster_profiles


def main():
    """Run the client clustering pipeline end to end.

    Steps, in order:
      1. Load the flows, AUM, rates and government-index inputs from the
         CSV files under ``data/``.
      2. Compute static features from flows and AUM.
      3. Compute market sensitivities from flows, rates and indices.
      4. Left-join the sensitivities onto the static features.
      5. Cluster the joined features into 3 clusters.
      6. Print the per-cluster profiles (transposed) and write the
         clustered frame to ``clustering/client_clusters.csv``.

    Progress messages are printed to stdout. Returns ``None``.
    """
    # Where the clustered frame is written; kept as a plain string so the
    # final message prints it exactly as given.
    output_path = 'clustering/client_clusters.csv'

    print("--- Starting Carmignac Client Clustering Pipeline ---")

    print("Loading data...")
    flows, aum, rates, gov = load_and_clean_data(
        'data/flows_sample.csv',
        'data/aum_sample.csv',
        'data/str_rates.csv',
        'data/eur_gov_indices.csv'
    )

    print("Computing static features...")
    static_feats = compute_static_features(flows, aum)

    print("Computing market sensitivities (Betas)...")
    # Use 'W' (Weekly) to maximize points for the sample.
    # Use 'M' (Monthly) for the full dataset.
    sensitivity_feats = compute_market_sensitivities(flows, rates, gov, freq='W')

    # Left join: every row of static_feats is kept.
    # NOTE(review): presumably both are DataFrames indexed by client, so
    # clients with no sensitivity row would get NaN here -- confirm that
    # run_clustering_pipeline tolerates missing values.
    full_features = static_feats.join(sensitivity_feats, how='left')

    # Clustering
    print(f"Running Clustering on {len(full_features)} clients...")
    # The second return value (cluster centers) is not used here.
    clustered_df, _centers = run_clustering_pipeline(full_features, n_clusters=3)

    print("\n--- Cluster Profiles (Mean Values) ---")
    profiles = get_cluster_profiles(clustered_df)
    print(profiles.T)

    # to_csv does not create missing directories; make sure the target
    # directory exists so the run does not fail at the very last step.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    clustered_df.to_csv(output_path)
    print(f"\nResults saved to '{output_path}'")


if __name__ == "__main__":
    main()