import pandas as pd
from data_loader import load_and_clean_data
from features import compute_static_features, compute_market_sensitivities
from clustering import run_clustering_pipeline, get_cluster_profiles


def main():
    """Run the client clustering pipeline end to end.

    Steps, in order:
      1. Load the four sample CSV inputs through ``load_and_clean_data``.
      2. Build static features and market-sensitivity features, then
         left-join the sensitivities onto the static features.
      3. Cluster the joined feature table into three clusters.
      4. Print the transposed cluster profiles and write the clustered
         table to ``client_clusters.csv``.

    Progress messages are printed to stdout along the way. Takes no
    arguments and returns nothing.
    """
    print("--- Starting Carmignac Client Clustering Pipeline ---")

    # --- Step 1: data ingestion -------------------------------------------
    print("Loading data...")
    input_paths = (
        'flows_sample.csv',
        'aum_sample.csv',
        'str_rates.csv',
        'eur_gov_indices.csv',
    )
    flow_data, aum_data, rate_data, gov_data = load_and_clean_data(*input_paths)

    # --- Step 2: feature engineering --------------------------------------
    print("Computing static features...")
    static_table = compute_static_features(flow_data, aum_data)

    print("Computing market sensitivities (Betas)...")
    # Weekly ('W') resampling is used here to maximize the number of points
    # available in the sample files; switch to monthly ('M') when running
    # on the full dataset.
    beta_table = compute_market_sensitivities(
        flow_data, rate_data, gov_data, freq='W'
    )

    # Left join: every row of the static table is kept, whether or not a
    # matching sensitivity row exists.
    # NOTE(review): assumes both tables share the same (client) index — confirm.
    feature_table = static_table.join(beta_table, how='left')

    # --- Step 3: clustering -----------------------------------------------
    client_count = len(feature_table)
    print(f"Running Clustering on {client_count} clients...")
    # The second return value (cluster centers) is not used by this script.
    labelled_table, _centers = run_clustering_pipeline(
        feature_table, n_clusters=3
    )

    # --- Step 4: reporting and export -------------------------------------
    print("\n--- Cluster Profiles (Mean Values) ---")
    profile_table = get_cluster_profiles(labelled_table)
    # Transposed before printing.
    print(profile_table.T)

    labelled_table.to_csv('client_clusters.csv')
    print("\nResults saved to 'client_clusters.csv'")


if __name__ == "__main__":
    main()