2026-02-02 11:37:16 +01:00
|
|
|
import pandas as pd
|
|
|
|
|
from data_loader import load_and_clean_data
|
|
|
|
|
from features import compute_static_features, compute_market_sensitivities
|
|
|
|
|
from clustering import run_clustering_pipeline, get_cluster_profiles
|
|
|
|
|
|
|
|
|
|
def main(freq='W', n_clusters=3, output_path='clustering/client_clusters.csv'):
    """Run the client clustering pipeline end to end.

    Loads the input tables, computes static features and market
    sensitivities, left-joins the two feature tables, clusters the joined
    table, prints the cluster profiles and writes the clustered table to a
    CSV file.

    Args:
        freq: Frequency code forwarded to ``compute_market_sensitivities``.
            Use 'W' (weekly) to maximize points for the sample and
            'M' (monthly) for the full dataset.
        n_clusters: Number of clusters forwarded to
            ``run_clustering_pipeline``.
        output_path: Destination of the result CSV. Missing parent
            directories are created before writing.
    """
    # Local import keeps this function self-contained without touching the
    # file-level import block.
    from pathlib import Path

    print("--- Starting Carmignac Client Clustering Pipeline ---")

    print("Loading data...")
    # NOTE(review): only the rates and gov paths are passed here; the flows
    # and AUM inputs presumably come from the loader's defaults -- confirm.
    flows, aum, rates, gov = load_and_clean_data(
        rates_path='data/str_rates.csv',
        gov_path='data/eur_gov_indices.csv',
    )

    print("Computing static features...")
    static_feats = compute_static_features(flows, aum)

    print("Computing market sensitivities (Betas)...")
    sensitivity_feats = compute_market_sensitivities(
        flows, aum, rates, gov, freq=freq
    )

    # Left join: every row of the static features is kept, even when no
    # sensitivities were computed for it.
    full_features = static_feats.join(sensitivity_feats, how='left')

    # Clustering
    print(f"Running Clustering on {len(full_features)} clients...")
    clustered_df, centers = run_clustering_pipeline(
        full_features, n_clusters=n_clusters
    )

    print("\n--- Cluster Profiles (Mean Values) ---")
    profiles = get_cluster_profiles(clustered_df)
    print(profiles.T)

    # Create the target directory first so a missing folder does not discard
    # the finished run with an OSError at the very last step.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    clustered_df.to_csv(output_path)
    print(f"\nResults saved to '{output_path}'")
|
2026-02-02 11:37:16 +01:00
|
|
|
|
|
|
|
|
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|