# Project_Carmignac/clustering/clustering.py
# 2026-02-02 11:31:08 +00:00
#
# 28 lines
# 943 B
# Python

from sklearn.preprocessing import RobustScaler
from sklearn.cluster import KMeans
def run_clustering_pipeline(feature_df, n_clusters=4):
    """
    Scale client features and assign each row to a K-Means cluster.

    Args:
        feature_df: pandas-style DataFrame of features (presumably one row
            per client). Missing values are allowed; they are replaced
            with 0 before scaling.
        n_clusters: Number of clusters to fit. Defaults to 4.

    Returns:
        A tuple ``(results, centers)`` where ``results`` is a copy of the
        NaN-filled input with an added 'Cluster' column holding each row's
        label, and ``centers`` is ``KMeans.cluster_centers_``. The centers
        are expressed in the RobustScaler-transformed feature space, not in
        the original feature units.
    """
    # 1. Preprocessing
    # Missing sensitivities (clients with insufficient history) are treated
    # as 0, i.e. neutral.
    filled = feature_df.fillna(0)
    # RobustScaler rather than StandardScaler: for financial data it is less
    # influenced by 'Whale' clients.
    scaled = RobustScaler().fit_transform(filled)

    # 2. Clustering
    # Fixed random_state keeps the cluster assignment reproducible.
    model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    labels = model.fit_predict(scaled)

    # 3. Attach labels to a copy so the filled frame itself is not mutated.
    results = filled.copy()
    results['Cluster'] = labels
    return results, model.cluster_centers_
def get_cluster_profiles(results_df):
    """Return the average profile of each cluster.

    Args:
        results_df: DataFrame containing a 'Cluster' column alongside the
            feature columns to be averaged.

    Returns:
        DataFrame indexed by 'Cluster' holding the mean of every numeric
        column within each cluster.

    Raises:
        KeyError: If ``results_df`` has no 'Cluster' column.
    """
    # numeric_only=True: non-numeric columns (e.g. identifiers joined on after
    # clustering) cannot be averaged and would make mean() raise on
    # pandas >= 2.0. All-numeric input is unaffected.
    return results_df.groupby('Cluster').mean(numeric_only=True)