from sklearn.preprocessing import RobustScaler
from sklearn.cluster import KMeans

def run_clustering_pipeline(feature_df, n_clusters=4):
    """Impute, robust-scale, and K-Means-cluster a client feature table.

    Args:
        feature_df: DataFrame of client features. Missing values are
            replaced with 0 before scaling.
        n_clusters: Number of K-Means clusters to fit (default 4).

    Returns:
        A ``(results, centers)`` tuple. ``results`` is the zero-filled
        (unscaled) feature table with an added ``'Cluster'`` column of
        labels. ``centers`` is the fitted model's ``cluster_centers_``;
        because the model is fitted on the scaled matrix, the centers are
        expressed in scaled feature space, not in the original units.
    """
    # Missing sensitivities are treated as 0 (neutral): clients without
    # enough history should not be pulled toward either extreme.
    filled = feature_df.fillna(0)

    # RobustScaler rather than StandardScaler: financial data contains
    # 'Whale' clients (outliers) that would otherwise dominate the scaling.
    scaled = RobustScaler().fit_transform(filled)

    # Fixed seed and explicit n_init keep the cluster assignment reproducible.
    model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    cluster_ids = model.fit_predict(scaled)

    # assign() returns a new frame, so `filled` itself is left untouched.
    labelled = filled.assign(Cluster=cluster_ids)
    return labelled, model.cluster_centers_

def get_cluster_profiles(results_df):
    """Compute the mean of every column within each cluster.

    Args:
        results_df: DataFrame containing a ``'Cluster'`` column to group on.

    Returns:
        DataFrame indexed by ``'Cluster'`` holding the per-cluster mean of
        each remaining column.
    """
    per_cluster = results_df.groupby(by='Cluster')
    return per_cluster.mean()