"""Render a 2-D PCA scatter plot of the client clusters written by main.py."""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import RobustScaler


def plot_clusters(input_path='client_clusters.csv', output_path='cluster_map.png'):
    """Project the clustered client data onto two principal components and save a plot.

    The CSV at ``input_path`` is read with its first column as the index and
    must contain a ``Cluster`` column holding each row's cluster label. All
    remaining numeric (and boolean) columns are treated as features: they are
    scaled with ``RobustScaler``, reduced to two dimensions with PCA, and
    drawn as a scatter plot coloured and styled by cluster.

    Problems with the input (missing file, missing ``Cluster`` column, too
    little data for a 2-D projection) are reported on stdout and the function
    returns without writing anything.

    Args:
        input_path: CSV file produced by main.py.
        output_path: Destination of the rendered image (saved at 300 dpi).

    Returns:
        None.
    """
    print("--- Generating Cluster Visualization ---")

    # 1. Load the results from main.py
    try:
        df = pd.read_csv(input_path, index_col=0)
    except FileNotFoundError:
        print(f"Error: Run main.py first to generate '{input_path}'")
        return

    if 'Cluster' not in df.columns:
        print(f"Error: '{input_path}' has no 'Cluster' column")
        return

    # 2. Prepare data for PCA
    # Keep only columns the scaler can handle; a lingering text/ID column
    # would otherwise make RobustScaler raise on the string values.
    features = df.drop(columns=['Cluster']).select_dtypes(include=['number', 'bool'])

    # PCA rejects NaN, so rows with missing feature values are left out.
    # A positional (NumPy) mask is used so duplicate index labels are harmless.
    complete = features.notna().all(axis=1).to_numpy()
    if not complete.all():
        skipped = int((~complete).sum())
        print(f"Warning: skipping {skipped} row(s) with missing feature values")
    features = features.loc[complete]
    labels = df.loc[complete, 'Cluster'].astype(str)  # strings give discrete colors

    n_rows, n_cols = features.shape
    if n_rows < 2 or n_cols < 2:
        # PCA(n_components=2) needs at least 2 samples and 2 features.
        print(
            "Error: need at least 2 rows and 2 numeric feature columns for a "
            f"2-D PCA projection (got {n_rows} row(s), {n_cols} column(s))"
        )
        return

    # PCA is variance-driven, so unscaled features with large ranges would
    # dominate the components; scale first.
    scaled = RobustScaler().fit_transform(features)

    # 3. Run PCA (reduce to 2 dimensions)
    pca = PCA(n_components=2)
    components = pca.fit_transform(scaled)

    plot_df = pd.DataFrame(components, columns=['PC1', 'PC2'], index=features.index)
    # Assign by position: rows of `labels` and `components` are in the same order.
    plot_df['Cluster'] = labels.to_numpy()

    # 4. Plot
    pc1_share, pc2_share = pca.explained_variance_ratio_
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        sns.scatterplot(
            data=plot_df,
            x='PC1',
            y='PC2',
            hue='Cluster',
            style='Cluster',
            palette='viridis',
            s=60,
            alpha=0.8,
            ax=ax,
        )
        ax.set_title('Client Segmentation Map (PCA Projection)', fontsize=16)
        ax.set_xlabel(f'Principal Component 1 ({pc1_share:.1%} Variance)', fontsize=12)
        ax.set_ylabel(f'Principal Component 2 ({pc2_share:.1%} Variance)', fontsize=12)
        ax.legend(title='Cluster ID', bbox_to_anchor=(1.05, 1), loc='upper left')
        ax.grid(True, linestyle='--', alpha=0.3)
        fig.tight_layout()
        # The legend sits outside the axes; a tight bounding box keeps it
        # from being clipped in the saved image.
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        # Release the figure even if plotting/saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)

    print(f"Visualization saved to '{output_path}'")


if __name__ == "__main__":
    plot_clusters()