# Project_Carmignac/clustering/visualize.py
#
# 57 lines
# 1.8 KiB
# Python

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import RobustScaler
def plot_clusters() -> None:
    """Save a 2-D PCA scatter plot of the clustered clients.

    Reads 'client_clusters.csv' (first column used as the index), scales the
    numeric feature columns (everything except 'Cluster') with RobustScaler,
    reduces the scaled features to two principal components, and writes a
    scatter plot coloured and styled by cluster label to 'cluster_map.png'
    at 300 dpi.

    If the CSV file does not exist, an error message is printed and the
    function returns without producing a plot.
    """
    print("--- Generating Cluster Visualization ---")

    # 1. Load the results from main.py
    try:
        df = pd.read_csv('client_clusters.csv', index_col=0)
    except FileNotFoundError:
        print("Error: Run main.py first to generate 'client_clusters.csv'")
        return

    # 2. Prepare Data for PCA
    # Keep only numeric/boolean feature columns: a lingering text or ID column
    # would otherwise make the scaler raise on non-numeric data.
    X = df.drop(columns=['Cluster']).select_dtypes(include=['number', 'bool'])

    # Scale (Critical for PCA)
    scaler = RobustScaler()
    X_scaled = scaler.fit_transform(X)

    # 3. Run PCA (Reduce to 2 Dimensions)
    pca = PCA(n_components=2)
    components = pca.fit_transform(X_scaled)

    # Create plotting DataFrame, reusing the original index so the cluster
    # labels below align row-for-row with the projected points.
    plot_df = pd.DataFrame(data=components, columns=['PC1', 'PC2'], index=X.index)
    plot_df['Cluster'] = df['Cluster'].astype(str)  # Convert to string for discrete colors

    # 4. Plot
    plt.figure(figsize=(12, 8))
    sns.scatterplot(
        data=plot_df,
        x='PC1',
        y='PC2',
        hue='Cluster',
        style='Cluster',
        palette='viridis',
        s=60,
        alpha=0.8,
    )

    plt.title('Client Segmentation Map (PCA Projection)', fontsize=16)
    # Axis labels report the share of variance captured by each component.
    plt.xlabel(f'Principal Component 1 ({pca.explained_variance_ratio_[0]:.1%} Variance)', fontsize=12)
    plt.ylabel(f'Principal Component 2 ({pca.explained_variance_ratio_[1]:.1%} Variance)', fontsize=12)
    # Anchor the legend outside the plotting area, to the right of the axes.
    plt.legend(title='Cluster ID', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True, linestyle='--', alpha=0.3)
    plt.tight_layout()

    plt.savefig('cluster_map.png', dpi=300)
    print("Visualization saved to 'cluster_map.png'")
# Script entry point: generate the cluster map only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    plot_clusters()