57 lines
1.8 KiB
Python
57 lines
1.8 KiB
Python
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
from sklearn.decomposition import PCA
|
|
from sklearn.preprocessing import RobustScaler
|
|
|
|
def plot_clusters(
    input_path: str = 'client_clusters.csv',
    output_path: str = 'cluster_map.png',
) -> None:
    """Project the clustered clients onto two principal components and save a scatter plot.

    The CSV at ``input_path`` is read with its first column as the index and
    must contain a ``Cluster`` column. Every other numeric (or boolean) column
    is used as a feature: features are scaled with ``RobustScaler``, reduced
    to two dimensions with PCA, and drawn as a scatter plot coloured and
    styled by cluster. The figure is written to ``output_path`` and then
    closed.

    Args:
        input_path: CSV file holding the features plus the ``Cluster`` column.
        output_path: Destination of the rendered PNG.

    Returns:
        None. If ``input_path`` does not exist, an error message is printed
        and nothing is written.

    Raises:
        KeyError: If the CSV has no ``Cluster`` column.
        ValueError: If fewer than two rows or fewer than two numeric feature
            columns are available, since a 2-component PCA cannot be fitted.
    """
    print("--- Generating Cluster Visualization ---")

    # 1. Load the results from main.py
    try:
        df = pd.read_csv(input_path, index_col=0)
    except FileNotFoundError:
        print(f"Error: Run main.py first to generate '{input_path}'")
        return

    # 2. Prepare data for PCA
    # Keep only columns the scaler can digest; a lingering text/ID column
    # would otherwise abort the run with an opaque conversion error.
    features = df.drop(columns=['Cluster']).select_dtypes(include=['number', 'bool'])

    # PCA(n_components=2) needs at least 2 samples and 2 features; fail early
    # with a message that names the actual problem.
    n_rows, n_cols = features.shape
    if min(n_rows, n_cols) < 2:
        raise ValueError(
            "Need at least 2 rows and 2 numeric feature columns for a 2-D PCA "
            f"projection, got {n_rows} row(s) and {n_cols} numeric column(s)."
        )

    # Scale (critical for PCA: otherwise large-magnitude columns dominate)
    X_scaled = RobustScaler().fit_transform(features)

    # 3. Run PCA (reduce to 2 dimensions)
    pca = PCA(n_components=2)
    components = pca.fit_transform(X_scaled)

    # Create plotting DataFrame
    plot_df = pd.DataFrame(data=components, columns=['PC1', 'PC2'], index=features.index)
    # Strings make seaborn treat the cluster id as a discrete category.
    # Rows are positionally identical to df, so no index alignment is needed.
    plot_df['Cluster'] = df['Cluster'].astype(str).to_numpy()

    # 4. Plot
    pc1_share, pc2_share = pca.explained_variance_ratio_[:2]
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        sns.scatterplot(
            data=plot_df,
            x='PC1',
            y='PC2',
            hue='Cluster',
            style='Cluster',
            palette='viridis',
            s=60,
            alpha=0.8,
            ax=ax,
        )

        ax.set_title('Client Segmentation Map (PCA Projection)', fontsize=16)
        ax.set_xlabel(f'Principal Component 1 ({pc1_share:.1%} Variance)', fontsize=12)
        ax.set_ylabel(f'Principal Component 2 ({pc2_share:.1%} Variance)', fontsize=12)
        # Legend sits outside the axes so it never hides data points.
        ax.legend(title='Cluster ID', bbox_to_anchor=(1.05, 1), loc='upper left')
        ax.grid(True, linestyle='--', alpha=0.3)

        fig.tight_layout()
        fig.savefig(output_path, dpi=300)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    print(f"Visualization saved to '{output_path}'")
|
|
|
|
# Script entry point: build the cluster map only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    plot_clusters()