# K-Means demo: cluster a synthetic blob dataset, report internal
# (silhouette, Davies-Bouldin) and external (confusion matrix,
# classification report) quality metrics, then plot the clusters.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

# Create a synthetic dataset
X, y_true = datasets.make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=0)

# Fit the K-Means algorithm
kmeans = KMeans(n_clusters=3, random_state=0)
y_kmeans = kmeans.fit_predict(X)

# Performance Metrics (need no ground truth)
silhouette_avg = silhouette_score(X, y_kmeans)
davies_bouldin = davies_bouldin_score(X, y_kmeans)
print("Silhouette Score: ", silhouette_avg)
print("Davies-Bouldin Score: ", davies_bouldin)


# Confusion Matrix and Classification Report (if true labels are available)
# Relabel the clusters to match the true labels
def relabel_clusters(y_true, y_pred):
    """Return a lookup array mapping each cluster id to a true label.

    Cluster ids produced by K-Means are arbitrary, so before comparing them
    with ground truth each cluster is assigned the true label that occurs
    most often among its members (majority vote).

    Args:
        y_true: Ground-truth labels, one per sample.
        y_pred: Integer cluster ids (0..k-1), one per sample.

    Returns:
        Array ``m`` such that ``m[c]`` is the majority true label of
        cluster ``c``, expressed in the original label space of ``y_true``.
        Several clusters may map to the same label; the mapping is not
        forced to be one-to-one.
    """
    label_encoder = LabelEncoder()
    y_true_encoded = label_encoder.fit_transform(y_true)
    n_classes = len(label_encoder.classes_)

    # Pin the label set so that column c always corresponds to cluster id c,
    # even when the number of clusters differs from the number of classes.
    n_labels = max(n_classes, int(np.max(y_pred)) + 1)
    cm = confusion_matrix(y_true_encoded, y_pred, labels=np.arange(n_labels))

    # Rows are true labels and columns are predicted clusters, so the
    # cluster -> label map is the argmax down each COLUMN (axis=0).
    # Using axis=1 would give the inverse (label -> cluster) mapping.
    # Only the first n_classes rows correspond to real classes.
    majority_encoded = np.argmax(cm[:n_classes], axis=0)

    # Translate encoded indices back to the caller's original labels.
    return label_encoder.inverse_transform(majority_encoded)


relabel_map = relabel_clusters(y_true, y_kmeans)
# Vectorised lookup: cluster id -> majority true label for every sample.
y_kmeans_mapped = relabel_map[y_kmeans]

# Display confusion matrix and classification report
print("Confusion Matrix:\n", confusion_matrix(y_true, y_kmeans_mapped))
print("Classification Report:\n", classification_report(y_true, y_kmeans_mapped))

# Visualization of the clusters (coloured by raw cluster id, centroids in red)
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, s=50, cmap='viridis')
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='red', s=200, alpha=0.75, marker='X')
plt.title('K-Means Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter