K MEANS

PHOTO

Wed Nov 06 2024 17:26:38 GMT+0000 (Coordinated Universal Time)

from sklearn.datasets import load_iris
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score, silhouette_score

# Step 1: Load the Iris dataset
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
print("DataFrame Head:\n", df.head())
print("DataFrame Info:\n", df.info())

# Step 2: Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)

# Step 3: Apply K-Means Clustering
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_scaled)

# Get cluster labels
cluster_labels = kmeans.labels_
df['Cluster'] = cluster_labels

# Cluster centers
cluster_centers = kmeans.cluster_centers_
print("Cluster Centers:\n", cluster_centers)

# Step 4: Evaluate the Clustering Performance
true_labels = iris.target

# Adjusted Rand Index (ARI)
ari = adjusted_rand_score(true_labels, cluster_labels)
print(f"Adjusted Rand Index (ARI): {ari}")

# Silhouette Score
silhouette_avg = silhouette_score(X_scaled, cluster_labels)
print(f"Silhouette Score: {silhouette_avg}")

COPY