# 1. Load the Iris dataset into a DataFrame and inspect it.
from sklearn.datasets import load_iris
import pandas as pd

iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
print("DataFrame Head:\n", df.head())
# DataFrame.info() writes its summary straight to stdout and returns None.
# Wrapping it in print() therefore emitted the summary *before* the label and
# then printed a stray "None"; print the label first, then call info().
print("DataInfo:")
df.info()

# Output (values from the recorded run; the info summary now follows its
# label instead of preceding it, and the trailing "None" is gone):
# DataFrame Head:
#     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
# 0                5.1               3.5                1.4               0.2
# 1                4.9               3.0                1.4               0.2
# 2                4.7               3.2                1.3               0.2
# 3                4.6               3.1                1.5               0.2
# 4                5.0               3.6                1.4               0.2
# DataInfo:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 150 entries, 0 to 149
# Data columns (total 4 columns):
#  #   Column             Non-Null Count  Dtype
# ---  ------             --------------  -----
#  0   sepal length (cm)  150 non-null    float64
#  1   sepal width (cm)   150 non-null    float64
#  2   petal length (cm)  150 non-null    float64
#  3   petal width (cm)   150 non-null    float64
# dtypes: float64(4)
# memory usage: 4.8 KB

# 2. Standardise the four features and cluster them with K-Means (k=3).
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

scaler = StandardScaler()
x_scaled = scaler.fit_transform(df)

Kmeans = KMeans(n_clusters=3, random_state=42)
Kmeans.fit(x_scaled)
clusters_labels = Kmeans.labels_
# The labels are attached only after scaling, so x_scaled holds just the
# four measurement columns.
df['cluster'] = clusters_labels
cluster_centers = Kmeans.cluster_centers_
print("cluster centers:\n", cluster_centers)

# Output (recorded run):
# cluster centers:
#  [[ 1.13597027  0.08842168  0.99615451  1.01752612]
#  [-1.01457897  0.85326268 -1.30498732 -1.25489349]
#  [-0.05021989 -0.88337647  0.34773781  0.2815273 ]]

# 3.
# Evaluate the K-Means labels: agreement with the true species (ARI) and
# cluster cohesion/separation in the scaled feature space (silhouette).
# Relies on `iris`, `clusters_labels` and `x_scaled` from the preceding steps.
from sklearn.metrics import adjusted_rand_score, silhouette_score

true_labels = iris.target
ari = adjusted_rand_score(true_labels, clusters_labels)
print(f"Adjusted Rand_Index(ARI):{ari}")
silhouette_avg = silhouette_score(x_scaled, clusters_labels)
print(f"silhouette score : {silhouette_avg}")

# Output (recorded run; the "sithouette" label typo is corrected):
# Adjusted Rand_Index(ARI):0.6201351808870379
# silhouette score : 0.45994823920518635

# ll.1
# Hierarchical clustering
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
# Imported here as well so this section runs on its own, like its other
# imports, instead of depending on the evaluation step above having run.
from sklearn.metrics import silhouette_score
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import fcluster

iris = load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names)
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

# Build the agglomerative merge tree with Ward linkage on the scaled data.
z = linkage(scaled_data, method="ward")

plt.figure(figsize=(10, 7))
plt.title("Dendrogram for Iris dataset")
# Leaves are labelled with the true species code of each sample.
dendrogram(z, labels=iris.target)
plt.show()

# Cut the tree into at most three flat clusters (labels start at 1).
clusters = fcluster(z, 3, criterion="maxclust")
data['cluster'] = clusters
data['species'] = iris.target
# Cross-tabulate cluster assignment against the true species.
print(data.groupby(['cluster', 'species']).size())
silhouette_avg = silhouette_score(scaled_data, clusters)
print(f"Silhouette score: {silhouette_avg}")

# Output (recorded run):
# cluster  species
# 1        0          49
# 2        0           1
#          1          27
#          2           2
# 3        1          23
#          2          48
# dtype: int64
# Silhouette score: 0.446689041028591