thiscodeWorks | thiscodeWorks

1.
from sklearn.datasets import load_iris
import pandas as pd
iris=load_iris()
df=pd.DataFrame(data=iris.data, columns=iris.feature_names)
print("DataFrame Head:\n",df.head())
print("DataInfo:\n",df.info())

output
DataFrame Head:
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB
DataInfo:
 None
2.
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
x_scaled=scaler.fit_transform(df)
from sklearn.cluster import KMeans
Kmeans=KMeans(n_clusters=3, random_state=42)
Kmeans.fit((x_scaled))
clusters_labels=Kmeans.labels_
df['cluster']=clusters_labels
cluster_centers=Kmeans.cluster_centers_
print("cluster centers:\n",cluster_centers)

ouput

cluster centers:
 [[ 1.13597027  0.08842168  0.99615451  1.01752612]
 [-1.01457897  0.85326268 -1.30498732 -1.25489349]
 [-0.05021989 -0.88337647  0.34773781  0.2815273 ]]

3.

from sklearn.metrics import adjusted_rand_score,silhouette_score
true_labels=iris.target
ari=adjusted_rand_score(true_labels,clusters_labels)
print(f"Adjusted Rand_Index(ARI):{ari}")
silhouette_avg=silhouette_score(x_scaled,clusters_labels)
print(f"sithouette score : {silhouette_avg}")

ouput

Adjusted Rand_Index(ARI):0.6201351808870379
sithouette score : 0.45994823920518635

ll.1

#Hierarchial clustering
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram,linkage
from scipy.cluster.hierarchy import fcluster
iris=load_iris()
data=pd.DataFrame(iris.data,columns=iris.feature_names)
scaler=StandardScaler()
scaled_data=scaler.fit_transform(data)
z=linkage(scaled_data,method="ward")
plt.figure(figsize=(10,7))
plt.title("Dendogram for Iris dataset")
dendrogram(z,labels=iris.target)
plt.show()
clusters=fcluster(z,3,criterion="maxclust")
data['cluster']=clusters
data['species']=iris.target
print(data.groupby(['cluster','species']).size())
silhouette_avg=silhouette_score(scaled_data,clusters)
print(f"Silhouette score: {silhouette_avg}")

output
 

cluster  species
1        0          49
2        0           1
         1          27
         2           2
3        1          23
         2          48
dtype: int64
Silhouette score: 0.446689041028591