1 and 2 #pca and #rfe

Saved by @varuntej #python

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import seaborn as sns

df_petrol = pd.read_csv('petrol_consumption.csv')
print(df_petrol.head())
df_petrol.info()
#
df_petrol.isnull().sum()
#
df_petrol.fillna(df_petrol.mean(),inplace=True)
#
sns.pairplot(df_petrol,hue="Petrol_Consumption")
plt.show()
#
#PCA
scaler = StandardScaler()
# Scale only the feature columns; keep the target out of the PCA input
features_scaled = scaler.fit_transform(df_petrol.drop('Petrol_Consumption', axis=1))

pca = PCA(n_components=2)
df_pca = pca.fit_transform(features_scaled)
df = pd.DataFrame(data=df_pca, columns=['PC1', 'PC2'])
df["Petrol_Consumption"] = df_petrol["Petrol_Consumption"]
sns.scatterplot(x='PC1',y='PC2',data = df,hue="Petrol_Consumption")
plt.show()
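# Optional check on the 2-component projection: how much of the variance in
# the scaled features do PC1 and PC2 retain? explained_variance_ratio_ is a
# standard attribute of a fitted PCA.
print(pca.explained_variance_ratio_)
print(pca.explained_variance_ratio_.sum())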

#RFE
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

df = pd.read_csv("Fish.csv")
print(df.head())
df.info()
#
X = df.drop('Species',axis=1)
y = df['Species']

y.info()
#
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state=42)

dtc = DecisionTreeClassifier()
dtc.fit(X_train,y_train)
y_pred = dtc.predict(X_test)
print(y_pred)
print(y_test)

accuracy = accuracy_score(y_test,y_pred)
print(accuracy)
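#
# RFE is imported above but otherwise unused; a minimal sketch of it on the
# same Fish train/test split, with a fresh DecisionTreeClassifier as the
# estimator. n_features_to_select=3 is an arbitrary illustrative choice.
rfe = RFE(estimator=DecisionTreeClassifier(), n_features_to_select=3)
rfe.fit(X_train, y_train)
print(dict(zip(X.columns, rfe.support_)))   # True = feature kept
print(dict(zip(X.columns, rfe.ranking_)))   # 1 = selected; larger = dropped earlier

# Retrain on only the selected features and compare test accuracy
dtc_rfe = DecisionTreeClassifier()
dtc_rfe.fit(rfe.transform(X_train), y_train)
print(accuracy_score(y_test, dtc_rfe.predict(rfe.transform(X_test))))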
#
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('Iris.csv')
df.info()
#
encoder = LabelEncoder()
df["Species"] = encoder.fit_transform(df["Species"])
#
df.info()
#
X = df.iloc[:, :-1]  # equivalently: df.drop(columns=["Species"])
y = df.iloc[:, -1]   # equivalently: df["Species"]
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
dtc = DecisionTreeClassifier()
dtc.fit(X_train,y_train)
y_pred = dtc.predict(X_test)
print(y_test,y_pred)
#
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

# sklearn metrics expect (y_true, y_pred); swapping them transposes the
# confusion matrix and mislabels precision/recall in the report
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
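#
# A quick visual of the confusion matrix, sketched with the seaborn import
# from the first snippet (assumes it is still available in this session);
# axis labels come from the fitted LabelEncoder.
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt='d',
            xticklabels=encoder.classes_, yticklabels=encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()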
#
# Visualize the fitted decision tree for the Iris dataset

from sklearn import tree
import matplotlib.pyplot as plt

plt.figure(figsize=(15,10))
tree.plot_tree(dtc, filled=True, feature_names=X.columns, class_names=encoder.classes_)  # species names recovered from the LabelEncoder
plt.show()
from sklearn.neighbors import KNeighborsClassifier

df = pd.read_csv('diabetes.csv')
df.info()
#
X = df.iloc[:,:-1] #df.drop("Outcome",axis=1)
y = df.iloc[:,-1]
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
knn = KNeighborsClassifier(n_neighbors=2)
knn.fit(X_train,y_train)
y_pred = knn.predict(X_test)
print(y_test,y_pred)
#
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
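#
# A minimal follow-up sketch: scale the features and scan a few values of
# n_neighbors to see how k affects test accuracy. The range 1-14 is an
# arbitrary illustrative choice.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

for k in range(1, 15):
    knn_k = KNeighborsClassifier(n_neighbors=k)
    knn_k.fit(X_train_s, y_train)
    print(k, accuracy_score(y_test, knn_k.predict(X_test_s)))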