Exercises 1 and 2: #pca and #rfe
Sun Nov 03 2024 13:45:01 GMT+0000 (Coordinated Universal Time)
# 1. PCA on the petrol consumption dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

df_petrol = pd.read_csv('petrol_consumption.csv')
print(df_petrol.head())
df_petrol.info()

df_petrol.isnull().sum()
df_petrol.fillna(df_petrol.mean(), inplace=True)

sns.pairplot(df_petrol, hue="Petrol_Consumption")
plt.show()

# PCA: standardise the features, then project onto the first two principal components
scaler = StandardScaler()
features = df_petrol.drop('Petrol_Consumption', axis=1)   # keep the target out of the projection
scaled = scaler.fit_transform(features)

pca = PCA(n_components=2)
df_pca = pca.fit_transform(scaled)

df = pd.DataFrame(data=df_pca, columns=['PC1', 'PC2'])
df["Petrol_Consumption"] = df_petrol["Petrol_Consumption"]
sns.scatterplot(x='PC1', y='PC2', data=df, hue="Petrol_Consumption")
plt.show()

# 2. RFE / classification
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Decision tree on the Fish dataset
df = pd.read_csv("Fish.csv")
print(df.head())
df.info()

X = df.drop('Species', axis=1)
y = df['Species']
y.info()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
print(y_pred)
print(y_test)
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

# Decision tree on the Iris dataset
df = pd.read_csv('Iris.csv')
df.info()

encoder = LabelEncoder()
df["Species"] = encoder.fit_transform(df["Species"])
df.info()

X = df.iloc[:, :-1]   # equivalently df.drop(columns=["Species"])
y = df.iloc[:, -1]    # df["Species"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
print(y_test, y_pred)

accuracy = accuracy_score(y_test, y_pred)
print(accuracy)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

# Visualise the fitted decision tree on the Iris dataset
from sklearn import tree
plt.figure(figsize=(15, 10))
tree.plot_tree(dtc, filled=True, feature_names=X.columns, class_names=['0', '1', '2'])
plt.show()

# KNN on the diabetes dataset
from sklearn.neighbors import KNeighborsClassifier
df = pd.read_csv('diabetes.csv')
df.info()

X = df.iloc[:, :-1]   # df.drop("Outcome", axis=1)
y = df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
knn = KNeighborsClassifier(n_neighbors=2)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print(y_test, y_pred)

accuracy = accuracy_score(y_test, y_pred)
print(accuracy)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
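The PCA scatter plot in exercise 1 shows only the first two components; to judge how much of the original variance the 2-D projection keeps, the explained-variance ratio of the fitted `pca` object can be printed. This is a minimal sketch, not part of the original run:

# Sketch: how much variance do PC1 and PC2 capture? (reuses the `pca` fitted in exercise 1)
print(pca.explained_variance_ratio_)          # per-component fraction of variance
print(pca.explained_variance_ratio_.sum())    # total variance retained by the 2-D projection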
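Exercise 2 imports RFE but never applies it. Below is a minimal, self-contained sketch of how recursive feature elimination could be wired into the Fish pipeline; the choice of n_features_to_select=3 is illustrative and not from the original code, and the column names are assumed to match the Fish.csv read above:

# Sketch: rank the Fish features with RFE around a decision tree and keep the top 3
import pandas as pd
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier

fish = pd.read_csv("Fish.csv")
X_fish = fish.drop('Species', axis=1)
y_fish = fish['Species']

rfe = RFE(estimator=DecisionTreeClassifier(random_state=42), n_features_to_select=3)
rfe.fit(X_fish, y_fish)
print(dict(zip(X_fish.columns, rfe.support_)))   # True for the selected columns
print(dict(zip(X_fish.columns, rfe.ranking_)))   # 1 = selected; larger = eliminated earlier

The selected columns (rfe.support_) could then replace X in the train/test split of the Fish section to compare accuracy with and without feature elimination.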