import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import seaborn as sns
# --- Petrol consumption: load, clean, and visualize with a 2-component PCA ---
df_petrol = pd.read_csv('petrol_consumption.csv')
print(df_petrol.head())
df_petrol.info()

# Count missing values per column (wrapped in print: a bare expression is a
# no-op in a script, it only echoes in a notebook).
print(df_petrol.isnull().sum())

# Impute missing values with each column's mean. numeric_only=True keeps this
# from raising if the frame ever gains a non-numeric column (pandas >= 2.0
# no longer silently drops them in mean()).
df_petrol.fillna(df_petrol.mean(numeric_only=True), inplace=True)

# Pairwise feature relationships, colored by the target.
sns.pairplot(df_petrol, hue="Petrol_Consumption")
plt.show()

# PCA: standardize the features, then project onto the first 2 components.
scaler = StandardScaler()
# BUG FIX: the original called scaler.fit_transform(df), but `df` is not
# defined at this point in the script — the data to scale is df_petrol.
scaled = scaler.fit_transform(df_petrol)
pca = PCA(n_components=2)
df_pca = pca.fit_transform(scaled)
df = pd.DataFrame(data=df_pca, columns=['PC1', 'PC2'])
df["Petrol_Consumption"] = df_petrol["Petrol_Consumption"]
sns.scatterplot(x='PC1', y='PC2', data=df, hue="Petrol_Consumption")
plt.show()
#Rfe
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Fish dataset: classify species with a decision tree and report test accuracy.
fish = pd.read_csv("Fish.csv")
print(fish.head())
fish.info()

# Features = every column except the 'Species' label.
X = fish.drop('Species', axis=1)
y = fish['Species']
y.info()

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
print(y_pred)
print(y_test)

# Fraction of correctly classified held-out samples.
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)
#
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
# --- Iris dataset: decision-tree classification with label encoding ---
df = pd.read_csv('Iris.csv')
df.info()

# Encode the string species labels as integers 0..2.
encoder = LabelEncoder()
df["Species"] = encoder.fit_transform(df["Species"])
df.info()

# NOTE(review): iloc[:, :-1] takes every column except the target; if the CSV
# contains an 'Id' column it is included as a (useless) feature — confirm.
X = df.iloc[:, :-1]  # df.drop(columns=["Species"])
y = df.iloc[:, -1]   # df["Species"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
print(y_test, y_pred)

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# BUG FIX: sklearn's metrics take (y_true, y_pred) in that order. The original
# passed (y_pred, y_test), which transposes the confusion matrix and swaps
# precision with recall in the classification report. accuracy_score is
# symmetric, but is fixed too for consistency.
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

# Visualize the fitted tree. Class names come from the label encoder so the
# leaves show real species names instead of the hard-coded '0'/'1'/'2'.
from sklearn import tree
import matplotlib.pyplot as plt
plt.figure(figsize=(15, 10))
tree.plot_tree(dtc, filled=True, feature_names=X.columns,
               class_names=[str(c) for c in encoder.classes_])
plt.show()
# --- Diabetes dataset: k-nearest-neighbors classification ---
# BUG FIX: this import was commented out, so KNeighborsClassifier below
# raised NameError at runtime.
from sklearn.neighbors import KNeighborsClassifier

df = pd.read_csv('diabetes.csv')
df.info()

X = df.iloc[:, :-1]  # all feature columns (everything but the last)
y = df.iloc[:, -1]   # target column — last in the CSV ('Outcome')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

knn = KNeighborsClassifier(n_neighbors=2)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print(y_test, y_pred)

# BUG FIX: metrics take (y_true, y_pred) in that order; the original passed
# (y_pred, y_test), transposing the confusion matrix and swapping
# precision/recall in the report.
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
# (removed: stray "Preview / Download PNG/JPEG/SVG" UI text accidentally
# pasted from a code-screenshot tool — it was not Python and broke the file)