5. Implement the Decision Tree classification model on the Iris dataset. Estimate the accuracy of the model. Also write a program to visualize insights from the dataset.


import pandas as pd 
# Load the dataset 
df = pd.read_csv('Iris.csv') 
# Display the first few rows and basic information 
print(df.head()) 
print(df.describe()) 
df.info()  # info() prints its summary itself; wrapping it in print() would also print "None"
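# Optional: a pairplot gives a quick visual overview of how the species
# separate across feature pairs (a minimal sketch, assuming seaborn is
# installed; the Kaggle Iris.csv's Id column is dropped if present).
import seaborn as sns
import matplotlib.pyplot as plt
sns.pairplot(df.drop(columns=['Id'], errors='ignore'), hue='Species')
plt.show()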
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import LabelEncoder 
# Encode the target variable 
label_encoder = LabelEncoder() 
df['Species'] = label_encoder.fit_transform(df['Species']) 
# Separate features and target variable 
X = df.drop(columns=['Id', 'Species'], errors='ignore')  # drop the row-Id column (present in the Kaggle Iris.csv) so it is not used as a feature
y = df['Species'] 
# Split the data into training and testing sets 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import accuracy_score 
# Initialize the Decision Tree Classifier 
model = DecisionTreeClassifier(random_state=42) 
model.fit(X_train, y_train) 
# Predict on the test set 
y_pred = model.predict(X_test) 
# Estimate the accuracy of the model 
accuracy = accuracy_score(y_test, y_pred) 
print("Accuracy of Decision Tree Classifier:", accuracy) 
from sklearn.tree import plot_tree 
import matplotlib.pyplot as plt 
# Plot the decision tree 
plt.figure(figsize=(12, 8)) 
plot_tree(model, feature_names=X.columns, class_names=label_encoder.classes_, filled=True) 
plt.title("Decision Tree Structure") 
plt.show() 
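# The fitted tree can also be dumped as plain-text rules, useful when a
# figure is inconvenient (a sketch using sklearn.tree.export_text).
from sklearn.tree import export_text
print(export_text(model, feature_names=list(X.columns)))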
importances = model.feature_importances_ 
feature_names = X.columns 
# Plot feature importances 
plt.figure(figsize=(8, 6)) 
plt.barh(feature_names, importances, color='skyblue') 
plt.xlabel("Feature Importance") 
plt.ylabel("Feature") 
plt.title("Feature Importances in Decision Tree Model") 
plt.show() 
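# Printing the importances sorted makes the ranking explicit next to the chart.
print(pd.Series(importances, index=feature_names).sort_values(ascending=False))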
import numpy as np 
from matplotlib.colors import ListedColormap 
# Define a function to plot decision boundaries 
def plot_decision_boundary(model, X, y):
    # Set up the grid for plotting decision boundaries
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    # Predict over the grid
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.figure(figsize=(10, 6))
    plt.contourf(xx, yy, Z, alpha=0.3, cmap=ListedColormap(('red', 'green', 'blue')))
    scatter = plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=ListedColormap(('red', 'green', 'blue')))
    plt.xlabel("Petal Length")
    plt.ylabel("Petal Width")
    plt.title("Decision Boundary (Petal Length vs Petal Width)")
    plt.legend(handles=scatter.legend_elements()[0], labels=list(label_encoder.classes_))
    plt.show()
# Extract PetalLength and PetalWidth for visualization 
X_2d_train = X_train[['PetalLengthCm', 'PetalWidthCm']].values 
y_train_2d = y_train.values 
# Train a new model on these two features for simplicity 
model_2d = DecisionTreeClassifier(random_state=42) 
model_2d.fit(X_2d_train, y_train_2d) 
# Plot the decision boundary 
plot_decision_boundary(model_2d, X_2d_train, y_train_2d)
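# A single 80/20 split can be lucky or unlucky on only 150 rows; k-fold
# cross-validation gives a steadier accuracy estimate (a minimal sketch
# using sklearn.model_selection.cross_val_score).
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(DecisionTreeClassifier(random_state=42), X, y, cv=5)
print("5-fold CV accuracy: %.3f +/- %.3f" % (cv_scores.mean(), cv_scores.std()))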