import pandas as pd
# Load the dataset
df = pd.read_csv('Iris.csv')
# Display the first few rows and basic information
print(df.head())
print(df.describe())
df.info()  # info() prints directly; wrapping it in print() would also output "None"
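# Optional sanity check (an illustrative addition, not in the original):
# confirm there are no missing values before modeling; the standard Iris
# dataset should report zero for every column.
print(df.isnull().sum())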
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# Encode the target variable
label_encoder = LabelEncoder()
df['Species'] = label_encoder.fit_transform(df['Species'])
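# Optional (an illustrative addition): show the mapping LabelEncoder learned,
# i.e. which integer now stands for which species.
mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print("Label mapping:", mapping)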
# Separate features and target variable (also drop the non-predictive Id column, if present)
X = df.drop(columns=['Id', 'Species'], errors='ignore')
y = df['Species']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
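# Optional note (an assumption, not part of the original pipeline): for a small,
# balanced dataset like Iris, a stratified split keeps class proportions equal
# across train and test, e.g.
#   train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
# Quick sanity check on the split sizes:
print("Train/test shapes:", X_train.shape, X_test.shape)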
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
# Initialize the Decision Tree Classifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
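# Optional (an illustrative addition): inspect the fitted tree's size;
# get_depth() and get_n_leaves() are standard DecisionTreeClassifier helpers.
print("Tree depth:", model.get_depth(), "| leaves:", model.get_n_leaves())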
# Predict on the test set
y_pred = model.predict(X_test)
# Compute the accuracy of the model on the held-out test set
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy of Decision Tree Classifier:", accuracy)
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
# Plot the decision tree
plt.figure(figsize=(12, 8))
plot_tree(model, feature_names=X.columns, class_names=label_encoder.classes_, filled=True)
plt.title("Decision Tree Structure")
plt.show()
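# A text view of the same tree (an optional addition): export_text prints the
# learned split thresholds, which is handy when the plot is hard to read.
from sklearn.tree import export_text
print(export_text(model, feature_names=list(X.columns)))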
importances = model.feature_importances_
feature_names = X.columns
# Plot feature importances
plt.figure(figsize=(8, 6))
plt.barh(feature_names, importances, color='skyblue')
plt.xlabel("Feature Importance")
plt.ylabel("Feature")
plt.title("Feature Importances in Decision Tree Model")
plt.show()
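# Optional (an illustrative addition): the same importances as a sorted table,
# which typically makes the dominance of the petal measurements explicit.
print(pd.Series(importances, index=feature_names).sort_values(ascending=False))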
import numpy as np
from matplotlib.colors import ListedColormap
# Define a function to plot decision boundaries for a model trained on two features
def plot_decision_boundary(model, X, y):
    # Set up the grid for plotting decision boundaries
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    # Predict over the grid
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples with one shared colormap
    cmap = ListedColormap(('red', 'green', 'blue'))
    plt.figure(figsize=(10, 6))
    plt.contourf(xx, yy, Z, alpha=0.3, cmap=cmap)
    scatter = plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cmap)
    plt.xlabel("Petal Length (cm)")
    plt.ylabel("Petal Width (cm)")
    plt.title("Decision Boundary (Petal Length vs Petal Width)")
    plt.legend(handles=scatter.legend_elements()[0], labels=list(label_encoder.classes_))
    plt.show()
# Extract the two petal features (PetalLengthCm, PetalWidthCm) for visualization
X_2d_train = X_train[['PetalLengthCm', 'PetalWidthCm']].values
y_train_2d = y_train.values
# Train a new model on these two features for simplicity
model_2d = DecisionTreeClassifier(random_state=42)
model_2d.fit(X_2d_train, y_train_2d)
# Plot the decision boundary
plot_decision_boundary(model_2d, X_2d_train, y_train_2d)
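# Final sanity check (a supplementary sketch, not in the original): 5-fold
# cross-validation on the full feature set gives a less split-dependent
# estimate of accuracy than the single hold-out score above.
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(DecisionTreeClassifier(random_state=42), X, y, cv=5)
print("CV accuracy: %.3f +/- %.3f" % (cv_scores.mean(), cv_scores.std()))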