import pandas as pd

# Load the dataset (this walkthrough assumes the Kaggle Iris.csv layout:
# Id, SepalLengthCm, SepalWidthCm, PetalLengthCm, PetalWidthCm, Species)
df = pd.read_csv('Iris.csv')

# Display the first few rows and basic information
print(df.head())
print(df.describe())
df.info()  # info() prints directly; wrapping it in print() would just show "None"

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Encode the target variable
label_encoder = LabelEncoder()
df['Species'] = label_encoder.fit_transform(df['Species'])

# Separate features and target variable. The Id column is a row index, not a
# measurement, and leaving it in lets the tree "cheat" by splitting on it;
# errors='ignore' keeps this line working if the file has no Id column.
X = df.drop(columns=['Id', 'Species'], errors='ignore')
y = df['Species']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Initialize and fit the Decision Tree Classifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Estimate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy of Decision Tree Classifier:", accuracy)

from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Plot the decision tree
plt.figure(figsize=(12, 8))
plot_tree(model, feature_names=X.columns, class_names=label_encoder.classes_, filled=True)
plt.title("Decision Tree Structure")
plt.show()

# Plot feature importances
importances = model.feature_importances_
feature_names = X.columns

plt.figure(figsize=(8, 6))
plt.barh(feature_names, importances, color='skyblue')
plt.xlabel("Feature Importance")
plt.ylabel("Feature")
plt.title("Feature Importances in Decision Tree Model")
plt.show()

import numpy as np
from matplotlib.colors import ListedColormap

# Define a function to plot decision boundaries for a model trained on two features
def plot_decision_boundary(model, X, y):
    # Set up the grid for plotting decision boundaries
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))

    # Predict the class for every point on the grid
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Plot the contour and training examples
    plt.figure(figsize=(10, 6))
    cmap = ListedColormap(('red', 'green', 'blue'))
    plt.contourf(xx, yy, Z, alpha=0.3, cmap=cmap)
    scatter = plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cmap)
    plt.xlabel("Petal Length (cm)")
    plt.ylabel("Petal Width (cm)")
    plt.title("Decision Boundary (Petal Length vs Petal Width)")
    plt.legend(handles=scatter.legend_elements()[0], labels=list(label_encoder.classes_))
    plt.show()

# Extract PetalLengthCm and PetalWidthCm for visualization
X_2d_train = X_train[['PetalLengthCm', 'PetalWidthCm']].values
y_train_2d = y_train.values

# Train a new model on just these two features so the boundary can be drawn in 2D
model_2d = DecisionTreeClassifier(random_state=42)
model_2d.fit(X_2d_train, y_train_2d)

# Plot the decision boundary
plot_decision_boundary(model_2d, X_2d_train, y_train_2d)
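# --- Optional extension (a sketch, not part of the original walkthrough) ---
# With only 150 rows, a single 80/20 split leaves just 30 test samples, so the
# accuracy printed above can swing with random_state. Cross-validation and a
# confusion matrix give a steadier read. Assumes X, y, y_test, y_pred and
# label_encoder from above; from_predictions needs scikit-learn >= 1.0.
from sklearn.model_selection import cross_val_score
from sklearn.metrics import ConfusionMatrixDisplay

# 5-fold cross-validated accuracy on the full dataset (stratified by default
# for classifiers, so the sorted-by-species ordering of Iris.csv is handled)
cv_scores = cross_val_score(DecisionTreeClassifier(random_state=42), X, y, cv=5)
print(f"Cross-validated accuracy: {cv_scores.mean():.3f} +/- {cv_scores.std():.3f}")

# Confusion matrix for the held-out test set from the earlier split
ConfusionMatrixDisplay.from_predictions(y_test, y_pred, display_labels=label_encoder.classes_)
plt.title("Confusion Matrix (Test Set)")
plt.show()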
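# --- Optional extension (a sketch): limiting tree depth ---
# An unconstrained decision tree grows until its leaves are pure, which can
# overfit. The depths below are illustrative values, not tuned settings from
# the walkthrough above; comparing train vs test accuracy at each depth shows
# where extra splits stop paying off. Assumes X_train, X_test, y_train, y_test
# from the split above.
for depth in (1, 2, 3, None):
    pruned = DecisionTreeClassifier(max_depth=depth, random_state=42)
    pruned.fit(X_train, y_train)
    print(f"max_depth={depth}: train accuracy={pruned.score(X_train, y_train):.3f}, "
          f"test accuracy={pruned.score(X_test, y_test):.3f}")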