#Decision Tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd
# Load Soybean Dataset
# Location of the headerless, comma-separated UCI "soybean-small" data file.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/soybean/soybean-small.data"

# One name per field in each data row: the 35 attributes listed in the UCI
# soybean attribute description, followed by the class label. The list must
# match the file's field count exactly; load_soybean() enforces this, because
# read_csv itself does not complain when it is given too few names and the
# labels then end up attached to the wrong fields.
columns = [
    'date', 'plant-stand', 'precip', 'temp', 'hail', 'crop-hist',
    'area-damaged', 'severity', 'seed-tmt', 'germination', 'plant-growth',
    'leaves', 'leafspots-halo', 'leafspots-marg', 'leafspot-size',
    'leaf-shread', 'leaf-malf', 'leaf-mild', 'stem', 'lodging',
    'stem-cankers', 'canker-lesion', 'fruiting-bodies', 'external-decay',
    'mycelium', 'int-discolor', 'sclerotia', 'fruit-pods', 'fruit-spots',
    'seed', 'mold-growth', 'seed-discolor', 'seed-size', 'shriveling',
    'roots', 'class',
]


def load_soybean(source=url):
    """Read a headerless soybean CSV and label its fields with ``columns``.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, path, or file-like
            object). Defaults to the module-level ``url``.

    Returns:
        A DataFrame whose column labels are exactly ``columns``, in order,
        with a default integer index.

    Raises:
        ValueError: If the number of fields per row differs from
            ``len(columns)``.
    """
    frame = pd.read_csv(source, header=None)
    # Check the width before labelling so a schema mismatch fails loudly
    # instead of silently shifting names onto the wrong fields.
    if frame.shape[1] != len(columns):
        raise ValueError(
            f"expected {len(columns)} fields per row, found {frame.shape[1]}"
        )
    frame.columns = columns
    return frame


if __name__ == "__main__":
    data = load_soybean(url)

    # Separate the feature columns from the target label.
    X = data.drop(columns=['class'])
    y = data['class']

    # NOTE(review): get_dummies only expands object/string/category columns by
    # default. If the attributes load as integer codes this call leaves X
    # unchanged -- confirm whether one-hot encoding was intended (passing
    # columns=list(X.columns) would force it).
    X = pd.get_dummies(X)

    # Hold out 30% of the rows for evaluation; the fixed seed keeps the split
    # reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # Depth-limited Gini decision tree, fitted on the training rows only.
    decision_tree = DecisionTreeClassifier(criterion="gini", max_depth=5, random_state=42)
    decision_tree.fit(X_train, y_train)

    # Evaluate on the held-out rows.
    y_pred = decision_tree.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    conf = confusion_matrix(y_test, y_pred)
    classR = classification_report(y_test, y_pred)

    print(accuracy)
    print(conf)
    print(classR)