# Load packages
library(class)
library(ggplot2)
library(caret)
# Normalize and prepare data
data(iris)

# Min-max rescale a numeric vector onto [0, 1].
#
# x: numeric vector.
# Returns a numeric vector of the same length as x. A vector whose values are
# all identical (zero range) is mapped to all zeros rather than NaN.
norm <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # A constant column would give 0 / 0 = NaN for every element; dividing by 1
  # instead yields zeros. An NA span is left alone so NA input propagates.
  if (!is.na(span) && span == 0) {
    span <- 1
  }
  (x - lo) / span
}

# Rescale the four columns iris[1:4] and re-attach the Species column.
# Note: min/max are taken over the full data set, i.e. before the train/test
# split that follows.
iris_norm <- as.data.frame(lapply(iris[1:4], norm))
iris_norm$Species <- iris$Species
# Train-test split
# Fix the RNG seed so the partition is reproducible.
set.seed(123)
idx <- createDataPartition(iris_norm$Species, p = 0.8, list = FALSE)

# Predictors (columns 1-4): rows in idx go to training, the rest to test.
train_X <- iris_norm[idx, 1:4]
test_X <- iris_norm[-idx, 1:4]

# Labels (Species, column 5), split with the same row indices.
train_Y <- iris_norm[idx, "Species"]
test_Y <- iris_norm[-idx, "Species"]
# Evaluate KNN for various k

# Test-set accuracy, in percent, of knn() with k neighbours.
#
# k: number of neighbours passed to knn().
# Returns a single number: the percentage of test_Y labels that knn()
# reproduces when fitted on train_X / train_Y and applied to test_X.
eval_knn <- function(k) {
  predicted <- knn(train_X, test_X, train_Y, k = k)
  mean(predicted == test_Y) * 100
}

k_vals <- seq(1, 20, 2)  # odd values 1, 3, ..., 19
# vapply() pins the result to one numeric per k, unlike sapply(), whose
# return type depends on its input.
acc <- vapply(k_vals, eval_knn, numeric(1))
results <- data.frame(K = k_vals, Accuracy = acc)
print(results)
# Plot accuracy vs. K
accuracy_plot <- ggplot(results, aes(x = K, y = Accuracy)) +
  geom_line(color = "blue") +
  geom_point(color = "red") +
  labs(title = "KNN Accuracy vs. K", x = "K", y = "Accuracy (%)") +
  theme_minimal()
# Print explicitly: a bare ggplot expression is only drawn through top-level
# auto-printing, which does not happen when the script is run via source().
print(accuracy_plot)
# Final model with optimal K
# Take K from the accuracy table instead of hard-coding it, so the final model
# always matches the evaluation. which.max() returns the first maximum, so
# ties resolve to the smallest K.
best_k <- results$K[which.max(results$Accuracy)]
final_pred <- knn(train_X, test_X, train_Y, k = best_k)
# NOTE(review): K is chosen and scored on the same test set, so the reported
# metrics are likely optimistic - consider a separate validation split.
print(confusionMatrix(data = final_pred, reference = test_Y))
# Comments