# Install (only if missing) and Load Necessary Libraries ----
# Installing unconditionally would re-download the packages on every run and
# fail offline even when they are already available, so only the packages
# that cannot be found are installed. local() keeps the helper variables out
# of the global environment.
local({
  required_pkgs <- c("titanic", "dplyr")
  is_installed <- vapply(
    required_pkgs, requireNamespace, logical(1),
    quietly = TRUE
  )
  if (!all(is_installed)) {
    install.packages(required_pkgs[!is_installed])
  }
})
library(titanic)
library(dplyr)
# Load Titanic Dataset ----
data <- titanic::titanic_train

# Handle Missing Values ----
# Replace missing ages with the median of the observed ages.
data$Age[is.na(data$Age)] <- median(data$Age, na.rm = TRUE)
# Drop passengers without a port of embarkation. titanic_train records the
# unknown ports as empty strings rather than NA, so an is.na() test alone
# keeps them; exclude both so "" does not turn into a spurious level when
# Embarked is converted to a factor.
data <- filter(data, !is.na(Embarked), Embarked != "")
# Convert Categorical Variables to Factors and Derive Features ----
data <- data %>%
  mutate(
    Sex = as.factor(Sex),
    Embarked = as.factor(Embarked),
    Pclass = as.factor(Pclass),
    # Family size counts the passenger (+ 1) together with SibSp and Parch.
    FamilySize = SibSp + Parch + 1,
    # 1 when FamilySize is exactly 1, otherwise 0.
    IsAlone = as.integer(FamilySize == 1),
    # scale() returns a one-column matrix carrying centring/scaling
    # attributes; flatten it so Fare stays a plain numeric column instead of
    # a matrix column inside the data frame.
    Fare = as.numeric(scale(Fare))
  )
# Final Dataset Check ----
# str() writes its output directly. summary() only returns an object, which
# is auto-printed at top level but not when the script is run through
# source() with default settings, so print it explicitly.
str(data)
print(summary(data))
# Preview:
# Download PNG
# Download JPEG
# Download SVG
# Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
# Click to optimize width for Twitter