# Descriptive Statistics Analysis in R
# We'll use the Titanic dataset (from the titanic package) and compute key
# descriptive statistics such as mean, median, standard deviation, minimum,
# maximum, and quartiles for relevant numerical variables.
# Install and Load Packages
# Install only the packages that are not already available, so re-running the
# script does not re-download them every time.
required_pkgs <- c("titanic", "dplyr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(titanic)
library(dplyr)
# Load Titanic Dataset
data <- titanic::titanic_train
# Print explicitly: top-level values are not auto-printed when the script is
# run through source() with its default echo = FALSE.
print(head(data))
# Summary Statistics for Numeric Variables
# Keep only the numeric columns, then show summary() for each of them.
data %>%
  select(where(is.numeric)) %>%
  summary() %>%
  print()
# Descriptive Statistics for Age & Fare
# Collapse the dataset into a single row of summary statistics.
# na.rm = TRUE drops missing values before each statistic is computed, so an
# NA in a column does not turn the result into NA.
stats <- summarise(
  data,
  # Age: central tendency, spread, and range
  Mean_Age = mean(Age, na.rm = TRUE),
  Median_Age = median(Age, na.rm = TRUE),
  SD_Age = sd(Age, na.rm = TRUE),
  Var_Age = var(Age, na.rm = TRUE),
  Min_Age = min(Age, na.rm = TRUE),
  Max_Age = max(Age, na.rm = TRUE),
  IQR_Age = IQR(Age, na.rm = TRUE),
  # Fare: central tendency and spread
  Mean_Fare = mean(Fare, na.rm = TRUE),
  Median_Fare = median(Fare, na.rm = TRUE),
  SD_Fare = sd(Fare, na.rm = TRUE)
)
print(stats)