# Misc R
# Wed Aug 11 2021 07:07:24 GMT+0000 (Coordinated Universal Time)
# Saved by @ahwazakhtar
# Stand-alone data-wrangling recipes (read, reshape, summarise, sample).
# Several inputs used below (e.g. `toexport`, `sample_file`,
# `fever_infant_2020`, `mydata`, `mydata2`, `non_empty`, `student_df`) are
# not created in this file and must already exist in the session.

# plyr is listed first on purpose: attaching plyr after dplyr masks dplyr
# verbs such as summarise(), mutate(), arrange() and desc().
pacman::p_load(
  plyr, tidyverse, tidyr, readxl, dplyr, openxlsx, ggplot2, arules,
  splitstackshape
)

# Get all files in folder ----
# Read every .xls file in the working directory into one data frame.
filenames_list <- list.files(path = ".", pattern = "\\.xls$")
dee.list <- ldply(filenames_list, read_xls)

# Output file ----
# NOTE(review): `toexport` and `sample_file` are not defined in this file.
write.xlsx(toexport, sample_file, overwrite = TRUE)

# Data manipulation ----
# Derive month / year strings from the triage timestamp.
dee.list$month <- format(dee.list$TRIAGE_DATETIME, format = "%m")
dee.list$year <- format(dee.list$TRIAGE_DATETIME, format = "%Y")

merged_infant_fever <- left_join(fever_infant_2020, dee.list, by = "ER_NO")
write.xlsx(
  merged_infant_fever,
  "/Users/wasfafarooq/Desktop/RdataED/Updated Raw Data/Fever_withinvs.xlsx"
)
singlefile <- read_xlsx("/Users/wasfafarooq/Desktop/Peds Fever V2.xlsx")

# Summary statistics and dplyr ----
# Row count and mean age per year/month, opened in the data viewer.
dee.list %>%
  group_by(year, month) %>%
  dplyr::summarize(
    count = n(),
    mean_age = mean(AGE_YEARS, na.rm = TRUE)
  ) %>%
  view()

summarise(mydata, mpg_mean = mean(mpg), mpg_median = median(mpg))

# across() with a named list replaces the deprecated funs() helper; output
# columns are named <column>_<function>.
summary_fns <- list(n = ~ n(), mean = mean, median = median)
summarise(mydata, across(c(mpg, hp), summary_fns))
summarise(mydata, across(where(is.numeric), summary_fns))
summarise(mydata, across(everything(), summary_fns))

data2 %>%
  select(AGE_YEARS, agecat) %>%
  filter(between(AGE_YEARS, 37, 40))

# Rows whose Sepal.Length ranks in the top five within each Species.
mydata2 %>%
  select(Species, Sepal.Length) %>%
  group_by(Species) %>%
  filter(min_rank(desc(Sepal.Length)) <= 5)

# Difference between two dfs ----
setdiff(df1, df2)
union(df1, df2)
intersect(df1, df2)

# Row-wise max ----
# c_across() collects the selected columns of the current row; a bare
# `Sepal.Length:Petal.Width` inside max() would instead build a numeric
# sequence between the two values.
df1 <- mydata2 %>%
  rowwise() %>%
  mutate(row_max = max(c_across(Sepal.Length:Petal.Width))) %>%
  ungroup()

# Tabs and cross tabs ----
# with() scopes the column lookup without attach()-ing the data frame.
mytable <- with(dee.list, table(month, category))
mytable

# Sorting ----
dee.list <- dee.list[order(dee.list$AGE_YEARS), ]
# NOTE(review): `data` is not defined in this file (it resolves to the base
# function); presumably a data frame with AGE_YEARS is intended - confirm.
data2 <- data[order(-data$AGE_YEARS), ]

# Simple replace ----
# replace() returns a modified copy, so the result has to be assigned back.
dee.list$AGE_YEARS <- replace(dee.list$AGE_YEARS, dee.list$AGE_YEARS < 0, 0)

# Cut into 10 categories by age ----
dee.list$agecat <- cut(dee.list$AGE_YEARS, 10)

# Sample 10 random rows per group ----
out2 <- dee.list %>%
  group_by(AGE_YEARS, GENDER) %>%
  sample_n(10)

# This is cute - discretize age ----
# First into 10 equal-frequency bins, then into fixed bins of width 5.
cats <- seq(0, 100, by = 5)
data2$agecat <- discretize(data2$AGE_YEARS, method = "frequency", breaks = 10)
data2$agecat2 <- discretize(data2$AGE_YEARS, method = "fixed", breaks = cats)

# Subinstr: strip non-alphanumerics and lower-case free text ----
# Runs before the stratified sample because that step counts on ED_DX2.
non_empty$ED_DX2 <- gsub("[^A-Za-z0-9 ]", "", non_empty$ED_DX)
non_empty$ED_DX2 <- tolower(non_empty$ED_DX2)
non_empty$HOPI2 <- gsub("[^A-Za-z0-9 ]", "", non_empty$HOPI_)
non_empty$HOPI2 <- tolower(non_empty$HOPI2)

# Stratify by group ----
# Sample within agecat2 x GENDER x month x year strata (size = 0.10), then
# keep one row per distinct ED_DX2 together with its count `n`.
check <- stratified(
  non_empty,
  c("agecat2", "GENDER", "month", "year"),
  size = 0.10
)
check <- check %>% add_count(ED_DX2)
check <- check %>% distinct(ED_DX2, .keep_all = TRUE)

# Move a column to first position ----
new_df <- student_df %>% select(Mathematics_score, everything())

# Change column positions (select() works as well) ----
df2 <- df1[, c(2, 4, 3, 1)]

# Sort rows ----
student_df %>% arrange(Name)
student_df %>% arrange(desc(Name))
# NOTE(review): `df` is not defined in this file.
arrange_all(df)

# Sample with replacement ----
set.seed(123)
index <- sample(seq_len(nrow(mtcars)), 10, replace = TRUE)
index
mtcars[index, ]
# Comments