# Misc R
#
# Saved by @ahwazakhtar
# Wed Aug 11 2021 07:07:24 GMT+0000 (Coordinated Universal Time)

# Load (installing first if missing) every package this script relies on.
# plyr is listed first on purpose: when plyr is attached after dplyr, its
# summarise(), mutate(), arrange() and desc() mask the dplyr versions.
# tidyr, dplyr and ggplot2 are already attached by tidyverse; listing them
# again is harmless.
pacman::p_load(
  plyr, tidyverse, tidyr, readxl, dplyr, openxlsx, ggplot2, arules,
  splitstackshape
)

# Collect the Excel workbooks whose names end in ".xls" from the current
# working directory, read each one with read_xls(), and let ldply() combine
# the per-file results into a single data frame.
filenames_list <- list.files(path = ".", pattern = "\\.xls$")
dee.list <- ldply(.data = filenames_list, .fun = read_xls)

# Output file: save `toexport` as an Excel workbook at the location held in
# `sample_file`, replacing any file that is already there.
write.xlsx(x = toexport, file = sample_file, overwrite = TRUE)

# Data manipulation: derive the month ("%m") and the four-digit year ("%Y")
# from TRIAGE_DATETIME and store them as new text columns.
# NOTE(review): assumes TRIAGE_DATETIME is a Date/POSIXct column -- confirm.
dee.list[["month"]] <- format(dee.list[["TRIAGE_DATETIME"]], format = "%m")
dee.list[["year"]] <- format(dee.list[["TRIAGE_DATETIME"]], format = "%Y")
# Add the columns of dee.list to fever_infant_2020, matching rows on ER_NO and
# keeping every row of fever_infant_2020; write the merged table to disk; then
# read a separate workbook into `singlefile`.
merged_infant_fever <- fever_infant_2020 %>%
  left_join(dee.list, by = "ER_NO")
write.xlsx(
  x = merged_infant_fever,
  file = "/Users/wasfafarooq/Desktop/RdataED/Updated Raw Data/Fever_withinvs.xlsx"
)
singlefile <- read_xlsx(path = "/Users/wasfafarooq/Desktop/Peds Fever V2.xlsx")

# Summary statistics with dplyr: for each year/month combination, count the
# rows and average AGE_YEARS (missing values ignored), then open the result
# in the data viewer.
dee.list %>%
  group_by(year, month) %>%
  dplyr::summarise(
    count = n(),
    mean_age = mean(AGE_YEARS, na.rm = TRUE)
  ) %>%
  view()

# Column-wise summaries of `mydata`.
# The original used funs() (deprecated since dplyr 0.8.0) together with the
# superseded summarise_at()/summarise_if()/summarise_all() verbs; across() is
# the current equivalent. Output columns are still named <column>_<function>,
# but across() emits them column by column, so their order may differ from
# the scoped verbs. Calls are qualified with dplyr:: because plyr, which this
# script also loads, exports its own summarise().
summary_fns <- list(n = ~ dplyr::n(), mean = mean, median = median)

# Mean and median of a single column.
dplyr::summarise(mydata, mpg_mean = mean(mpg), mpg_median = median(mpg))
# Row count, mean and median for the selected columns.
dplyr::summarise(mydata, dplyr::across(c(mpg, hp), summary_fns))
# Same three statistics for every numeric column.
dplyr::summarise(mydata, dplyr::across(where(is.numeric), summary_fns))
# Same three statistics for every column.
dplyr::summarise(mydata, dplyr::across(dplyr::everything(), summary_fns))

# Keep only the two age columns, restricted to rows whose AGE_YEARS lies in
# the closed interval [37, 40]; rows with a missing AGE_YEARS are dropped.
data2 %>%
  select(AGE_YEARS, agecat) %>%
  filter(AGE_YEARS >= 37 & AGE_YEARS <= 40)

# Within each Species, keep the rows holding the five largest Sepal.Length
# values; rows tied at the cut-off are all kept. The result remains grouped
# by Species.
mydata2 %>%
  group_by(Species) %>%
  select(Species, Sepal.Length) %>%
  filter(min_rank(desc(Sepal.Length)) <= 5)

# Set operations on two data frames.
# NOTE(review): base R and dplyr (and possibly other attached packages) each
# define these three names, so which version runs depends on the attach
# order. The descriptions below assume dplyr's data-frame methods, which
# compare whole rows and presumably need df1 and df2 to share the same
# columns -- confirm.
# Rows of df1 that do not appear in df2.
setdiff(df1,df2)
# Rows that appear in df1 or df2, without duplicates.
union(df1,df2)
# Rows that appear in both df1 and df2.
intersect(df1, df2)

# Row-wise max: add `row_max`, the largest value in each row across the
# columns from Sepal.Length through Petal.Width.
# c_across() is required here: inside rowwise(), the bare expression
# `Sepal.Length:Petal.Width` is the numeric sequence operator applied to the
# two cell values, so max() returned the largest element of a sequence that
# starts at Sepal.Length, not the maximum over the columns.
# dplyr::mutate() is spelled out because plyr, which this script also loads,
# exports a mutate() of its own. ungroup() drops the row-wise grouping so
# later verbs act on the whole table again.
df1 <- mydata2 %>%
  dplyr::rowwise() %>%
  dplyr::mutate(row_max = max(dplyr::c_across(Sepal.Length:Petal.Width))) %>%
  dplyr::ungroup()

# Tabs and cross tabs: count rows for every month (rows) x category (columns)
# combination.
# with() evaluates table() against the current columns of dee.list. The
# original attach(dee.list) put a copy of the data frame on the search path;
# that copy is not refreshed when dee.list is modified afterwards, and its
# column names can shadow, or be shadowed by, other objects.
mytable <- with(dee.list, table(month, category))
mytable

# Sorting: reorder dee.list by AGE_YEARS, smallest first, and build data2 from
# `data` ordered by AGE_YEARS, largest first. Negating the key reverses the
# direction, so this form only works for a numeric column.
dee.list <- dee.list[order(dee.list[["AGE_YEARS"]]), ]
data2 <- data[order(-data[["AGE_YEARS"]]), ]

# Simple replace: set negative ages to 0.
# The result is assigned back to the column; the original call only printed
# the modified vector and left dee.list unchanged. which() turns the
# comparison into plain positions, so missing ages are simply skipped.
dee.list$AGE_YEARS <- replace(
  dee.list$AGE_YEARS,
  which(dee.list$AGE_YEARS < 0),
  0
)

# Bin AGE_YEARS into 10 equal-width intervals and store the resulting factor
# in a new `agecat` column.
dee.list[["agecat"]] <- cut(x = dee.list[["AGE_YEARS"]], breaks = 10)

# Sample up to 10 random rows per AGE_YEARS x GENDER group.
# slice_sample() replaces the superseded sample_n(): sample_n(10) stops with
# an error as soon as one group has fewer than 10 rows, whereas slice_sample()
# returns all rows of such a group. ungroup() keeps the grouping from leaking
# into later steps. Call set.seed() beforehand for a reproducible draw.
out2 <- dee.list %>%
  dplyr::group_by(AGE_YEARS, GENDER) %>%
  dplyr::slice_sample(n = 10) %>%
  dplyr::ungroup()

# Discretize AGE_YEARS in two ways with discretize():
#   agecat  - 10 bins chosen by the "frequency" method
#   agecat2 - fixed cut points every 5 units from 0 to 100 (`cats`)
cats <- seq(from = 0, to = 100, by = 5)
data2[["agecat"]] <- discretize(
  data2[["AGE_YEARS"]],
  method = "frequency",
  breaks = 10
)
data2[["agecat2"]] <- discretize(
  data2[["AGE_YEARS"]],
  method = "fixed",
  breaks = cats
)

# Stratified sample: draw 10% of the rows of non_empty within every
# combination of agecat2, GENDER, month and year; then add a count column
# giving how many sampled rows share each ED_DX2 value, and keep only the
# first row for each distinct ED_DX2 (all columns retained).
check <- stratified(
  non_empty,
  c("agecat2", "GENDER", "month", "year"),
  size = 0.10
) %>%
  add_count(ED_DX2) %>%
  distinct(ED_DX2, .keep_all = TRUE)

# Text clean-up: remove every character that is not a letter (A-Z, a-z), a
# digit or a space, then lower-case what is left. ED_DX2 is derived from
# ED_DX and HOPI2 from HOPI_; the source columns are left untouched.
non_empty$ED_DX2 <- tolower(gsub("[^A-Za-z0-9 ]", "", non_empty$ED_DX))

non_empty$HOPI2 <- tolower(gsub("[^A-Za-z0-9 ]", "", non_empty$HOPI_))

# Column and row ordering examples.
# Bring Mathematics_score to the front; everything() keeps the remaining
# columns in their existing order.
new_df <- select(student_df, Mathematics_score, everything())
# Reorder columns by position: 2nd, 4th, 3rd, then 1st.
df2 <- df1[, c(2, 4, 3, 1)]
# Sort rows by Name, ascending and then descending (results are not stored).
arrange(student_df, Name)
arrange(student_df, desc(Name))
# Sort rows using every column of `df`.
arrange_all(df)

# Sample with replacement: draw 10 row positions from mtcars (the same row
# may be picked more than once) and show the sampled rows.
# seq_len() replaces 1:nrow(), which would yield c(1, 0) for an empty table.
set.seed(123) # fixed seed so the draw is reproducible
index <- sample(seq_len(nrow(mtcars)), size = 10, replace = TRUE)
index
mtcars[index, ]


                  
# (end of snippet)