dplyr.r

PHOTO EMBED

Mon Jun 24 2024 21:08:52 GMT+0000 (Coordinated Universal Time)

Saved by @jkirangw

# dplyr: data manipulation
# tidyr: data cleaning
# %>%: the pipe operator in R

# Install dplyr only when it is missing, so that re-running this script does
# not download and reinstall the package every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
# The `flights` data set used in some examples ships with nycflights13;
# uncomment the next line to install it.
#install.packages('nycflights13')
library(dplyr)

# dplyr functions ----

# 1. filter(df, <conditional expressions>): select a set of rows from a
#    data frame.
# Rows of a data frame can be picked with filter() (by condition) or
# slice() (by position).

# Rows matching every condition (comma-separated conditions are AND-ed).
head(filter(df, month == 11, day == 3, carrier == "AA"))

# Base R subsetting on the month/day conditions: combine the logical vectors
# with the elementwise `&` operator.
head(flights[flights$month == 11 & flights$day == 3, ])

# Rows selected by position: the first ten rows.
head(slice(df, 1:10))

# Order the rows: ascending by year, month and day, then descending by
# arr_time.
df %>%
  arrange(year, month, day, desc(arr_time))

# Keep only the listed columns of the data frame.
df %>%
  select(month, day)

# Rename columns.
# Syntax: rename(df, new_col_name = old_col_name)
# Note the single `=`: it names the new column; `==` would be a comparison.
rename(df, airline_carrier = carrier)

# NOTE(review): `airlines`, `arrival_column`, `depart_column` and
# `airtime_column` look like placeholder column names -- substitute the real
# columns of your data frame.

# Unique values of a column: keep that column, then drop duplicate rows.
df %>%
  select(airlines) %>%
  distinct()

# mutate() adds a new column computed from existing ones.
df %>%
  mutate(new_column = arrival_column - depart_column)

# transmute() computes the same column but returns the new column back.
df %>%
  transmute(new_column = arrival_column - depart_column)

# summarise() collapses a column into a single value (e.g. mean, sum or sd);
# na.rm = TRUE is passed to mean() here.
flights %>%
  summarise(new_average = mean(airtime_column, na.rm = TRUE))

# Random sampling of rows.

# Draw 10 rows at random.
flights %>%
  sample_n(10)

# Draw 10% of the rows at random.
flights %>%
  sample_frac(0.1)
content_copyCOPY

https://rdrr.io/snippets/