2.6 R

PHOTO EMBED

Fri Aug 27 2021 15:46:23 GMT+0000 (Coordinated Universal Time)

Saved by @natalyabee #r



#==========================================================================
# R script for the Submodule 2.6 Population Density Maps - FB MOOC 
# Data-Pop Alliance
# Guillermo Romero, Researcher and data scientist
# Comments and questions: gromero@datapopalliance.org
# August, 2021
#==========================================================================

# To install libraries
install.packages("data.table")
install.packages("rgdal")
install.packages("ggplot2")
install.packages("lubridate")
install.packages("plyr")
install.packages("viridis")
install.packages("ggthemes")
install.packages("mapproj")
install.packages("spdplyr")
install.packages("geojsonio")

# Load libraries
library(data.table)
library(rgdal)
library(ggplot2)
library(lubridate)
library(plyr)
library(viridis)
library(ggthemes)
library(mapproj)
library(geojsonio)
library(spdplyr)


################################
# First Part
# Assign Total population into
# the area of interest
################################


# Set the working directory
setwd("~/Documents/DPA_tutorial")

# Open the total population file 
totalPop = fread('unzip -p population_mex_2018-10-01.csv.zip')

# Since this population density map file is big
# We are going to use only a subset of this file (for the area of interest) 
# zm stands for an area capturing Mexico city and the state of Mexico

zm <- subset(totalPop, longitude >= -101 & longitude <= -98)
zm <- subset(zm, latitude >= 18 & latitude <= 21)


# Open the spatial file (shapefile)
# The area of interest is Distrito Federal (Mexico city)
# And the neighborhood state called Mexico
# Note: in the DPA_tutorial folder, create a subfolder named shapefiles
# where you need to save your spatial files

geo <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
geo <- geo[,c(4,6,7)]
g1<-subset(geo, NAME_1=="Distrito Federal")
g2<-subset(geo,NAME_1=="México")
geo<-rbind(g1,g2)

# Fortify allows you to work a dataframe from the spatial object

geodf<-fortify(geo)
geo$id <- row.names(geo) # allocate an id variable to the spatial object
coords<-zm[,c(2,1)] # define the coordinates columns
sp <- SpatialPoints(coords)
rm(coords)

# Define the Coordinate Reference System (CRS)
proj4string(sp) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
proj4string(geo) <-"+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

# Assigning points into polygons
assign <- over(sp, geo)

# Use rownames to easily merge in the following step
assign$rous <- rownames(assign)
zm$rous<-rownames(zm)

assign$rous<-as.factor(as.character(assign$rous))
zm$rous<-as.factor(as.character(zm$rous))

names(assign)[1]<-"ID"

# Merge the assign object with the zm (Mexico city and Mexico state) area
zm.Map <- merge(x=assign, y=zm, by.x="rous", by.y="rous")
dim(zm.Map)
zm.Map <- zm.Map[!is.na(zm.Map$GID_2),]
head(zm.Map)

# Sum the population per municipality
# This is the total population density map per municipality
total.zm.Mun<-aggregate(population_2020~NAME_2+GID_2, FUN=sum, data=zm.Map, na.rm=TRUE)
head(total.zm.Mun)





################################
# Second Part
# Assign Women population into
# the area of interest
################################



# Women - High resolution population density map
# Open the women population file 
totalPop = fread('unzip -p mex_women_of_reproductive_age_15_49_2019-06-01_csv.zip')

# Since this population density map file is big
# We are going to use only a subset of this file (for the area of interest) 
# zm stands for an area capturing Mexico city and the state of Mexico

zm <- subset(totalPop, longitude >= -101 & longitude <= -98)
zm <- subset(zm, latitude >= 18 & latitude <= 21)

# Open the spatial file (shapefile)
# The area of interest is Distrito Federal (Mexico city)
# And the neighborhood state called Mexico

geo <- readOGR(dsn="shapes", layer="gadm36_MEX_2")
geo <- geo[,c(4,6,7)]
g1<-subset(geo, NAME_1=="Distrito Federal")
g2<-subset(geo,NAME_1=="México")
geo<-rbind(g1,g2)

geodf<-fortify(geo)
geo$id <- row.names(geo) 
coords<-zm[,c(2,1)]
sp <- SpatialPoints(coords)
rm(coords)

# Use the following Coordinate Reference System (CRS)
proj4string(sp) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
proj4string(geo) <-"+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

#assigning points into polygons
assign <- over(sp, geo)
dim(assign)

assign$rous <- rownames(assign)
zm$rous<-rownames(zm)

assign$rous<-as.factor(as.character(assign$rous))
zm$rous<-as.factor(as.character(zm$rous))
names(assign)[1]<-"ID"

# merge
zm.Map <- merge(x=assign, y=zm, by.x="rous", by.y="rous")
dim(zm.Map)
zm.Map <- zm.Map[!is.na(zm.Map$GID_2),]
head(zm.Map)

# Sum the women population per municipality
# This is the women population density map per municipality
women.zm.Mun<-aggregate(population~NAME_2+GID_2, FUN=sum, data=zm.Map, na.rm=TRUE)
names(women.zm.Mun)[3]<-"women_population"
head(women.zm.Mun)


# Merge total- and women population per mun
pop.Map <- merge(x=total.zm.Mun, y=women.zm.Mun, by.x="GID_2", by.y="GID_2")

# pop.Map is the object containing both, the total and the women population info
pop.Map$women.perc<-pop.Map$women_population/pop.Map$population_2020
pop.Map<-pop.Map[,c(1,2,3,5,6)]
names(pop.Map)[2]<-"municipality"
head(pop.Map)

rm(totalPop)
rm(zm)


################################
# Third Part
# Incorporate mobility data and
# Calculate a risk score
################################


# Load Fb Range Maps (Mobility data) and select the country of interest
mobility = fread('unzip -p movement-range-data-2021-07-20.zip')
mex<-subset(mobility, country=="MEX")
rm(mobility)

# Merge mobility with the population (pop.Map) object previously created
mobility.pop.Map <- merge(x=pop.Map, y=mex, by.x="GID_2", by.y="polygon_id")
names(mobility.pop.Map)[6]<-"date"
head(mobility.pop.Map)

# Select dates of interest
# In this case, we are going to use a week during the quarantine
# Rename column accordingly

mobility.pop.Map$date<-as.Date(mobility.pop.Map$date, format="%Y-%m-%d")
mobility.pop.quarantine<-subset(mobility.pop.Map, date>="2020-05-01" & date<="2020-05-07")
names(mobility.pop.quarantine)[11]<-"population.staying.home" 
head(mobility.pop.quarantine)

# Get the media of population staying home per municipality
mobility.pop.quarantine<-aggregate(population.staying.home~GID_2+women_population+women.perc+population_2020, FUN=mean, data=mobility.pop.quarantine, na.rm=TRUE)

# Calculate a weighted risk score
attach(mobility.pop.quarantine)
mobility.pop.quarantine$Risk.Score<-(women.perc)*(1-population.staying.home)*(sum(pop.Map$women_population))

# Normalize risk score
attach(mobility.pop.quarantine)
mobility.pop.quarantine$nRisk.Score<-(Risk.Score-min(Risk.Score))/(max(Risk.Score)-min(Risk.Score))
mobility.pop.quarantine<-mobility.pop.quarantine[,c(1,7)]
head(mobility.pop.quarantine)



################################
# Map using ggplot2
################################




# Choropleth Map with risk score

shapefile <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)

shapefile@data$id = rownames(shapefile@data)
head(shapefile@data)
class(shapefile@data)
dim(shapefile@data)

#shapefile.points = fortify(shapefile, region = "id")
shapefile.points = fortify(shapefile)
shapefile.df = join(shapefile.points, shapefile@data, by = "id")
head(shapefile.df)
shapefile.df = subset(shapefile.df, select = c(long, lat, group, GID_2))
sort(unique(shapefile.df$GID_2))

#names(shapefile.df) = c("long", "lat", "group", "NAME1")
risk.map = join(shapefile.df, mobility.pop.quarantine, by = "GID_2", type = "full")
head(risk.map)
dim(risk.map)


#chrolopleth
#png("zm.risk.score.map.png", width = 5, height = 5, units = 'in', res = 300)
p0 <- ggplot(data = risk.map,
             mapping = aes(x = long, y = lat,
                 group = group,
                 fill = nRisk.Score))
p1 <- p0 + geom_polygon(color = "gray90", size = 0.05) +
    coord_map(projection = "albers", lat0 = 39, lat1 = 45)

p2 <- p1 + scale_fill_viridis_c(option = "magma", direction = -1)
p2 + theme_map() + #facet_wrap(~ year, ncol = 2) +
    theme(legend.position = "right",
          strip.background = element_blank()) +
    #labs(fill = "Changement en pourcentage / semaine",
    #     "title = "Opiate Related Deaths by State, 2000-2014")
     labs(fill = "nRisk.Score")
#dev.off()



################################
# Creation of files for kepler:
# risk score
################################

riskScore<-subset(risk.map, !duplicated(GID_2)) 
head(riskScore)

shapefile <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)
shapefile<-shapefile[,c(6,7)]

shapefile@data = join(shapefile@data, riskScore, by = "GID_2", type = "full")
head(shapefile@data)
class(shapefile)

#to convert to geojson format
json <- geojson_json(shapefile)# Simplify the geometry information of GeoJSON.
geojson_write(json, geometry = "polygon", file = "nrisk.Score.geojson")






################################
# Creation of files for kepler:
# # Pre-COVID temporal window. Mobility data
################################



mobility = fread('unzip -p /home/guillermo/Documents/dpa/fbMooc/population.maps/movement-range-data-2021-07-20.zip')
mex<-subset(mobility, country=="MEX")
mex<-mex[,c(4,1,7)]

names(mex)[1]<-"GID_2"
names(mex)[2]<-"date"
names(mex)[3]<-"population.staying.home"

#select a temporal window of one week
#data starts at 2020-03-01
mex$date<-as.Date(mex$date, format="%Y-%m-%d")
mex<-subset(mex, date<="2020-03-07")
mex<-aggregate(population.staying.home~GID_2, FUN=mean, data=mex, na.rm=TRUE)


#need coordinates
#get them the previous object named: risk.map

coord<-subset(risk.map, !duplicated(GID_2)) 
coord<-coord[,c(1:4)]

#add coordinates to mex object
mex <- merge(x=mex, y=coord, by="GID_2", all.y=TRUE)


shapefile <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)
shapefile<-shapefile[,c(6,7)]

shapefile@data = join(shapefile@data, mex, by = "GID_2", type = "full")
head(shapefile@data)
class(shapefile)

json <- geojson_json(shapefile)# Simplify the geometry information of GeoJSON.
geojson_write(json, geometry = "polygon", file = "mobility.precovid.geojson")




################################
# Creation of files for kepler:
# Quarantine week. Mobility data
################################


mobility = fread('unzip -p /home/guillermo/Documents/dpa/fbMooc/population.maps/movement-range-data-2021-07-20.zip')
mex<-subset(mobility, country=="MEX")
mex<-mex[,c(4,1,7)]

names(mex)[1]<-"GID_2"
names(mex)[2]<-"date"
names(mex)[3]<-"atHome"

mex$date<-as.Date(mex$date, format="%Y-%m-%d")
mex<-subset(mex, date>="2020-05-01")
mex<-subset(mex, date<="2020-05-07")

mex<-aggregate(atHome~GID_2, FUN=mean, data=mex, na.rm=TRUE)


#need coordinates
#get them from previous object named: risk.map
coord<-subset(risk.map, !duplicated(GID_2)) 
coord<-coord[,c(1:4)]

#add coordinates to mex object
mex <- merge(x=mex, y=coord, by="GID_2", all.y=TRUE)

shapefile <- readOGR(dsn="shapes", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)
shapefile<-shapefile[,c(6,7)]


shapefile@data = join(shapefile@data, mex, by = "GID_2", type = "full")
head(shapefile@data)
class(shapefile)

#convert into geojson format
json <- geojson_json(shapefile)
geojson_write(json, geometry = "polygon", file = "mobility.quarantine.geojson")






################################
# Creation of files for kepler:
# population density map sample
################################

totalPop = fread('unzip -p population_mex_2018-10-01.csv.zip')

#get a sample of your total pop
sampling <- totalPop[sample(nrow(totalPop), 174579), ]

#write
write.csv(sampling, "population.density.map.sample.csv", row.names=FALSE)
content_copyCOPY