#==========================================================================
# R script for the Submodule 2.6 Population Density Maps - FB MOOC
# Data-Pop Alliance
# Guillermo Romero, Researcher and data scientist
# Comments and questions: gromero@datapopalliance.org
# August, 2021
#==========================================================================
# To install libraries
install.packages("data.table")
install.packages("rgdal")
install.packages("ggplot2")
install.packages("lubridate")
install.packages("plyr")
install.packages("viridis")
install.packages("ggthemes")
install.packages("mapproj")
install.packages("spdplyr")
install.packages("geojsonio")
# Load libraries
library(data.table)
library(rgdal)
library(ggplot2)
library(lubridate)
library(plyr)
library(viridis)
library(ggthemes)
library(mapproj)
library(geojsonio)
library(spdplyr)
################################
# First Part
# Assign Total population into
# the area of interest
################################
# Set the working directory
setwd("~/Documents/DPA_tutorial")
# Open the total population file
totalPop = fread('unzip -p population_mex_2018-10-01.csv.zip')
# Since this population density map file is big
# We are going to use only a subset of this file (for the area of interest)
# zm stands for an area capturing Mexico city and the state of Mexico
zm <- subset(totalPop, longitude >= -101 & longitude <= -98)
zm <- subset(zm, latitude >= 18 & latitude <= 21)
# Open the spatial file (shapefile)
# The area of interest is Distrito Federal (Mexico city)
# And the neighborhood state called Mexico
# Note: in the DPA_tutorial folder, create a subfolder named shapefiles
# where you need to save your spatial files
geo <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
geo <- geo[,c(4,6,7)]
g1<-subset(geo, NAME_1=="Distrito Federal")
g2<-subset(geo,NAME_1=="México")
geo<-rbind(g1,g2)
# Fortify allows you to work a dataframe from the spatial object
geodf<-fortify(geo)
geo$id <- row.names(geo) # allocate an id variable to the spatial object
coords<-zm[,c(2,1)] # define the coordinates columns
sp <- SpatialPoints(coords)
rm(coords)
# Define the Coordinate Reference System (CRS)
proj4string(sp) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
proj4string(geo) <-"+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
# Assigning points into polygons
assign <- over(sp, geo)
# Use rownames to easily merge in the following step
assign$rous <- rownames(assign)
zm$rous<-rownames(zm)
assign$rous<-as.factor(as.character(assign$rous))
zm$rous<-as.factor(as.character(zm$rous))
names(assign)[1]<-"ID"
# Merge the assign object with the zm (Mexico city and Mexico state) area
zm.Map <- merge(x=assign, y=zm, by.x="rous", by.y="rous")
dim(zm.Map)
zm.Map <- zm.Map[!is.na(zm.Map$GID_2),]
head(zm.Map)
# Sum the population per municipality
# This is the total population density map per municipality
total.zm.Mun<-aggregate(population_2020~NAME_2+GID_2, FUN=sum, data=zm.Map, na.rm=TRUE)
head(total.zm.Mun)
################################
# Second Part
# Assign Women population into
# the area of interest
################################
# Women - High resolution population density map
# Open the women population file
totalPop = fread('unzip -p mex_women_of_reproductive_age_15_49_2019-06-01_csv.zip')
# Since this population density map file is big
# We are going to use only a subset of this file (for the area of interest)
# zm stands for an area capturing Mexico city and the state of Mexico
zm <- subset(totalPop, longitude >= -101 & longitude <= -98)
zm <- subset(zm, latitude >= 18 & latitude <= 21)
# Open the spatial file (shapefile)
# The area of interest is Distrito Federal (Mexico city)
# And the neighborhood state called Mexico
geo <- readOGR(dsn="shapes", layer="gadm36_MEX_2")
geo <- geo[,c(4,6,7)]
g1<-subset(geo, NAME_1=="Distrito Federal")
g2<-subset(geo,NAME_1=="México")
geo<-rbind(g1,g2)
geodf<-fortify(geo)
geo$id <- row.names(geo)
coords<-zm[,c(2,1)]
sp <- SpatialPoints(coords)
rm(coords)
# Use the following Coordinate Reference System (CRS)
proj4string(sp) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
proj4string(geo) <-"+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
#assigning points into polygons
assign <- over(sp, geo)
dim(assign)
assign$rous <- rownames(assign)
zm$rous<-rownames(zm)
assign$rous<-as.factor(as.character(assign$rous))
zm$rous<-as.factor(as.character(zm$rous))
names(assign)[1]<-"ID"
# merge
zm.Map <- merge(x=assign, y=zm, by.x="rous", by.y="rous")
dim(zm.Map)
zm.Map <- zm.Map[!is.na(zm.Map$GID_2),]
head(zm.Map)
# Sum the women population per municipality
# This is the women population density map per municipality
women.zm.Mun<-aggregate(population~NAME_2+GID_2, FUN=sum, data=zm.Map, na.rm=TRUE)
names(women.zm.Mun)[3]<-"women_population"
head(women.zm.Mun)
# Merge total- and women population per mun
pop.Map <- merge(x=total.zm.Mun, y=women.zm.Mun, by.x="GID_2", by.y="GID_2")
# pop.Map is the object containing both, the total and the women population info
pop.Map$women.perc<-pop.Map$women_population/pop.Map$population_2020
pop.Map<-pop.Map[,c(1,2,3,5,6)]
names(pop.Map)[2]<-"municipality"
head(pop.Map)
rm(totalPop)
rm(zm)
################################
# Third Part
# Incorporate mobility data and
# Calculate a risk score
################################
# Load Fb Range Maps (Mobility data) and select the country of interest
mobility = fread('unzip -p movement-range-data-2021-07-20.zip')
mex<-subset(mobility, country=="MEX")
rm(mobility)
# Merge mobility with the population (pop.Map) object previously created
mobility.pop.Map <- merge(x=pop.Map, y=mex, by.x="GID_2", by.y="polygon_id")
names(mobility.pop.Map)[6]<-"date"
head(mobility.pop.Map)
# Select dates of interest
# In this case, we are going to use a week during the quarantine
# Rename column accordingly
mobility.pop.Map$date<-as.Date(mobility.pop.Map$date, format="%Y-%m-%d")
mobility.pop.quarantine<-subset(mobility.pop.Map, date>="2020-05-01" & date<="2020-05-07")
names(mobility.pop.quarantine)[11]<-"population.staying.home"
head(mobility.pop.quarantine)
# Get the media of population staying home per municipality
mobility.pop.quarantine<-aggregate(population.staying.home~GID_2+women_population+women.perc+population_2020, FUN=mean, data=mobility.pop.quarantine, na.rm=TRUE)
# Calculate a weighted risk score
attach(mobility.pop.quarantine)
mobility.pop.quarantine$Risk.Score<-(women.perc)*(1-population.staying.home)*(sum(pop.Map$women_population))
# Normalize risk score
attach(mobility.pop.quarantine)
mobility.pop.quarantine$nRisk.Score<-(Risk.Score-min(Risk.Score))/(max(Risk.Score)-min(Risk.Score))
mobility.pop.quarantine<-mobility.pop.quarantine[,c(1,7)]
head(mobility.pop.quarantine)
################################
# Map using ggplot2
################################
# Choropleth Map with risk score
shapefile <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)
shapefile@data$id = rownames(shapefile@data)
head(shapefile@data)
class(shapefile@data)
dim(shapefile@data)
#shapefile.points = fortify(shapefile, region = "id")
shapefile.points = fortify(shapefile)
shapefile.df = join(shapefile.points, shapefile@data, by = "id")
head(shapefile.df)
shapefile.df = subset(shapefile.df, select = c(long, lat, group, GID_2))
sort(unique(shapefile.df$GID_2))
#names(shapefile.df) = c("long", "lat", "group", "NAME1")
risk.map = join(shapefile.df, mobility.pop.quarantine, by = "GID_2", type = "full")
head(risk.map)
dim(risk.map)
#chrolopleth
#png("zm.risk.score.map.png", width = 5, height = 5, units = 'in', res = 300)
p0 <- ggplot(data = risk.map,
mapping = aes(x = long, y = lat,
group = group,
fill = nRisk.Score))
p1 <- p0 + geom_polygon(color = "gray90", size = 0.05) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45)
p2 <- p1 + scale_fill_viridis_c(option = "magma", direction = -1)
p2 + theme_map() + #facet_wrap(~ year, ncol = 2) +
theme(legend.position = "right",
strip.background = element_blank()) +
#labs(fill = "Changement en pourcentage / semaine",
# "title = "Opiate Related Deaths by State, 2000-2014")
labs(fill = "nRisk.Score")
#dev.off()
################################
# Creation of files for kepler:
# risk score
################################
riskScore<-subset(risk.map, !duplicated(GID_2))
head(riskScore)
shapefile <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)
shapefile<-shapefile[,c(6,7)]
shapefile@data = join(shapefile@data, riskScore, by = "GID_2", type = "full")
head(shapefile@data)
class(shapefile)
#to convert to geojson format
json <- geojson_json(shapefile)# Simplify the geometry information of GeoJSON.
geojson_write(json, geometry = "polygon", file = "nrisk.Score.geojson")
################################
# Creation of files for kepler:
# # Pre-COVID temporal window. Mobility data
################################
mobility = fread('unzip -p /home/guillermo/Documents/dpa/fbMooc/population.maps/movement-range-data-2021-07-20.zip')
mex<-subset(mobility, country=="MEX")
mex<-mex[,c(4,1,7)]
names(mex)[1]<-"GID_2"
names(mex)[2]<-"date"
names(mex)[3]<-"population.staying.home"
#select a temporal window of one week
#data starts at 2020-03-01
mex$date<-as.Date(mex$date, format="%Y-%m-%d")
mex<-subset(mex, date<="2020-03-07")
mex<-aggregate(population.staying.home~GID_2, FUN=mean, data=mex, na.rm=TRUE)
#need coordinates
#get them the previous object named: risk.map
coord<-subset(risk.map, !duplicated(GID_2))
coord<-coord[,c(1:4)]
#add coordinates to mex object
mex <- merge(x=mex, y=coord, by="GID_2", all.y=TRUE)
shapefile <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)
shapefile<-shapefile[,c(6,7)]
shapefile@data = join(shapefile@data, mex, by = "GID_2", type = "full")
head(shapefile@data)
class(shapefile)
json <- geojson_json(shapefile)# Simplify the geometry information of GeoJSON.
geojson_write(json, geometry = "polygon", file = "mobility.precovid.geojson")
################################
# Creation of files for kepler:
# Quarantine week. Mobility data
################################
mobility = fread('unzip -p /home/guillermo/Documents/dpa/fbMooc/population.maps/movement-range-data-2021-07-20.zip')
mex<-subset(mobility, country=="MEX")
mex<-mex[,c(4,1,7)]
names(mex)[1]<-"GID_2"
names(mex)[2]<-"date"
names(mex)[3]<-"atHome"
mex$date<-as.Date(mex$date, format="%Y-%m-%d")
mex<-subset(mex, date>="2020-05-01")
mex<-subset(mex, date<="2020-05-07")
mex<-aggregate(atHome~GID_2, FUN=mean, data=mex, na.rm=TRUE)
#need coordinates
#get them from previous object named: risk.map
coord<-subset(risk.map, !duplicated(GID_2))
coord<-coord[,c(1:4)]
#add coordinates to mex object
mex <- merge(x=mex, y=coord, by="GID_2", all.y=TRUE)
shapefile <- readOGR(dsn="shapes", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)
shapefile<-shapefile[,c(6,7)]
shapefile@data = join(shapefile@data, mex, by = "GID_2", type = "full")
head(shapefile@data)
class(shapefile)
#convert into geojson format
json <- geojson_json(shapefile)
geojson_write(json, geometry = "polygon", file = "mobility.quarantine.geojson")
################################
# Creation of files for kepler:
# population density map sample
################################
totalPop = fread('unzip -p population_mex_2018-10-01.csv.zip')
#get a sample of your total pop
sampling <- totalPop[sample(nrow(totalPop), 174579), ]
#write
write.csv(sampling, "population.density.map.sample.csv", row.names=FALSE)