# 6 Res 0.25 ####
# ensure assay is integrated
DefaultAssay(so_donorint) <- "integrated"
# set resolution at 0.25 which is higher than previously used 0.05 for clustering cells
res <- 0.25
# now assign cells to a cluster and run UMAP - reduce dimensions so I can visualize
# UMAP allows visualizing clusters
# 2D representation of cells to visualize similarities/differences based on high dimensional features (i.e gene expression) - based on first PCA
so_donorint <- FindClusters(so_donorint, resolution = res)
so_donorint <- RunUMAP(so_donorint, dims=1:pcs, return.model = TRUE, verbose = FALSE)
# UMAP plot - Visualization of my clustered data using UMAP plot
DimPlot(so_donorint, reduction = "umap", label = TRUE, label.size = 6)
# as expected, further subdivision of community X shown
ggsave("plots5/UMAP_res0.25.png")
# split UMAP plot - based on 'conditions' + add labels
# shows me how clusters are distributed across different experimental conditions
DimPlot(so_donorint, reduction = "umap", split.by = "Conditions", label = TRUE, label.size = 6)
# similar to UMAP at res 0.05, community 0 shown to lack large proportion of cells
ggsave("plots5/split_UMAP_res0.25.png")
# % distribution of each cell cluster across different conditions
# allows me to understand how cell populations are organized in relation to experimental conditions
round(prop.table(table(Idents(so_donorint), so_donorint$Conditions))*100,3)
# DEA for cell communities at resolution 0.25
# now prepare DEA
# switch back to integrated assay for comparisons
DefaultAssay(so_donorint) <- "integrated"
# run DEA to identify differentially expressed genes between community 1, 0
uro.markers <- FindMarkers(so_donorint, ident.1 = 7, ident.2 = 5)
# graph description is based on ident.1 = community x
# but I have repeated the above code with different 'ident' to compare gene expression between communities
# check data frame
head(uro.markers)
# avoid errors caused by small p-values downstream by setting p-value limits
# all adjusted p-value > or = to (1e-300)
uro.markers["p_val_adj"] <- lapply(uro.markers["p_val_adj"], pmax, 1e-300)
# preparation for volcano plot
# categorize genes (in uro.markers) based on their DEA results
# create new column 'DEA' to classify each gene as up, down, no based on log fold-change and adjusted p-value
uro.markers$DEA <- "NO"
uro.markers$DEA[uro.markers$avg_log2FC > 1 & uro.markers$p_val_adj < 0.05] <- "UP"
uro.markers$DEA[uro.markers$avg_log2FC < -1 & uro.markers$p_val_adj < 0.05] <- "DOWN"
uro.markers$genes <- rownames(uro.markers)
# Volcano plot to visualize DEA results for comparison between the conditions (Ta1, Ta6)
ggplot(uro.markers, aes(x=avg_log2FC, y=-log10(p_val_adj))) +
geom_point(aes(colour = DEA), show.legend = FALSE) +
scale_colour_manual(values = c("blue", "gray", "red")) +
geom_hline(yintercept = -log10(0.05), linetype = "dotted") +
geom_vline(xintercept = c(-1,1), linetype = "dotted") +
theme_bw() +
theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), plot.title = element_text(hjust = 0.5)) +
geom_text_repel(data=subset(uro.markers, (abs(avg_log2FC) > 9 & p_val_adj < 0.05)| (avg_log2FC < -5 & p_val_adj < 0.05)), aes(x=avg_log2FC, y=-log10(p_val_adj), label=genes), max.overlaps = 1000, size=4.5) +
ggtitle("DEA between community 7 and 5 (7 in red)")
# Extract upregulated genes (log fold change > 4, p-value < 0.05 )
upreg_res_0.25 <- uro.markers[uro.markers$avg_log2FC > 2 & uro.markers$p_val_adj < 0.05, "genes"]
# Extract downregulated genes (log fold change > 4, p-value < 0.05 )
downreg_res_0.25<- uro.markers[uro.markers$avg_log2FC < -3 & uro.markers$p_val_adj < 0.05, "genes"]
print(upreg_res_0.25)
print(downreg_res_0.25)
ggsave("plots5/volcano_integrated_7vs5.png")