labbook part 6 res 0.25
Wed Jan 22 2025 19:37:42 GMT+0000 (Coordinated Universal Time)
Saved by @eho135
# 6 Res 0.25 #### # ensure assay is integrated DefaultAssay(so_donorint) <- "integrated" # set resolution at 0.25 which is higher than previously used 0.05 for clustering cells res <- 0.25 # now assign cells to a cluster and run UMAP - reduce dimensions so I can visualize # UMAP allows visualizing clusters # 2D representation of cells to visualize similarities/differences based on high dimensional features (i.e gene expression) - based on first PCA so_donorint <- FindClusters(so_donorint, resolution = res) so_donorint <- RunUMAP(so_donorint, dims=1:pcs, return.model = TRUE, verbose = FALSE) # UMAP plot - Visualization of my clustered data using UMAP plot DimPlot(so_donorint, reduction = "umap", label = TRUE, label.size = 6) # as expected, further subdivision of community X shown ggsave("plots5/UMAP_res0.25.png") # split UMAP plot - based on 'conditions' + add labels # shows me how clusters are distributed across different experimental conditions DimPlot(so_donorint, reduction = "umap", split.by = "Conditions", label = TRUE, label.size = 6) # similar to UMAP at res 0.05, community 0 shown to lack large proportion of cells ggsave("plots5/split_UMAP_res0.25.png") # % distribution of each cell cluster across different conditions # allows me to understand how cell populations are organized in relation to experimental conditions round(prop.table(table(Idents(so_donorint), so_donorint$Conditions))*100,3) # DEA for cell communities at resolution 0.25 # now prepare DEA # switch back to integrated assay for comparisons DefaultAssay(so_donorint) <- "integrated" # run DEA to identify differentially expressed genes between community 1, 0 uro.markers <- FindMarkers(so_donorint, ident.1 = 7, ident.2 = 5) # graph description is based on ident.1 = community x # but I have repeated the above code with different 'ident' to compare gene expression between communities # check data frame head(uro.markers) # avoid errors caused by small p-values downstream by setting p-value limits # all adjusted p-value > or = to (1e-300) uro.markers["p_val_adj"] <- lapply(uro.markers["p_val_adj"], pmax, 1e-300) # preparation for volcano plot # categorize genes (in uro.markers) based on their DEA results # create new column 'DEA' to classify each gene as up, down, no based on log fold-change and adjusted p-value uro.markers$DEA <- "NO" uro.markers$DEA[uro.markers$avg_log2FC > 1 & uro.markers$p_val_adj < 0.05] <- "UP" uro.markers$DEA[uro.markers$avg_log2FC < -1 & uro.markers$p_val_adj < 0.05] <- "DOWN" uro.markers$genes <- rownames(uro.markers) # Volcano plot to visualize DEA results for comparison between the conditions (Ta1, Ta6) ggplot(uro.markers, aes(x=avg_log2FC, y=-log10(p_val_adj))) + geom_point(aes(colour = DEA), show.legend = FALSE) + scale_colour_manual(values = c("blue", "gray", "red")) + geom_hline(yintercept = -log10(0.05), linetype = "dotted") + geom_vline(xintercept = c(-1,1), linetype = "dotted") + theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), plot.title = element_text(hjust = 0.5)) + geom_text_repel(data=subset(uro.markers, (abs(avg_log2FC) > 9 & p_val_adj < 0.05)| (avg_log2FC < -5 & p_val_adj < 0.05)), aes(x=avg_log2FC, y=-log10(p_val_adj), label=genes), max.overlaps = 1000, size=4.5) + ggtitle("DEA between community 7 and 5 (7 in red)") # Extract upregulated genes (log fold change > 4, p-value < 0.05 ) upreg_res_0.25 <- uro.markers[uro.markers$avg_log2FC > 2 & uro.markers$p_val_adj < 0.05, "genes"] # Extract downregulated genes (log fold change > 4, p-value < 0.05 ) downreg_res_0.25<- uro.markers[uro.markers$avg_log2FC < -3 & uro.markers$p_val_adj < 0.05, "genes"] print(upreg_res_0.25) print(downreg_res_0.25) ggsave("plots5/volcano_integrated_7vs5.png")
Comments