#Loading your work directory and RNASeq counts and filtering the counts setwd("//files.wustl.edu/Shares/DOM/ONC/Hirbe_Lab/Diana/UBR5 KO RNASeq/analysis/HOM VS WT_2") Counts <- read.csv("Counts.csv") # Remove duplicate rows from Counts Counts <- Counts[!duplicated(Counts[, 1]), ] rownames(Counts) <- Counts[, 1] Counts<- Counts [, -1] # Calculate row means row_means <- rowMeans(Counts) # Order genes by row means in descending order ordered_counts <- Counts[rev(order(row_means)), ] #alternative code to the above is (ordered_counts <- Counts[order(row_means, decreasing = TRUE), ]) # Filter out rows with row means less than 10 filtered_counts <- ordered_counts[rowMeans(ordered_counts) >= 10, ] #save filtered data frame write.csv(filtered_counts, "filtered_counts.csv") #prepare metadata telling R the conditions (columns) metadata <- data.frame( sample_id = colnames(filtered_counts), # Assuming you have loaded the filtered expression data condition = c(rep("UBR5 WT", 3), rep("UBR5 HOM", 3)), # Treatment conditions replicate = c(1, 2, 3, 1, 2, 3) # Sample replicates ) metadata$condition <- factor(metadata$condition, levels = c("UBR5 WT", "UBR5 HOM")) #Load DESEQ2 for normalization library(DESeq2) #Use the DESeqDataSetFromMatrix function from DESeq2 to create a DESeqDataSet object dds <- DESeqDataSetFromMatrix(countData = filtered_counts, colData = metadata, design = ~ condition) #Normailzation of RNASeq data #Perform normalization and estimation of dispersions: Use the DESeq() function to perform normalization and estimation of dispersions. dds <- DESeq(dds) results <- results(dds, alpha = 0.05) DEGs <- subset(results, abs(log2FoldChange) > 1 & padj < 0.05) #save the de_genes data frame write.csv(DEGs, file = "DEG_HOM_VS_WT.csv",) write.csv(results, file = "DeseqResults_HOM_VS_WT.csv",) #Visualizing your differentially expressed genes #create volcano plot library(ggplot2) # Add column to classify genes as DEG or not results_df <- as.data.frame(results) results_df$gene <- rownames(results_df) results_df$threshold <- "Unchanged" results_df$threshold[results_df$padj < 0.05 & abs(results_df$log2FoldChange) > 1] <- "DEG" library(ggrepel) # Volcano plot ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) + geom_point(alpha = 0.6, size = 1.5) + scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) + theme_minimal(base_size = 14) + labs(title = "Volcano Plot: HOM vs WT", x = "log2 Fold Change (HOM vs WT)", y = "-log10 Adjusted p-value", color = "Gene status") + geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") + geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") # Select top 50 significant genes by padj top100 <- results_df[order(results_df$padj), ][1:100, ] # Volcano plot ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) + geom_point(alpha = 0.6, size = 1.5) + scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) + theme_minimal(base_size = 14) + labs(title = "Volcano Plot: HOM vs WT", x = "log2 Fold Change (HOM vs WT)", y = "-log10 Adjusted p-value", color = "Gene status") + geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") + geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") + geom_text_repel(data = top100, aes(label = gene), size = 3, max.overlaps = Inf, box.padding = 0.3, point.padding = 0.2, segment.color = "grey50") write.csv(top100, file = "Top100_HOM_vs_WT.csv", row.names = FALSE) #Querrying genes of interest i.e UBR5 substrates as determined form the papers you have read # Define genes of interest genes_of_interest <- c("Egfr", "Hsp90ab1", "Map2k2", "Cerk", "Pdgfra", "Tyk2", "Jak1", "Yap1", "Taz", "Kdr", "Aurka", "Pten", "Csf1r","Ptch1", "Smo", "Gli2", "Gli3", "Wnt10a", "Rac2", "Rspo2", "Apc", "Cd274", "Pdcd1", "Id1", "Id3", "Cdh1", "Cdc73", "Hrpt2","Csf1","Golph3", "Cdk1", "Acsl4", "Ptk2b", "Akt1", "Akt2", "Akt3", "Pik3ca", "Pik3c2a", "Pik3cb" , "Pik3c3", "Pik3c2b", "Pik3cd", "Atmin", "Cdkn1a", "Cdk9", "Rela", "Nfkb1", "Nfkb2", "Capza1", "Stat1", "Stat3", "Irf1", "Irf3") # Subset DEGs for these genes (case-sensitive match!) genes_subset <- results_df[rownames(results_df) %in% genes_of_interest, ] # Save genes of interest with stats write.csv(genes_subset, file = "GenesOfInterest_HOM_vs_WT.csv", row.names = TRUE) ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) + geom_point(alpha = 0.6, size = 1.5) + scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) + theme_minimal(base_size = 14) + labs(title = "Volcano Plot: HET vs WT", x = "log2 Fold Change (HET vs WT)", y = "-log10 Adjusted p-value", color = "Gene status") + geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") + geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") + geom_text_repel( data = genes_subset, aes(label = gene), color = "black", # <-- force label text to black size = 3, max.overlaps = Inf, box.padding = 0.3, point.padding = 0.2, segment.color = "grey50" ) # Subset DEGs only DEGs_df <- as.data.frame(DEGs) DEGs_df$gene <- rownames(DEGs_df) # Find overlap between DEGs and genes of interest genes_subset <- DEGs_df[rownames(DEGs_df) %in% genes_of_interest, ] # Save overlapping genes with stats write.csv(genes_subset, file = "GenesOfInterest_DEGs_HOM_vs_WT.csv", row.names = TRUE) # Volcano plot with labels ONLY for genes of interest that are DEGs ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) + geom_point(alpha = 0.6, size = 1.5) + scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) + theme_minimal(base_size = 14) + labs(title = "Volcano Plot: HET vs WT", x = "log2 Fold Change (HET vs WT)", y = "-log10 Adjusted p-value", color = "Gene status") + geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") + geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") + geom_text_repel( data = genes_subset, aes(label = gene), color = "black", # labels in black size = 5, max.overlaps = Inf, box.padding = 0.3, point.padding = 0.2, segment.color = "grey50" ) #Running pathway enrichment analysis to determine pathways enriched following UBR5 KO if (!requireNamespace("clusterProfiler", quietly = TRUE)) { BiocManager::install("clusterProfiler") } if (!requireNamespace("msigdbr", quietly = TRUE)) { install.packages("msigdbr") } library(clusterProfiler) library(msigdbr) # Convert results to dataframe res_df <- as.data.frame(results) # Remove NA log2FC res_df <- res_df[!is.na(res_df$log2FoldChange), ] # Create named vector: names = gene symbols, values = log2FC gene_list <- res_df$log2FoldChange names(gene_list) <- rownames(res_df) # Sort decreasing for clusterProfiler gene_list <- sort(gene_list, decreasing = TRUE) # Mouse Hallmark gene sets hallmark_sets <- msigdbr(species = "Mus musculus", category = "H") # H = Hallmark # Use as two-column dataframe: gs_name (pathway), gene_symbol term2gene <- hallmark_sets[, c("gs_name", "gene_symbol")] # Make sure your DESeq2 results have no NA log2FC res_df <- as.data.frame(results) res_df <- res_df[!is.na(res_df$log2FoldChange), ] # Named vector: names = gene symbols, values = log2FC gene_list <- res_df$log2FoldChange names(gene_list) <- rownames(res_df) gene_list <- sort(gene_list, decreasing = TRUE) gsea_res <- GSEA( geneList = gene_list, TERM2GENE = term2gene, # <- must be dataframe, not list pvalueCutoff = 0.1, verbose = FALSE ) # View top pathways head(as.data.frame(gsea_res)) # Save results write.csv(as.data.frame(gsea_res), "GSEA_Hallmark_Mouse_HOM_vs_WT.csv", row.names = FALSE) library(enrichplot) # Convert GSEA results to dataframe gsea_df <- as.data.frame(gsea_res) # Suppose the top (and only) enriched pathway: top_pathway <- gsea_df$ID[5] # or use $Description if you prefer # Classic GSEA plot for the top pathway gseaplot2( gsea_res, geneSetID = top_pathway, # pathway ID title = gsea_df$Description[5], # nice descriptive title color = "red" ) #PLOT HALLMARK PATHWAYS library(ggplot2) # Convert GSEA results to dataframe gsea_df <- as.data.frame(gsea_res) # Order pathways by NES (normalized enrichment score) gsea_df <- gsea_df[order(gsea_df$NES, decreasing = TRUE), ] # Plot ALL enriched pathways ggplot(gsea_df, aes(x = reorder(Description, NES), y = NES, fill = -log10(p.adjust))) + geom_col() + coord_flip() + labs( title = "GSEA: All Enriched Hallmark Pathways", x = "Pathway", y = "Normalized Enrichment Score (NES)", fill = "-log10 adj p-value" ) + theme_minimal(base_size = 14) #plot GSEA KEGG, GO, REACTOME -------------------------- # Prepare ranked gene list with Entrez IDs for GSEA # -------------------------- library(clusterProfiler) library(org.Mm.eg.db) library(ReactomePA) # Convert gene symbols to Entrez IDs entrez_map <- bitr(names(gene_list), fromType="SYMBOL", toType="ENTREZID", OrgDb=org.Mm.eg.db) gene_list_df <- merge(entrez_map, data.frame(log2FC = gene_list), by.x="SYMBOL", by.y="row.names") gene_list_df <- gene_list_df[!duplicated(gene_list_df$ENTREZID), ] gene_list_named <- gene_list_df$log2FC names(gene_list_named) <- gene_list_df$ENTREZID # Sort decreasing for GSEA gene_list_named <- sort(gene_list_named, decreasing = TRUE) # -------------------------- # 1) GSEA: KEGG Pathways # -------------------------- gsea_kegg <- gseKEGG( geneList = gene_list_named, organism = "mmu", minGSSize = 10, pvalueCutoff = 0.1, verbose = TRUE ) # Save KEGG GSEA results write.csv(as.data.frame(gsea_kegg), "GSEA_KEGG_HOM_vs_WT.csv", row.names = FALSE) # Top 30 KEGG pathways barplot library(enrichplot) library(ggplot2) # Convert gseaResult to dataframe to see top pathways gsea_df <- as.data.frame(gsea_kegg) # Select top 30 pathways by NES or pvalue top30 <- gsea_df[order(gsea_df$NES, decreasing = TRUE)[1:30], ] # Ridgeplot (shows enrichment distribution for multiple pathways) ridgeplot(gsea_kegg, showCategory = 30) + ggtitle("GSEA: KEGG Top 30 Pathways") + theme_minimal(base_size = 14) # Optional: classic GSEA plot for the top pathway top_pathway <- top30$ID[1] gseaplot2(gsea_kegg, geneSetID = top_pathway, title = top30$Description[1], color = "red") # -------------------------- # 2) GSEA: GO Biological Process (BP) # -------------------------- gsea_go_bp <- gseGO( geneList = gene_list_named, OrgDb = org.Mm.eg.db, ont = "ALL", keyType = "ENTREZID", minGSSize = 10, maxGSSize = 500, pvalueCutoff = 0.1, verbose = TRUE ) # Save GO BP GSEA results write.csv(as.data.frame(gsea_go_bp), "GSEA_GO_BP_HOM_vs_WT.csv", row.names = FALSE) # Top 30 GO BP pathways barplot barplot(gsea_go_bp, showCategory = 30, title = "GSEA: GO BP Top 30 Pathways") # -------------------------- # 3) GSEA: Reactome Pathways # -------------------------- gsea_reactome <- gsePathway( geneList = gene_list_named, organism = "mouse", minGSSize = 10, pvalueCutoff = 0.1, verbose = TRUE ) # Save Reactome GSEA results write.csv(as.data.frame(gsea_reactome), "GSEA_Reactome_HOM_vs_WT.csv", row.names = FALSE) # Top 30 Reactome pathways barplot barplot(gsea_reactome, showCategory = 30, title = "GSEA: Reactome Top 30 Pathways") # -------------------------- library(ggplot2) library(dplyr) # Convert GSEA Reactome results to dataframe gsea_reactome_df <- as.data.frame(gsea_reactome) # Select top 30 pathways by NES magnitude top30_reactome <- gsea_reactome_df %>% arrange(desc(abs(NES))) %>% slice(1:30) # Reorder for plotting (highest NES on top) top30_reactome$Description <- factor(top30_reactome$Description, levels = rev(top30_reactome$Description)) # Plot barplot: NES on x-axis, pathways on y-axis, fill by -log10(padj) ggplot(top30_reactome, aes(x = NES, y = Description, fill = -log10(p.adjust))) + geom_bar(stat = "identity") + scale_fill_gradient(low = "red", high = "darkred") + theme_minimal(base_size = 14) + labs(title = "GSEA: Top 30 Reactome Pathways", x = "Normalized Enrichment Score (NES)", y = "", fill = "-log10(adj.p)") #Querrying the xepression of genes within a particular pathway you may be interested in i.e type I interferon signaling ifna_genes <- c( "Ifi208","Mndal","Pdcd5-ps","Gstp-ps","Ifi203-ps","Rrp8","Ifi206","Chchd10","Inca1","Ppif", "Topors","Rrn3","Eaf2","Ticam1","Unc5b","Bmyc","Cth","Pttg1ip","Selenos","Fgb","Raf1", "Ggct","Tmbim6","Kcnq3","Scn2a","D1Pas1","Acvr1","Pawr","Acvr1b","Adora2a","Parp1","Parp2", "Agt","Agtr2","Akt1","Aldh2","Alox12","Ivns1abp","Slc25a4","Slc25a5","Anxa6","Apaf1","App", "Ar","Atf2","Atf3","Atf4","Atm","Atp2a1","Atp7a","Atp5if1","Avp","Bad","Bak1","Bax","Bcl10", "Bcl2","Bcl2a1a","Bcl2a1b","Bcl2a1c","Bcl2a1d","Bcl2l1","Bcl2l10","Bcl2l2","Bcl3","Bdkrb2", "Bdnf","Bex3","Bid","Hrk","Bik","Bcl2l11","Bmi1","Bmp4","Bmpr1b","Bnip3","Bnip3l","Brca1", "Brca2","Birc6","Bub1","Hyou1","Camk2b","Casp12","Casp2","Casp3","Casp6","Casp8","Casp9", "Ctnna1","Ctnnb1","Cav1","Runx3","Rb1cc1","Cck","Cd24a","Cd28","Cd3e","Cd44","Cd5","Cdk11b", "Cdkn1a","Cdkn2d","Cebpb","Cflar","Clu","Ackr3","Col2a1","Cradd","Creb3","Crh","Crip1", "Csf2","Csnk2a1","Ctsc","Ctsh","Cttn","Cx3cr1","Cycs","Cyct","Cyp1b1","Dab2","Dapk2", "Dapk3","Daxx","Dbh","Ddit3","Ddx3x","Ddx5","E2f1","Ei24","Eif2ak3","Eno1","Epha2","Epo", "Erbb3","Ercc2","Ptprv","Esr2","Eya1","Eya2","Eya3","Eya4","Fadd","Faf1","Fas","Fasl", "Fcgr2b","Fem1b","Fga","Fgf10","Fgf2","Fgfr1","Fgfr2","Fgfr3","Fhit","Fxn","Tlr3","Fyn", "Fzd1","Fzd9","G0s2","Gas1","Gata1","Gata4","Usp15","Gcg","Gdnf","Gclc","Gclm","Gnai2","Gnai3", "Rack1","Gpx1","Pdia3","Gstp2","Gstp1","Hdac2","Htt","Hells","Hgf","Hic1","Hif1a","Hint1", "Hipk1","Hipk2","Hmox1","Hnrnpk","Hras","Dnaja1","Hspb1","Hspa1b","Hyal2","Icam1","Ier3", "Ifi203","Ifi204","Ifnb1","Ifng","Igf1","Cd74","Ikbkg","Il10","Il12a","Il18","Il1a","Il1b", "Il2","Il3","Il4","Il7","Inhba","Inhbb","Ins2","Itga6","Itgav","Itpr1","Jak2","Jak3","Jun", "Kcnq2","Klf4","Krt18","Krt8","Lck","Lcn2","Lgals3","Lmna","Lta","Ltb","Ltbr","Mfn2","Sgk3", "Bbc3","Rtkn2","Smad3","Smad4","Mal","Bmf","Maz","Mbd4","Mcl1","Mdm2","Mdm4","Melk","Kitl", "Mif","Mknk1","Mknk2","Mlh1","Mmp2","Mmp9","Mnt","Meis3","Msh2","Msh6","Msx1","Mapt","Muc1", "Myc","Nck1","Nck2","Nf1","Nfe2l2","Ngf","Ngfr","Nkx3-1","Nodal","Nog","Nrp1","Nr4a2","Osm", "Mybbp1a","P2rx4","P2rx7","P4hb","Igbp1","Pdk2","Pdpk1","Pdx1","Pea15a","Pik3r1","Prkca", "Prkcd","Serpine1","Plaur","Pml","Pmp22","Pnp","Septin4","Polb","Pou4f1","Pou4f2","Ppard", "Ppef2","Ppp1ca","Prkdc","Mapk8ip1","Prodh","Psen1","Psen2","Psme3","Pten","Ptgis","Ptgs2", "Ptpn1","Ptpn2","Ptprc","Rad9a","Nlrp1a","Rb1","Rela","Ret","Ripk1","Uri1","Rnf7","Rock2", "Rpl26","Rps7","S100a8","S100a9","Scg2","Cx3cl1","Cxcl12","Sfrp2","Spi1","Sfrp1","Sgpl1", "Shh","Siah1a","Siah1b","Siah2","Skil","Snai2","Siglec1","Snai1","Sod1","Sod2","Sort1","Sp100", "Spn","Spop","Src","Stk11","Pycr1","Stx4a","Trp53bp2","Syk","Nrg1","Tifab","Taf6","Tcf7l2", "Rhot2","Hip1","Agap2","Prdx2","Flcn","Arrb2","Tmc8","Tert","G2e3","Ifi27l2b","Tgfb1","Tgfb2", "Tgfbr1","Thbs1","Tlr4","Tlr6","Ccar2","Tnf","Tnfaip3","Tnfrsf10b","Tnfrsf1a","Tnfrsf1b", "Cd27","Tnfsf11","Tnfsf12","Dedd","Cd40lg","Cd70","Ppp1r13b","Tpd52l1","Traf1","Traf2", "Tnfsf10","Plscr1","Trp53","Trp63","Trp73","Tpt1","Tnfrsf4","Tnfsf4","Ubb","Umod","Kdm6a", "Stk24","Vdac2","Vdr","Vegfa","Dap","Vhl","Vnn1","Mrtfa","Senp1","Wfs1","Wnt1","Pak2", "Wnt4","Wnt5a","Xbp1","Traf7","Bag6","Gstp3","Xpa","Yap1","Zfp13","Pcgf2","Ifi207","Stradb", "Pdk1","Madd","Trib3","Eif2a","Tmem161a","Usp28","Ifi209","Nox1","Il20ra","Atad5","Dido1", "Faim","Map2k5","Mapk7","Prkra","Peli3","Rbck1","Zfp385b","Pak5","E2f2","Nanos3","Eda2r", "AY074887","Map2k1","Map2k4","Map3k5","Map3k7","Mapk8","Mapk9","Creb3l1","Ppia","Casp8ap2", "Ern2","Aifm1","Acvr1c","Ppp2r5c","Ell3","Nherf1","Serinc3","Rps3","Bcap31","Adcy10","Tnfrsf12a", "Phlda3","Nbn","Cep63","Bag3","Zfp385a","Hip1r","Siva1","Ifnz","Ercc6","Tmem117","Tnfsf15", "Ep300","Il19","Fnip2","Card9","Tmem102","Parl","Rrm2b","Gfral","Itprip","Eno1b","Acsl5", "Mettl21c","Hdac1","Gsdma3","Ero1a","Fbxw7","Fbh1","Prkn","Chek2","Tnfsf14","Pdcd7","Ppp2r1a", "Srpx","Bok","Zfp622","Acaa2","Ifi27","Atp2a3","Ube2k","Pla2g6","Psmd10","Nono","Asah2","Ifi214", "Pde3a","Sh3glb1","Plagl2","Gsdme","Sfn","Lgals12","Ubqln1","Becn1","Stk3","Higd1a","Nupr1", "Aatf","Pdcd5","Pdcd10","Mtch2","Ybx3","Foxo3","Gabarap","Ikbke","Ripk3","Gsk3b","Ankrd2", "Mllt11","Park7","Marchf7","Noc2l","Jmy","Pidd1","Stk4","Pmaip1","Pias4","Sh3rf1","Rhot1", "Stk25","Fignl1","Mapk8ip2","Gsk3a","Ifi213","Faiml","Nlrp1b","Ube4b","Perp","Moap1","Herpud1", "Itm2c","Htra2","Zfp110","Arl6ip5","Txndc12","Ghitm","Eef1e1","Grina","Ing5","Snw1","Fis1", "Pam16","Ptpmt1","Prelid1","Zmynd11","Timm50","Diablo","Cdip1","Lrrk2","Gskip","Bcl2l14", "Pycard","Rnf186","Dele1","Dnajc10","Shisa5","Ndufa13","Armc10","Rffl","Dedd2","Erp29", "Rnf41","Ddx47","Rps27l","Nacc2","Trap1","Coa8","Aen","Ndufs3","Mul1","Steap3","Tmem109", "Ppm1f","Pink1","Zfas1","Zdhhc3","Chac1","Triap1","Fcmr","Dyrk2","Qrich1","Ing2","Dab2ip", "Dapk1","Tmbim1","Tfpt","Fbxo7","Trim32","Fam162a","Plscr3","Bag5","Sfpq","Tmem238l","Tradd", "Zswim2","Faim2","Rps6kb1","Uaca","Bclaf1","Nfatc4","Slc25a31","Bloc1s2","Ppp2r1b","Bbln", "Dnm1l","Ddias","Syvn1","Opa1","Cyld","Wdr35","Ddit4","Pik3cb","Slc35f6","Usp47","Nme5", "Tmem14a","Mff","Bcl2l12","Brsk2","Rnf183","Knl1","Styxl1","Dapl1","Gper1","Ifi27l2a", "Il33","Nol3","Ern1","Tnfrsf23","Tnfrsf22","Trim39","Wwox","Rnf34","Selenok","Clca3a2", "Nfkbiz","Sgpp1","Trem2","Trps1","Phip","Mpv17l","Wnt16","Sirt1","Tm2d1","Maged1","Hmgb2", "Qars1","Deptor","Mael","Fgg","Kdm1a" ) # Get normalized counts from DESeq2 norm_counts <- counts(dds, normalized=TRUE) # Subset for IFN-α response genes (keep only genes present in your dataset) ifna_counts <- norm_counts[rownames(norm_counts) %in% ifna_genes, ] # Optionally, z-score normalize each gene for heatmap visualization ifna_counts_z <- t(scale(t(ifna_counts))) # Create annotation for columns ann_col <- data.frame( Condition = metadata$condition ) rownames(ann_col) <- metadata$sample_id library(pheatmap) # Make sure columns are in your desired order desired_order <- c("sample.WT_1", "sample.WT_2", "sample.WT_3", "sample.neg.neg_1", "sample.neg.neg_2", "sample.neg.neg_3") ifna_counts_z <- ifna_counts_z[, desired_order] # Make sure annotation matches ann_col <- ann_col[desired_order, , drop = FALSE] # Heatmap pheatmap(ifna_counts_z, annotation_col = ann_col, show_rownames = TRUE, show_colnames = TRUE, cluster_rows = TRUE, cluster_cols = FALSE, # keep the column order fixed scale = "row", fontsize_row = 8, main = "HALLMARK_APOPTOSIS") # Subset DEGs for IFNa-related genes ifna_DEGs <- DEGs_df[DEGs_df$gene %in% ifna_genes, ] # Save to CSV write.csv(ifna_DEGs, "GOBP_APOPTOSIS_HOM_vs_WT DEG.csv", row.names = FALSE) # Save subset expression to CSV write.csv(as.data.frame(ifna_counts), file = "GOBP_APOPTOSIS_HOM VS WT.csv", row.names = TRUE) # Quick check print(ifna_DEGs) library(ggplot2) library(ggrepel) ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) + geom_point(alpha = 0.6, size = 1.5) + scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) + theme_minimal(base_size = 14) + labs( title = "HALLMARK_APOPTOSIS: HOM vs WT", x = "log2 Fold Change (HOM vs WT)", y = "-log10 Adjusted p-value", color = "Gene status" ) + geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") + geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") + # Label only IFNa DEGs geom_text_repel( data = ifna_DEGs, aes(x = log2FoldChange, y = -log10(padj), label = gene), inherit.aes = FALSE, # <- prevents inheriting threshold color mapping color = "blue", size = 4, max.overlaps = Inf, box.padding = 0.3, point.padding = 0.2, segment.color = "black" ) #Heatmap for DEGs GOBP Apoptosis # Subset DEGs for IFNa-related genes ifna_DEGs <- DEGs_df[DEGs_df$gene %in% ifna_genes, ] # Subset normalized counts to only DEGs ifna_counts_DEG <- ifna_counts[rownames(ifna_counts) %in% ifna_DEGs$gene, ] # Z-score normalize each DEG for heatmap ifna_counts_DEG_z <- t(scale(t(ifna_counts_DEG))) # Make sure columns are in your desired order ifna_counts_DEG_z <- ifna_counts_DEG_z[, desired_order] # Annotation matches columns ann_col_DEG <- ann_col[desired_order, , drop = FALSE] # Heatmap of only IFNa DEGs pheatmap(ifna_counts_DEG_z, annotation_col = ann_col_DEG, show_rownames = TRUE, show_colnames = TRUE, cluster_rows = TRUE, cluster_cols = FALSE, # keep column order fixed scale = "row", fontsize_row = 8, main = "GOBP Apoptosis_DEGs Heatmap")