#Loading your work directory and RNASeq counts and filtering the counts
setwd("//files.wustl.edu/Shares/DOM/ONC/Hirbe_Lab/Diana/UBR5 KO RNASeq/analysis/HOM VS WT_2")
Counts <- read.csv("Counts.csv")
# Remove duplicate rows from Counts
Counts <- Counts[!duplicated(Counts[, 1]), ]
rownames(Counts) <- Counts[, 1]
Counts<- Counts [, -1]
# Calculate row means
row_means <- rowMeans(Counts)
# Order genes by row means in descending order
ordered_counts <- Counts[rev(order(row_means)), ]
#alternative code to the above is (ordered_counts <- Counts[order(row_means, decreasing = TRUE), ])
# Filter out rows with row means less than 10
filtered_counts <- ordered_counts[rowMeans(ordered_counts) >= 10, ]
#save filtered data frame
write.csv(filtered_counts, "filtered_counts.csv")
#prepare metadata telling R the conditions (columns)
metadata <- data.frame(
sample_id = colnames(filtered_counts), # Assuming you have loaded the filtered expression data
condition = c(rep("UBR5 WT", 3), rep("UBR5 HOM", 3)), # Treatment conditions
replicate = c(1, 2, 3, 1, 2, 3) # Sample replicates
)
metadata$condition <- factor(metadata$condition, levels = c("UBR5 WT", "UBR5 HOM"))
#Load DESEQ2 for normalization
library(DESeq2)
#Use the DESeqDataSetFromMatrix function from DESeq2 to create a DESeqDataSet object
dds <- DESeqDataSetFromMatrix(countData = filtered_counts,
colData = metadata,
design = ~ condition)
#Normailzation of RNASeq data
#Perform normalization and estimation of dispersions: Use the DESeq() function to perform normalization and estimation of dispersions.
dds <- DESeq(dds)
results <- results(dds, alpha = 0.05)
DEGs <- subset(results, abs(log2FoldChange) > 1 & padj < 0.05)
#save the de_genes data frame
write.csv(DEGs, file = "DEG_HOM_VS_WT.csv",)
write.csv(results, file = "DeseqResults_HOM_VS_WT.csv",)
#Visualizing your differentially expressed genes
#create volcano plot
library(ggplot2)
# Add column to classify genes as DEG or not
results_df <- as.data.frame(results)
results_df$gene <- rownames(results_df)
results_df$threshold <- "Unchanged"
results_df$threshold[results_df$padj < 0.05 & abs(results_df$log2FoldChange) > 1] <- "DEG"
library(ggrepel)
# Volcano plot
ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) +
theme_minimal(base_size = 14) +
labs(title = "Volcano Plot: HOM vs WT",
x = "log2 Fold Change (HOM vs WT)",
y = "-log10 Adjusted p-value",
color = "Gene status") +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black")
# Select top 50 significant genes by padj
top100 <- results_df[order(results_df$padj), ][1:100, ]
# Volcano plot
ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) +
theme_minimal(base_size = 14) +
labs(title = "Volcano Plot: HOM vs WT",
x = "log2 Fold Change (HOM vs WT)",
y = "-log10 Adjusted p-value",
color = "Gene status") +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
geom_text_repel(data = top100,
aes(label = gene),
size = 3,
max.overlaps = Inf,
box.padding = 0.3,
point.padding = 0.2,
segment.color = "grey50")
write.csv(top100, file = "Top100_HOM_vs_WT.csv", row.names = FALSE)
#Querrying genes of interest i.e UBR5 substrates as determined form the papers you have read
# Define genes of interest
genes_of_interest <- c("Egfr", "Hsp90ab1", "Map2k2", "Cerk", "Pdgfra", "Tyk2", "Jak1",
"Yap1", "Taz", "Kdr", "Aurka", "Pten", "Csf1r","Ptch1", "Smo", "Gli2", "Gli3", "Wnt10a", "Rac2", "Rspo2", "Apc",
"Cd274", "Pdcd1", "Id1", "Id3", "Cdh1", "Cdc73", "Hrpt2","Csf1","Golph3", "Cdk1", "Acsl4", "Ptk2b", "Akt1", "Akt2", "Akt3", "Pik3ca", "Pik3c2a", "Pik3cb" , "Pik3c3", "Pik3c2b", "Pik3cd", "Atmin", "Cdkn1a", "Cdk9", "Rela", "Nfkb1", "Nfkb2", "Capza1", "Stat1", "Stat3", "Irf1", "Irf3")
# Subset DEGs for these genes (case-sensitive match!)
genes_subset <- results_df[rownames(results_df) %in% genes_of_interest, ]
# Save genes of interest with stats
write.csv(genes_subset, file = "GenesOfInterest_HOM_vs_WT.csv", row.names = TRUE)
ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) +
theme_minimal(base_size = 14) +
labs(title = "Volcano Plot: HET vs WT",
x = "log2 Fold Change (HET vs WT)",
y = "-log10 Adjusted p-value",
color = "Gene status") +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
geom_text_repel(
data = genes_subset,
aes(label = gene),
color = "black", # <-- force label text to black
size = 3,
max.overlaps = Inf,
box.padding = 0.3,
point.padding = 0.2,
segment.color = "grey50"
)
# Subset DEGs only
DEGs_df <- as.data.frame(DEGs)
DEGs_df$gene <- rownames(DEGs_df)
# Find overlap between DEGs and genes of interest
genes_subset <- DEGs_df[rownames(DEGs_df) %in% genes_of_interest, ]
# Save overlapping genes with stats
write.csv(genes_subset, file = "GenesOfInterest_DEGs_HOM_vs_WT.csv", row.names = TRUE)
# Volcano plot with labels ONLY for genes of interest that are DEGs
ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) +
theme_minimal(base_size = 14) +
labs(title = "Volcano Plot: HET vs WT",
x = "log2 Fold Change (HET vs WT)",
y = "-log10 Adjusted p-value",
color = "Gene status") +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
geom_text_repel(
data = genes_subset,
aes(label = gene),
color = "black", # labels in black
size = 5,
max.overlaps = Inf,
box.padding = 0.3,
point.padding = 0.2,
segment.color = "grey50"
)
#Running pathway enrichment analysis to determine pathways enriched following UBR5 KO
if (!requireNamespace("clusterProfiler", quietly = TRUE)) {
BiocManager::install("clusterProfiler")
}
if (!requireNamespace("msigdbr", quietly = TRUE)) {
install.packages("msigdbr")
}
library(clusterProfiler)
library(msigdbr)
# Convert results to dataframe
res_df <- as.data.frame(results)
# Remove NA log2FC
res_df <- res_df[!is.na(res_df$log2FoldChange), ]
# Create named vector: names = gene symbols, values = log2FC
gene_list <- res_df$log2FoldChange
names(gene_list) <- rownames(res_df)
# Sort decreasing for clusterProfiler
gene_list <- sort(gene_list, decreasing = TRUE)
# Mouse Hallmark gene sets
hallmark_sets <- msigdbr(species = "Mus musculus", category = "H") # H = Hallmark
# Use as two-column dataframe: gs_name (pathway), gene_symbol
term2gene <- hallmark_sets[, c("gs_name", "gene_symbol")]
# Make sure your DESeq2 results have no NA log2FC
res_df <- as.data.frame(results)
res_df <- res_df[!is.na(res_df$log2FoldChange), ]
# Named vector: names = gene symbols, values = log2FC
gene_list <- res_df$log2FoldChange
names(gene_list) <- rownames(res_df)
gene_list <- sort(gene_list, decreasing = TRUE)
gsea_res <- GSEA(
geneList = gene_list,
TERM2GENE = term2gene, # <- must be dataframe, not list
pvalueCutoff = 0.1,
verbose = FALSE
)
# View top pathways
head(as.data.frame(gsea_res))
# Save results
write.csv(as.data.frame(gsea_res), "GSEA_Hallmark_Mouse_HOM_vs_WT.csv", row.names = FALSE)
library(enrichplot)
# Convert GSEA results to dataframe
gsea_df <- as.data.frame(gsea_res)
# Suppose the top (and only) enriched pathway:
top_pathway <- gsea_df$ID[5] # or use $Description if you prefer
# Classic GSEA plot for the top pathway
gseaplot2(
gsea_res,
geneSetID = top_pathway, # pathway ID
title = gsea_df$Description[5], # nice descriptive title
color = "red"
)
#PLOT HALLMARK PATHWAYS
library(ggplot2)
# Convert GSEA results to dataframe
gsea_df <- as.data.frame(gsea_res)
# Order pathways by NES (normalized enrichment score)
gsea_df <- gsea_df[order(gsea_df$NES, decreasing = TRUE), ]
# Plot ALL enriched pathways
ggplot(gsea_df, aes(x = reorder(Description, NES), y = NES, fill = -log10(p.adjust))) +
geom_col() +
coord_flip() +
labs(
title = "GSEA: All Enriched Hallmark Pathways",
x = "Pathway",
y = "Normalized Enrichment Score (NES)",
fill = "-log10 adj p-value"
) +
theme_minimal(base_size = 14)
#plot GSEA KEGG, GO, REACTOME --------------------------
# Prepare ranked gene list with Entrez IDs for GSEA
# --------------------------
library(clusterProfiler)
library(org.Mm.eg.db)
library(ReactomePA)
# Convert gene symbols to Entrez IDs
entrez_map <- bitr(names(gene_list), fromType="SYMBOL", toType="ENTREZID", OrgDb=org.Mm.eg.db)
gene_list_df <- merge(entrez_map, data.frame(log2FC = gene_list), by.x="SYMBOL", by.y="row.names")
gene_list_df <- gene_list_df[!duplicated(gene_list_df$ENTREZID), ]
gene_list_named <- gene_list_df$log2FC
names(gene_list_named) <- gene_list_df$ENTREZID
# Sort decreasing for GSEA
gene_list_named <- sort(gene_list_named, decreasing = TRUE)
# --------------------------
# 1) GSEA: KEGG Pathways
# --------------------------
gsea_kegg <- gseKEGG(
geneList = gene_list_named,
organism = "mmu",
minGSSize = 10,
pvalueCutoff = 0.1,
verbose = TRUE
)
# Save KEGG GSEA results
write.csv(as.data.frame(gsea_kegg), "GSEA_KEGG_HOM_vs_WT.csv", row.names = FALSE)
# Top 30 KEGG pathways barplot
library(enrichplot)
library(ggplot2)
# Convert gseaResult to dataframe to see top pathways
gsea_df <- as.data.frame(gsea_kegg)
# Select top 30 pathways by NES or pvalue
top30 <- gsea_df[order(gsea_df$NES, decreasing = TRUE)[1:30], ]
# Ridgeplot (shows enrichment distribution for multiple pathways)
ridgeplot(gsea_kegg, showCategory = 30) +
ggtitle("GSEA: KEGG Top 30 Pathways") +
theme_minimal(base_size = 14)
# Optional: classic GSEA plot for the top pathway
top_pathway <- top30$ID[1]
gseaplot2(gsea_kegg, geneSetID = top_pathway,
title = top30$Description[1], color = "red")
# --------------------------
# 2) GSEA: GO Biological Process (BP)
# --------------------------
gsea_go_bp <- gseGO(
geneList = gene_list_named,
OrgDb = org.Mm.eg.db,
ont = "ALL",
keyType = "ENTREZID",
minGSSize = 10,
maxGSSize = 500,
pvalueCutoff = 0.1,
verbose = TRUE
)
# Save GO BP GSEA results
write.csv(as.data.frame(gsea_go_bp), "GSEA_GO_BP_HOM_vs_WT.csv", row.names = FALSE)
# Top 30 GO BP pathways barplot
barplot(gsea_go_bp, showCategory = 30, title = "GSEA: GO BP Top 30 Pathways")
# --------------------------
# 3) GSEA: Reactome Pathways
# --------------------------
gsea_reactome <- gsePathway(
geneList = gene_list_named,
organism = "mouse",
minGSSize = 10,
pvalueCutoff = 0.1,
verbose = TRUE
)
# Save Reactome GSEA results
write.csv(as.data.frame(gsea_reactome), "GSEA_Reactome_HOM_vs_WT.csv", row.names = FALSE)
# Top 30 Reactome pathways barplot
barplot(gsea_reactome, showCategory = 30, title = "GSEA: Reactome Top 30 Pathways")
# --------------------------
library(ggplot2)
library(dplyr)
# Convert GSEA Reactome results to dataframe
gsea_reactome_df <- as.data.frame(gsea_reactome)
# Select top 30 pathways by NES magnitude
top30_reactome <- gsea_reactome_df %>%
arrange(desc(abs(NES))) %>%
slice(1:30)
# Reorder for plotting (highest NES on top)
top30_reactome$Description <- factor(top30_reactome$Description, levels = rev(top30_reactome$Description))
# Plot barplot: NES on x-axis, pathways on y-axis, fill by -log10(padj)
ggplot(top30_reactome, aes(x = NES, y = Description, fill = -log10(p.adjust))) +
geom_bar(stat = "identity") +
scale_fill_gradient(low = "red", high = "darkred") +
theme_minimal(base_size = 14) +
labs(title = "GSEA: Top 30 Reactome Pathways",
x = "Normalized Enrichment Score (NES)",
y = "",
fill = "-log10(adj.p)")
#Querrying the xepression of genes within a particular pathway you may be interested in i.e type I interferon signaling
ifna_genes <- c(
"Ifi208","Mndal","Pdcd5-ps","Gstp-ps","Ifi203-ps","Rrp8","Ifi206","Chchd10","Inca1","Ppif",
"Topors","Rrn3","Eaf2","Ticam1","Unc5b","Bmyc","Cth","Pttg1ip","Selenos","Fgb","Raf1",
"Ggct","Tmbim6","Kcnq3","Scn2a","D1Pas1","Acvr1","Pawr","Acvr1b","Adora2a","Parp1","Parp2",
"Agt","Agtr2","Akt1","Aldh2","Alox12","Ivns1abp","Slc25a4","Slc25a5","Anxa6","Apaf1","App",
"Ar","Atf2","Atf3","Atf4","Atm","Atp2a1","Atp7a","Atp5if1","Avp","Bad","Bak1","Bax","Bcl10",
"Bcl2","Bcl2a1a","Bcl2a1b","Bcl2a1c","Bcl2a1d","Bcl2l1","Bcl2l10","Bcl2l2","Bcl3","Bdkrb2",
"Bdnf","Bex3","Bid","Hrk","Bik","Bcl2l11","Bmi1","Bmp4","Bmpr1b","Bnip3","Bnip3l","Brca1",
"Brca2","Birc6","Bub1","Hyou1","Camk2b","Casp12","Casp2","Casp3","Casp6","Casp8","Casp9",
"Ctnna1","Ctnnb1","Cav1","Runx3","Rb1cc1","Cck","Cd24a","Cd28","Cd3e","Cd44","Cd5","Cdk11b",
"Cdkn1a","Cdkn2d","Cebpb","Cflar","Clu","Ackr3","Col2a1","Cradd","Creb3","Crh","Crip1",
"Csf2","Csnk2a1","Ctsc","Ctsh","Cttn","Cx3cr1","Cycs","Cyct","Cyp1b1","Dab2","Dapk2",
"Dapk3","Daxx","Dbh","Ddit3","Ddx3x","Ddx5","E2f1","Ei24","Eif2ak3","Eno1","Epha2","Epo",
"Erbb3","Ercc2","Ptprv","Esr2","Eya1","Eya2","Eya3","Eya4","Fadd","Faf1","Fas","Fasl",
"Fcgr2b","Fem1b","Fga","Fgf10","Fgf2","Fgfr1","Fgfr2","Fgfr3","Fhit","Fxn","Tlr3","Fyn",
"Fzd1","Fzd9","G0s2","Gas1","Gata1","Gata4","Usp15","Gcg","Gdnf","Gclc","Gclm","Gnai2","Gnai3",
"Rack1","Gpx1","Pdia3","Gstp2","Gstp1","Hdac2","Htt","Hells","Hgf","Hic1","Hif1a","Hint1",
"Hipk1","Hipk2","Hmox1","Hnrnpk","Hras","Dnaja1","Hspb1","Hspa1b","Hyal2","Icam1","Ier3",
"Ifi203","Ifi204","Ifnb1","Ifng","Igf1","Cd74","Ikbkg","Il10","Il12a","Il18","Il1a","Il1b",
"Il2","Il3","Il4","Il7","Inhba","Inhbb","Ins2","Itga6","Itgav","Itpr1","Jak2","Jak3","Jun",
"Kcnq2","Klf4","Krt18","Krt8","Lck","Lcn2","Lgals3","Lmna","Lta","Ltb","Ltbr","Mfn2","Sgk3",
"Bbc3","Rtkn2","Smad3","Smad4","Mal","Bmf","Maz","Mbd4","Mcl1","Mdm2","Mdm4","Melk","Kitl",
"Mif","Mknk1","Mknk2","Mlh1","Mmp2","Mmp9","Mnt","Meis3","Msh2","Msh6","Msx1","Mapt","Muc1",
"Myc","Nck1","Nck2","Nf1","Nfe2l2","Ngf","Ngfr","Nkx3-1","Nodal","Nog","Nrp1","Nr4a2","Osm",
"Mybbp1a","P2rx4","P2rx7","P4hb","Igbp1","Pdk2","Pdpk1","Pdx1","Pea15a","Pik3r1","Prkca",
"Prkcd","Serpine1","Plaur","Pml","Pmp22","Pnp","Septin4","Polb","Pou4f1","Pou4f2","Ppard",
"Ppef2","Ppp1ca","Prkdc","Mapk8ip1","Prodh","Psen1","Psen2","Psme3","Pten","Ptgis","Ptgs2",
"Ptpn1","Ptpn2","Ptprc","Rad9a","Nlrp1a","Rb1","Rela","Ret","Ripk1","Uri1","Rnf7","Rock2",
"Rpl26","Rps7","S100a8","S100a9","Scg2","Cx3cl1","Cxcl12","Sfrp2","Spi1","Sfrp1","Sgpl1",
"Shh","Siah1a","Siah1b","Siah2","Skil","Snai2","Siglec1","Snai1","Sod1","Sod2","Sort1","Sp100",
"Spn","Spop","Src","Stk11","Pycr1","Stx4a","Trp53bp2","Syk","Nrg1","Tifab","Taf6","Tcf7l2",
"Rhot2","Hip1","Agap2","Prdx2","Flcn","Arrb2","Tmc8","Tert","G2e3","Ifi27l2b","Tgfb1","Tgfb2",
"Tgfbr1","Thbs1","Tlr4","Tlr6","Ccar2","Tnf","Tnfaip3","Tnfrsf10b","Tnfrsf1a","Tnfrsf1b",
"Cd27","Tnfsf11","Tnfsf12","Dedd","Cd40lg","Cd70","Ppp1r13b","Tpd52l1","Traf1","Traf2",
"Tnfsf10","Plscr1","Trp53","Trp63","Trp73","Tpt1","Tnfrsf4","Tnfsf4","Ubb","Umod","Kdm6a",
"Stk24","Vdac2","Vdr","Vegfa","Dap","Vhl","Vnn1","Mrtfa","Senp1","Wfs1","Wnt1","Pak2",
"Wnt4","Wnt5a","Xbp1","Traf7","Bag6","Gstp3","Xpa","Yap1","Zfp13","Pcgf2","Ifi207","Stradb",
"Pdk1","Madd","Trib3","Eif2a","Tmem161a","Usp28","Ifi209","Nox1","Il20ra","Atad5","Dido1",
"Faim","Map2k5","Mapk7","Prkra","Peli3","Rbck1","Zfp385b","Pak5","E2f2","Nanos3","Eda2r",
"AY074887","Map2k1","Map2k4","Map3k5","Map3k7","Mapk8","Mapk9","Creb3l1","Ppia","Casp8ap2",
"Ern2","Aifm1","Acvr1c","Ppp2r5c","Ell3","Nherf1","Serinc3","Rps3","Bcap31","Adcy10","Tnfrsf12a",
"Phlda3","Nbn","Cep63","Bag3","Zfp385a","Hip1r","Siva1","Ifnz","Ercc6","Tmem117","Tnfsf15",
"Ep300","Il19","Fnip2","Card9","Tmem102","Parl","Rrm2b","Gfral","Itprip","Eno1b","Acsl5",
"Mettl21c","Hdac1","Gsdma3","Ero1a","Fbxw7","Fbh1","Prkn","Chek2","Tnfsf14","Pdcd7","Ppp2r1a",
"Srpx","Bok","Zfp622","Acaa2","Ifi27","Atp2a3","Ube2k","Pla2g6","Psmd10","Nono","Asah2","Ifi214",
"Pde3a","Sh3glb1","Plagl2","Gsdme","Sfn","Lgals12","Ubqln1","Becn1","Stk3","Higd1a","Nupr1",
"Aatf","Pdcd5","Pdcd10","Mtch2","Ybx3","Foxo3","Gabarap","Ikbke","Ripk3","Gsk3b","Ankrd2",
"Mllt11","Park7","Marchf7","Noc2l","Jmy","Pidd1","Stk4","Pmaip1","Pias4","Sh3rf1","Rhot1",
"Stk25","Fignl1","Mapk8ip2","Gsk3a","Ifi213","Faiml","Nlrp1b","Ube4b","Perp","Moap1","Herpud1",
"Itm2c","Htra2","Zfp110","Arl6ip5","Txndc12","Ghitm","Eef1e1","Grina","Ing5","Snw1","Fis1",
"Pam16","Ptpmt1","Prelid1","Zmynd11","Timm50","Diablo","Cdip1","Lrrk2","Gskip","Bcl2l14",
"Pycard","Rnf186","Dele1","Dnajc10","Shisa5","Ndufa13","Armc10","Rffl","Dedd2","Erp29",
"Rnf41","Ddx47","Rps27l","Nacc2","Trap1","Coa8","Aen","Ndufs3","Mul1","Steap3","Tmem109",
"Ppm1f","Pink1","Zfas1","Zdhhc3","Chac1","Triap1","Fcmr","Dyrk2","Qrich1","Ing2","Dab2ip",
"Dapk1","Tmbim1","Tfpt","Fbxo7","Trim32","Fam162a","Plscr3","Bag5","Sfpq","Tmem238l","Tradd",
"Zswim2","Faim2","Rps6kb1","Uaca","Bclaf1","Nfatc4","Slc25a31","Bloc1s2","Ppp2r1b","Bbln",
"Dnm1l","Ddias","Syvn1","Opa1","Cyld","Wdr35","Ddit4","Pik3cb","Slc35f6","Usp47","Nme5",
"Tmem14a","Mff","Bcl2l12","Brsk2","Rnf183","Knl1","Styxl1","Dapl1","Gper1","Ifi27l2a",
"Il33","Nol3","Ern1","Tnfrsf23","Tnfrsf22","Trim39","Wwox","Rnf34","Selenok","Clca3a2",
"Nfkbiz","Sgpp1","Trem2","Trps1","Phip","Mpv17l","Wnt16","Sirt1","Tm2d1","Maged1","Hmgb2",
"Qars1","Deptor","Mael","Fgg","Kdm1a"
)
# Get normalized counts from DESeq2
norm_counts <- counts(dds, normalized=TRUE)
# Subset for IFN-α response genes (keep only genes present in your dataset)
ifna_counts <- norm_counts[rownames(norm_counts) %in% ifna_genes, ]
# Optionally, z-score normalize each gene for heatmap visualization
ifna_counts_z <- t(scale(t(ifna_counts)))
# Create annotation for columns
ann_col <- data.frame(
Condition = metadata$condition
)
rownames(ann_col) <- metadata$sample_id
library(pheatmap)
# Make sure columns are in your desired order
desired_order <- c("sample.WT_1", "sample.WT_2", "sample.WT_3",
"sample.neg.neg_1", "sample.neg.neg_2", "sample.neg.neg_3")
ifna_counts_z <- ifna_counts_z[, desired_order]
# Make sure annotation matches
ann_col <- ann_col[desired_order, , drop = FALSE]
# Heatmap
pheatmap(ifna_counts_z,
annotation_col = ann_col,
show_rownames = TRUE,
show_colnames = TRUE,
cluster_rows = TRUE,
cluster_cols = FALSE, # keep the column order fixed
scale = "row",
fontsize_row = 8,
main = "HALLMARK_APOPTOSIS")
# Subset DEGs for IFNa-related genes
ifna_DEGs <- DEGs_df[DEGs_df$gene %in% ifna_genes, ]
# Save to CSV
write.csv(ifna_DEGs, "GOBP_APOPTOSIS_HOM_vs_WT DEG.csv", row.names = FALSE)
# Save subset expression to CSV
write.csv(as.data.frame(ifna_counts),
file = "GOBP_APOPTOSIS_HOM VS WT.csv",
row.names = TRUE)
# Quick check
print(ifna_DEGs)
library(ggplot2)
library(ggrepel)
ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) +
theme_minimal(base_size = 14) +
labs(
title = "HALLMARK_APOPTOSIS: HOM vs WT",
x = "log2 Fold Change (HOM vs WT)",
y = "-log10 Adjusted p-value",
color = "Gene status"
) +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
# Label only IFNa DEGs
geom_text_repel(
data = ifna_DEGs,
aes(x = log2FoldChange, y = -log10(padj), label = gene),
inherit.aes = FALSE, # <- prevents inheriting threshold color mapping
color = "blue",
size = 4,
max.overlaps = Inf,
box.padding = 0.3,
point.padding = 0.2,
segment.color = "black"
)
#Heatmap for DEGs GOBP Apoptosis
# Subset DEGs for IFNa-related genes
ifna_DEGs <- DEGs_df[DEGs_df$gene %in% ifna_genes, ]
# Subset normalized counts to only DEGs
ifna_counts_DEG <- ifna_counts[rownames(ifna_counts) %in% ifna_DEGs$gene, ]
# Z-score normalize each DEG for heatmap
ifna_counts_DEG_z <- t(scale(t(ifna_counts_DEG)))
# Make sure columns are in your desired order
ifna_counts_DEG_z <- ifna_counts_DEG_z[, desired_order]
# Annotation matches columns
ann_col_DEG <- ann_col[desired_order, , drop = FALSE]
# Heatmap of only IFNa DEGs
pheatmap(ifna_counts_DEG_z,
annotation_col = ann_col_DEG,
show_rownames = TRUE,
show_colnames = TRUE,
cluster_rows = TRUE,
cluster_cols = FALSE, # keep column order fixed
scale = "row",
fontsize_row = 8,
main = "GOBP Apoptosis_DEGs Heatmap")