ssGSEA UBR5 vs Hallmark IFNg response

PHOTO EMBED

Tue Apr 29 2025 21:46:32 GMT+0000 (Coordinated Universal Time)

Saved by @1234_5

# ---- Load TPM expression data and extract UBR5 ----
# NOTE(review): setwd() with a machine-specific absolute path makes the script
# non-portable. It is kept so the working directory seen by the rest of the
# session is unchanged; consider running the script from the project directory.
setwd("C:\\Users\\d.a.odhiambo\\Desktop\\UBR5 vs Mets Analysis")

# Load TPM data; the first column is used as the gene identifier.
tpm <- read.csv("GemTPM_Primary.csv")

# Keep only the first row for each gene identifier so the identifiers can be
# used as unique row names. Rows with a missing identifier are dropped because
# data-frame row names cannot be NA.
gene_ids <- tpm[, 1]
tpm <- tpm[!is.na(gene_ids) & !duplicated(gene_ids), ]
rownames(tpm) <- tpm[, 1]
# drop = FALSE keeps a data frame even if only one sample column remains.
tpm <- tpm[, -1, drop = FALSE]

# Look UBR5 up by exact match. Character row indexing on a data frame partially
# matches row names and yields an all-NA row when nothing matches, so a missing
# gene would otherwise go unnoticed (or silently pick up e.g. "UBR5-AS1").
ubr5_row <- match("UBR5", rownames(tpm))
if (is.na(ubr5_row)) {
  stop("Gene 'UBR5' not found in the first column of GemTPM_Primary.csv",
       call. = FALSE)
}
ubr5_expr <- as.numeric(unlist(tpm[ubr5_row, , drop = FALSE], use.names = FALSE))
library(msigdbr)
library(GSVA)
library(dplyr)

# ---- Hallmark Interferon Gamma Response gene set ----
# Fetch the Hallmark ("H") collection for human from msigdbr.
# NOTE(review): newer msigdbr releases appear to name this argument
# `collection`; `category` is kept for compatibility with the version this
# script was written against -- confirm against the installed msigdbr.
m_df <- msigdbr(species = "Homo sapiens", category = "H")
hallmark_ifng_genes <- m_df %>%
  filter(gs_name == "HALLMARK_INTERFERON_GAMMA_RESPONSE") %>%
  pull(gene_symbol)

# Subset to only the genes present in the expression data. Stop with a clear
# message when nothing overlaps (e.g. row names are not gene symbols) instead
# of passing an empty gene set on to ssGSEA.
ifng_genes_present <- intersect(hallmark_ifng_genes, rownames(tpm))
if (length(ifng_genes_present) == 0) {
  stop("None of the HALLMARK_INTERFERON_GAMMA_RESPONSE genes are present in ",
       "rownames(tpm); check that the row names are gene symbols.",
       call. = FALSE)
}
gene_set_list <- list(HALLMARK_INTERFERON_GAMMA_RESPONSE = ifng_genes_present)

# ---- ssGSEA scoring ----
# Prepare the expression matrix (genes in rows, samples in columns) and force
# it to numeric.
tpm_matrix <- as.matrix(tpm)
mode(tpm_matrix) <- "numeric"

# Surface missing values early: they may come from the input file or from
# non-numeric columns that were coerced to NA above.
if (anyNA(tpm_matrix)) {
  warning("tpm_matrix contains ", sum(is.na(tpm_matrix)),
          " NA value(s); check the input for missing or non-numeric entries.",
          call. = FALSE)
}

# Step 1: Create the ssGSEA parameter object (no additional args here!).
# GSVA is already attached at the top of the script, so it is not re-loaded.
ssgsea_params <- ssgseaParam(exprData = tpm_matrix, geneSets = gene_set_list)

# Step 2: Run ssGSEA; the result has one row per gene set and one column per
# sample.
ssgsea_scores <- gsva(param = ssgsea_params)

# ---- Correlate UBR5 expression with the IFN-gamma response score ----
# Per-sample Hallmark Interferon Gamma Response score as a plain numeric vector.
ifng_score <- as.numeric(ssgsea_scores["HALLMARK_INTERFERON_GAMMA_RESPONSE", ])

# Run the correlation test once (cor.test defaults, i.e. Pearson), keep the
# result for later use, and print the full summary.
correlation_result <- cor.test(ubr5_expr, ifng_score)
print(correlation_result)

# Correlation coefficient rounded to 3 decimal places.
cor_coefficient <- round(correlation_result$estimate, 3)
# Keep 3 significant digits rather than 3 decimal places, so that very small
# p-values are not collapsed to 0 before they are formatted for display.
p_value <- signif(correlation_result$p.value, 3)

# ---- Scatter plot: UBR5 expression vs IFN-gamma response (scientific p) ----
library(ggplot2)

# One row per sample: UBR5 expression and the ssGSEA score.
data_for_plot <- data.frame(
  UBR5_Expression = ubr5_expr,
  IFNG_Score = ifng_score
)

# Build the annotation from the unrounded p-value so that very small values are
# shown in scientific notation instead of being displayed as 0. paste0() avoids
# the extra spaces that paste() inserts between its arguments.
stats_label <- paste0(
  "r = ", cor_coefficient,
  "\np = ", format(correlation_result$p.value, scientific = TRUE, digits = 3)
)

# Generate the plot: points, a linear fit for visualization, and the
# correlation statistics anchored at the bottom-right corner of the data range.
ggplot(data_for_plot, aes(x = UBR5_Expression, y = IFNG_Score)) +
  geom_point(color = "blue") +
  labs(title = "Correlation between UBR5 Expression and IFN-gamma Response",
       x = "UBR5 Expression",
       y = "Hallmark IFN-gamma Response Score") +
  theme_minimal() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  annotate("text",
           # na.rm keeps the anchor defined even if some samples are NA.
           x = max(ubr5_expr, na.rm = TRUE),
           y = min(ifng_score, na.rm = TRUE),
           label = stats_label,
           hjust = 1, vjust = 0, color = "black", size = 5, fontface = "italic")
# ---- Scatter plot: UBR5 expression vs IFN-gamma response (fixed-notation p) ----
# Correlation coefficient (3 decimals) and p-value in fixed notation with
# 7 decimal places.
cor_coefficient <- round(correlation_result$estimate, 3)
p_value <- formatC(correlation_result$p.value, format = "f", digits = 7)

# With 7 fixed decimals, very small p-values would print as "0.0000000".
# Report those as an upper bound instead of a misleading zero.
p_is_tiny <- !is.na(correlation_result$p.value) &&
  correlation_result$p.value < 1e-7
p_label <- if (p_is_tiny) "p < 0.0000001" else paste0("p = ", p_value)

# Annotation text shown on the plot.
label <- paste0("r = ", cor_coefficient, "\n", p_label)

# Create plot: points, a linear fit, and the correlation statistics anchored at
# the bottom-right corner of the data range.
ggplot(data_for_plot, aes(x = UBR5_Expression, y = IFNG_Score)) +
  geom_point(color = "blue") +
  labs(title = "Correlation between UBR5 Expression and IFN-gamma Response",
       x = "UBR5 Expression",
       y = "Hallmark IFN-gamma Response Score") +
  theme_minimal() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  annotate("text",
           # na.rm keeps the anchor defined even if some samples are NA.
           x = max(ubr5_expr, na.rm = TRUE),
           y = min(ifng_score, na.rm = TRUE),
           label = label,
           hjust = 1, vjust = 0, color = "black", size = 5, fontface = "italic")
content_copyCOPY