# NOTE(review): hard-coded, machine-specific working directory. It is kept so
# that relative paths used by this script keep resolving; consider running the
# script from a project root instead of calling setwd().
setwd("C:\\Users\\d.a.odhiambo\\Desktop\\UBR5 vs Mets Analysis")

# Load the TPM table. The first column is used as the row identifier (gene
# names such as "UBR5" are looked up there); the remaining columns are kept as
# the expression values.
tpm <- read.csv("GemTPM_Primary.csv")

# Keep only the first row for each identifier and drop rows whose identifier
# is missing, because NA is not a valid row name.
tpm <- tpm[!is.na(tpm[, 1]) & !duplicated(tpm[, 1]), ]
rownames(tpm) <- tpm[, 1]
# drop = FALSE keeps a data frame even if only one value column remains.
tpm <- tpm[, -1, drop = FALSE]

# Fail early with a clear message: indexing a data frame by an absent row name
# silently returns a row of NAs instead of raising an error.
if (!"UBR5" %in% rownames(tpm)) {
  stop("Gene 'UBR5' was not found in GemTPM_Primary.csv", call. = FALSE)
}

# UBR5 expression across all columns, as a plain numeric vector.
ubr5_expr <- as.numeric(tpm["UBR5", ])
library(msigdbr)
library(GSVA)
library(dplyr)
# Fetch the human Hallmark collection from MSigDB and keep the gene symbols
# that belong to the interferon-gamma response set.
# NOTE(review): recent msigdbr releases appear to rename `category` to
# `collection` -- confirm against the installed version.
m_df <- msigdbr(species = "Homo sapiens", category = "H")
hallmark_ifng_genes <- m_df %>%
  filter(gs_name == "HALLMARK_INTERFERON_GAMMA_RESPONSE") %>%
  pull(gene_symbol)

# Restrict the set to genes that are present as row names of the expression
# table, wrapped in a named list (one entry per gene set).
gene_set_list <- list(
  HALLMARK_INTERFERON_GAMMA_RESPONSE = intersect(
    hallmark_ifng_genes,
    rownames(tpm)
  )
)

# Without any overlapping gene there is nothing to score; stop here with an
# explicit message rather than failing later during scoring.
if (length(gene_set_list[["HALLMARK_INTERFERON_GAMMA_RESPONSE"]]) == 0) {
  stop(
    "No HALLMARK_INTERFERON_GAMMA_RESPONSE genes match the row names of `tpm`",
    call. = FALSE
  )
}

# Convert the expression table to a matrix and force numeric storage; any
# value that cannot be parsed as a number becomes NA (with a warning).
tpm_matrix <- as.matrix(tpm)
mode(tpm_matrix) <- "numeric"
library(GSVA)
# Build the ssGSEA parameter object from the expression matrix and the gene
# set list; only these two arguments are supplied.
ssgsea_params <- ssgseaParam(
  exprData = tpm_matrix,
  geneSets = gene_set_list
)

# Compute the enrichment scores described by the parameter object.
ssgsea_scores <- gsva(param = ssgsea_params)

# Extract the interferon-gamma response row as a plain numeric vector.
ifng_score <- as.numeric(
  ssgsea_scores["HALLMARK_INTERFERON_GAMMA_RESPONSE", ]
)
# Correlation test (cor.test defaults) between UBR5 expression and the
# IFN-gamma response score. Run once, store the result, and print it
# explicitly so the summary is shown however the script is executed.
correlation_result <- cor.test(ubr5_expr, ifng_score)
print(correlation_result)

# Correlation coefficient rounded to three decimals for display.
cor_coefficient <- round(correlation_result$estimate, 3)

# Keep three significant digits rather than three decimals: rounding to
# decimals turns any p-value below 0.0005 into 0, which then prints as 0
# even when scientific notation is requested.
p_value <- signif(correlation_result$p.value, 3)
# Plotting
library(ggplot2)
# Assemble the two vectors to plot into one data frame.
data_for_plot <- data.frame(
  UBR5_Expression = ubr5_expr,
  IFNG_Score = ifng_score
)

# Scatter plot with a linear-regression trend line; the correlation
# coefficient and p-value are annotated at the bottom-right corner of the data.
ggplot(data_for_plot, aes(x = UBR5_Expression, y = IFNG_Score)) +
  geom_point(color = "blue") +
  labs(
    title = "Correlation between UBR5 Expression and IFN-gamma Response",
    x = "UBR5 Expression",
    y = "Hallmark IFN-gamma Response Score"
  ) +
  theme_minimal() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  annotate(
    "text",
    # na.rm = TRUE keeps the anchor position finite if any value is missing.
    x = max(ubr5_expr, na.rm = TRUE),
    y = min(ifng_score, na.rm = TRUE),
    # Format the unrounded p-value to three significant digits; formatting a
    # value already rounded to three decimals shows "0e+00" for small p-values.
    label = paste(
      "r = ", cor_coefficient,
      "\np = ",
      format(signif(correlation_result$p.value, 3), scientific = TRUE)
    ),
    hjust = 1, vjust = 0, color = "black", size = 5, fontface = "italic"
  )
# Display values for the annotation: coefficient rounded to three decimals and
# the p-value as text.
cor_coefficient <- round(correlation_result$estimate, 3)

# Fixed notation with seven decimals prints "0.0000000" for very small
# p-values, so switch to scientific notation below 1e-4 and keep the
# seven-decimal fixed format otherwise. An NA p-value falls through to the
# fixed format and prints as "NA".
p_value <- if (!is.na(correlation_result$p.value) &&
  correlation_result$p.value < 1e-4) {
  formatC(correlation_result$p.value, format = "e", digits = 2)
} else {
  formatC(correlation_result$p.value, format = "f", digits = 7)
}

# Scatter plot with a linear-regression trend line; r and p are annotated at
# the bottom-right corner of the data.
ggplot(data_for_plot, aes(x = UBR5_Expression, y = IFNG_Score)) +
  geom_point(color = "blue") +
  labs(
    title = "Correlation between UBR5 Expression and IFN-gamma Response",
    x = "UBR5 Expression",
    y = "Hallmark IFN-gamma Response Score"
  ) +
  theme_minimal() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  annotate(
    "text",
    # na.rm = TRUE keeps the anchor position finite if any value is missing.
    x = max(ubr5_expr, na.rm = TRUE),
    y = min(ifng_score, na.rm = TRUE),
    label = paste("r = ", cor_coefficient, "\np = ", p_value),
    hjust = 1, vjust = 0, color = "black", size = 5, fontface = "italic"
  )
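# NOTE: the lines below are leftover text from a web code-snippet export, not
# R code; they are commented out so the script parses.
# Preview:
# Download PNG
# Download JPEG
# Download SVG
# Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
# Click to optimize width for Twitter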