ssGSEA UBR5 vs Hallmark IFNg response
Tue Apr 29 2025 21:46:32 GMT+0000 (Coordinated Universal Time)
Saved by @1234_5
# ssGSEA: UBR5 expression vs. Hallmark interferon-gamma response ----
#
# Workflow:
#   1. Read a TPM table whose first CSV column holds the gene identifiers
#      (they become the row names; the remaining columns are treated as
#      samples).
#   2. Score every sample for HALLMARK_INTERFERON_GAMMA_RESPONSE with ssGSEA.
#   3. Correlate the per-sample score with UBR5 expression (cor.test default,
#      i.e. Pearson) and draw an annotated scatter plot.

library(msigdbr)
library(GSVA)
library(dplyr)
library(ggplot2)

# Configuration ----
# The input is addressed through file.path() instead of setwd(), so the
# script no longer changes the session's working directory.
data_dir <- "C:\\Users\\d.a.odhiambo\\Desktop\\UBR5 vs Mets Analysis"
tpm_file <- "GemTPM_Primary.csv"
target_gene <- "UBR5"
ifng_gene_set <- "HALLMARK_INTERFERON_GAMMA_RESPONSE"

# Load TPM data ----
tpm <- read.csv(file.path(data_dir, tpm_file))

# Keep only the first row for each gene identifier (column 1) so that the
# identifiers are unique and can be used as row names, then drop the column.
tpm <- tpm[!duplicated(tpm[, 1]), ]
rownames(tpm) <- tpm[, 1]
# drop = FALSE keeps a data frame even if only one sample column remains.
tpm <- tpm[, -1, drop = FALSE]

# Prepare expression matrix (ensure it's numeric)
tpm_matrix <- as.matrix(tpm)
mode(tpm_matrix) <- "numeric"

# Fail early with a clear message instead of carrying an all-NA vector into
# cor.test() when the gene is absent from the table.
if (!target_gene %in% rownames(tpm_matrix)) {
  stop("Gene '", target_gene, "' not found in ", tpm_file, call. = FALSE)
}
ubr5_expr <- as.numeric(tpm_matrix[target_gene, ])

# Hallmark interferon-gamma response gene set ----
# NOTE(review): recent msigdbr releases appear to rename `category` to
# `collection`; confirm against the installed version before changing it.
m_df <- msigdbr(species = "Homo sapiens", category = "H")
hallmark_ifng_genes <- m_df %>%
  filter(gs_name == ifng_gene_set) %>%
  pull(gene_symbol)

# Subset to only genes present in the dataset.
ifng_genes_present <- intersect(hallmark_ifng_genes, rownames(tpm_matrix))
if (length(ifng_genes_present) == 0L) {
  stop(
    "No ", ifng_gene_set, " genes found among the row names of ", tpm_file,
    call. = FALSE
  )
}
gene_set_list <- setNames(list(ifng_genes_present), ifng_gene_set)

# ssGSEA ----
# Step 1: create the ssGSEA parameter object (method defaults are used).
ssgsea_params <- ssgseaParam(exprData = tpm_matrix, geneSets = gene_set_list)

# Step 2: run ssGSEA; the result is indexed below as gene sets x samples.
ssgsea_scores <- gsva(param = ssgsea_params)

# Per-sample score for the interferon-gamma response gene set.
ifng_score <- as.numeric(ssgsea_scores[ifng_gene_set, ])

# Correlation ----
# Run the test once and reuse the result for both printing and plotting.
correlation_result <- cor.test(ubr5_expr, ifng_score)
print(correlation_result)

cor_coefficient <- round(correlation_result$estimate, 3)
# Format the unrounded p-value in scientific notation: rounding first, or
# using a fixed number of decimals, prints very small p-values as zero.
p_value <- formatC(correlation_result$p.value, format = "e", digits = 2)

# Plotting ----
data_for_plot <- data.frame(
  UBR5_Expression = ubr5_expr,
  IFNG_Score = ifng_score
)

ubr5_ifng_plot <- ggplot(
  data_for_plot,
  aes(x = UBR5_Expression, y = IFNG_Score)
) +
  geom_point(color = "blue") +
  # Linear regression line, for visualization only.
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Correlation between UBR5 Expression and IFN-gamma Response",
    x = "UBR5 Expression",
    y = "Hallmark IFN-gamma Response Score"
  ) +
  theme_minimal() +
  # Anchor the statistics label in the bottom-right corner of the data range;
  # na.rm = TRUE keeps the anchor defined if some samples have missing values.
  annotate(
    "text",
    x = max(ubr5_expr, na.rm = TRUE),
    y = min(ifng_score, na.rm = TRUE),
    label = paste0("r = ", cor_coefficient, "\np = ", p_value),
    hjust = 1, vjust = 0,
    color = "black", size = 5, fontface = "italic"
  )

print(ubr5_ifng_plot)
Comments