# Load the TPM table from GemTPM_Primary.csv and extract the expression
# vectors of UBR5 and the target gene (one value per sample column).
#
# NOTE(review): the hard-coded setwd() ties this script to one machine. It is
# kept so that relative paths keep resolving exactly as before; consider an
# RStudio project or a configurable data directory instead.
setwd("C:\\Users\\d.a.odhiambo\\Desktop\\UBR5 vs Mets Analysis")

# Load TPM data (the first column is used as the gene identifier below)
tpm <- read.csv("GemTPM_Primary.csv")

# Keep only the first row for each gene identifier so that the identifiers
# are unique and can be used as row names
tpm <- tpm[!duplicated(tpm[, 1]), ]
rownames(tpm) <- tpm[, 1]
# drop = FALSE keeps a data frame (and its row names) even when only a single
# sample column remains after removing the identifier column
tpm <- tpm[, -1, drop = FALSE]

# Define target gene
target_gene <- "IFNGR1"

# Fail early with a clear message if either gene is absent. Without this
# check, data-frame row indexing by name partially matches row names, so a
# missing gene could silently yield NAs or pick up a different, longer name.
missing_genes <- setdiff(c("UBR5", target_gene), rownames(tpm))
if (length(missing_genes) > 0) {
  stop(
    "Gene(s) not found in GemTPM_Primary.csv: ",
    paste(missing_genes, collapse = ", "),
    call. = FALSE
  )
}

# Extract expression by exact row-name match; unlist() flattens the one-row
# data frame into a plain vector before numeric conversion
ubr5_expr <- as.numeric(
  unlist(tpm[match("UBR5", rownames(tpm)), , drop = FALSE])
)
target_expr <- as.numeric(
  unlist(tpm[match(target_gene, rownames(tpm)), , drop = FALSE])
)
# Plotting ----
# Scatter plot of UBR5 against the target gene with a linear fit, labels on
# the highest-expressing samples and the Pearson correlation annotated.
library(ggplot2)
library(ggrepel)

# Column names of tpm are used as the sample names
sample_names <- colnames(tpm)

# One row per sample: sample name, UBR5 expression and target-gene
# expression. The third column is named after `target_gene` (e.g. "IFNGR1"),
# so the gene only has to be changed in one place.
correlation_data <- data.frame(
  Sample = sample_names,
  UBR5 = ubr5_expr,
  target = target_expr
)
names(correlation_data)[3] <- target_gene

# Label samples with "high" target-gene expression: strictly above the 90th
# percentile (top 10%); adjust the probability to change the cut-off
high_target_thresh <- quantile(
  correlation_data[[target_gene]], 0.9,
  na.rm = TRUE
)
# Samples with missing expression get an empty label rather than NA
is_high <- !is.na(correlation_data[[target_gene]]) &
  correlation_data[[target_gene]] > high_target_thresh
correlation_data$Label <- ifelse(is_high, correlation_data$Sample, "")

# Pearson correlation between UBR5 and the target gene
cor_test <- cor.test(
  correlation_data$UBR5,
  correlation_data[[target_gene]],
  method = "pearson"
)
cor_value <- round(cor_test$estimate, 3)
p_value <- signif(cor_test$p.value, 3)

correlation_plot <- ggplot(
  correlation_data,
  aes(x = .data[["UBR5"]], y = .data[[target_gene]])
) +
  geom_point(color = "blue", size = 3, alpha = 0.7) +
  # Explicit formula: same fit as the default, without the per-run message
  geom_smooth(method = "lm", formula = y ~ x, color = "red", se = TRUE) +
  geom_text_repel(
    aes(label = .data[["Label"]]),
    size = 3,
    max.overlaps = Inf
  ) +
  labs(
    title = paste0("Correlation between UBR5 and ", target_gene, "_PT"),
    x = "UBR5 Expression",
    y = paste(target_gene, "Expression")
  ) +
  theme_minimal() +
  # Correlation summary, left-aligned at 80% of the maximum UBR5 value and
  # at the maximum target-gene value
  annotate(
    "text",
    x = max(correlation_data$UBR5, na.rm = TRUE) * 0.8,
    y = max(correlation_data[[target_gene]], na.rm = TRUE),
    label = paste("Pearson r =", cor_value, "\nP-value =", p_value),
    size = 5,
    color = "black",
    hjust = 0
  )

# Print explicitly so the plot is also drawn when the script is source()'d,
# where top-level values are not auto-printed
print(correlation_plot)