作者,追风少年i
今天我们继续距离分析,这个分析在空间转录组是比较关键的一环。
之前分享的距离分析文章,主要针对区域和细胞类型的关系,
大家可以复习一下2024年度单细胞空间课程系列的第23课,空间轨迹。
那如果衡量两种细胞类型的空间距离分析呢?尤其是多样本进行比较的情况下
我们来实现一下
首先第一步获取距离矩阵
library(Seurat)
source("distance_comparison_function.R")
spatial_SO <- readRDS(file = 'spatial.rds')
spot_diameter_fullres = spatial_SO@images$image@scale.factors$fiducial###383.6347
pos <- GetTissueCoordinates(spatial_SO, scale = NULL) # returns positions in pixels
pos$imagerow <- pos$imagerow*(65/spot_diameter_fullres) # converts pixels to microns
pos$imagecol <- pos$imagecol*(65/spot_diameter_fullres) # converts pixels to microns
dist_mat <- matrix(NA, nrow = nrow(pos), ncol = nrow(pos))
# Nested Loop through rows and columns of position (pos) df
for(n in 1:nrow(pos)){
for(j in 1:nrow(pos)){
if(n == j){
dist_mat[n,j] <- 0
} else {
point1 <- as.list(pos[n,])
point2 <- as.list(pos[j,])
dist <- sqrt((point1$imagerow - point2$imagerow)^2 + (point1$imagecol - point2$imagecol)^2)
dist_mat[n,j] <- dist
dist_mat[j,n] <- dist
}
}
}
colnames(dist_mat) <- rownames(pos)
rownames(dist_mat) <- rownames(pos)
计算距离,以三种细胞类型为例
proportions <- data.frame(
"pEMT" = c(spatial_SO$pEMT),
"PTC" = c(spatial_SO$PTC),
"myCAF" = c(spatial_SO$myCAF)
)
# Calculate min. distance of spots with at least 10% pEMT PTC phenotype from spots with at least 10% myCAF phenotype
pEMT_myCAF_distance <- distance_comparison(distances = distance_matrix,
proportions = proportions,
topic1 = "pEMT",
topic2 = "myCAF",
t1_thresh = 0.1,
t2_thresh = 0.1)
# Calculate min. distance of spots with at least 10% PTC phenotype from spots with at least 10% myCAF phenotype
PTC_myCAF_distance <- distance_comparison(distances = distance_matrix,
proportions = proportions,
topic1 = "PTC",
topic2 = "myCAF",
t1_thresh = 0.1,
t2_thresh = 0.1)
combined_distance <- rbind(pEMT_myCAF_distance, PTC_myCAF_distance)
# Violin plot to compare distances
distance_violin <- ggplot(combined_distance, aes(x = barcodes, y = min, fill = barcodes)) +
geom_violin(scale = "width") +
theme_classic() +
scale_fill_manual(values = c("darkgrey", "#FFCC99")) +
scale_x_discrete(name ="Deconvolution", limits = c("PTC vs. myCAF", "pEMT vs. myCAF"))
# Save violin plot
ggsave(file = paste0("outputs/Distance_Plots/", samples[i], "/PTC_pEMT_myCAF_Min_Distance_Violin.png"),
distance_violin, height = 5, width = 6, dpi = 600)
# For each sample calculate the mean and the median of the minimum distance from myCAF for pEMT PTC and PTC
mean_median_min_distances <- data.frame(
"sample" = c(samples[i]),
"pEMT_myCAF_mean" = c(mean(pEMT_myCAF_distance$min)),
"PTC_myCAF_mean" = c(mean(PTC_myCAF_distance$min)),
"pEMT_myCAF_median" = c(median(pEMT_myCAF_distance$min)),
"PTC_myCAF_median" = c(median(PTC_myCAF_distance$min))
)
distances_overview = mean_median_min_distances
distances_overview_plotting <- distances_overview %>%
pivot_longer(
cols = starts_with("pEMT_myCAF_mean") | starts_with("pEMT_myCAF_median") | starts_with("PTC_myCAF_mean") | starts_with("PTC_myCAF_median"),
names_to = c("Distance_Group"),
values_to = "Distance"
)
# plots based on median
median_distances_plotting <- distances_overview_plotting %>% subset(Distance_Group == "pEMT_myCAF_median" | Distance_Group == "PTC_myCAF_median")
# make plot
plot <- ggplot(median_distances_plotting, aes(Distance_Group, Distance)) +
geom_boxplot(outlier.size = -1,
aes(fill = Distance_Group),
alpha = 0.9,
show.legend = FALSE) +
geom_point(aes(),
#position = position_jitter(width = 0.1, height = 0),
size = 2,
alpha = 0.7,
show.legend = FALSE) +
geom_line(aes(group = sample), color = "black", linewidth = 0.5, alpha = 0.5, linetype = "dashed") +
scale_fill_manual(values = c("darkgrey", "#FFCC99")) +
labs (x = "Tumor", y = "iPVCAF Correlation") +
theme_classic() +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
axis.title.x = element_text(face = "bold", size = 14.5),
axis.title.y = element_text(face = "bold", size = 12),
axis.text.x = element_text(face = "bold", size = 10),
axis.text.y = element_text(face = "bold", size = 16)) +
scale_x_discrete(name ="Deconvolution", limits = c("PTC_myCAF_median", "pEMT_myCAF_median")) +
scale_y_continuous(breaks = c(0, 300, 600, 900, 1200, 1500),
limits = c(0, 1550))
# Not, max and min values need to be restricted for each new gene that is plotted
ggsave("PTC_pEMT_myCAF_Distance_Boxplots.png",
width = 2.75, height = 3,
plot, dpi = 600, create.dir = TRUE)
# Plot with distance as log2 with a pseudocount to avoid log2(0)
plot <- ggplot(median_distances_plotting, aes(Distance_Group, log2(Distance+1))) +
geom_boxplot(outlier.size = -1,
aes(fill = Distance_Group),
alpha = 0.9,
show.legend = FALSE) +
geom_point(aes(),
#position = position_jitter(width = 0.1, height = 0),
size = 2,
alpha = 0.7,
show.legend = FALSE) +
geom_line(aes(group = sample), color = "black", linewidth = 0.5, alpha = 0.5, linetype = "dashed") +
scale_fill_manual(values = c("darkgrey", "#FFCC99")) +
labs (x = "Tumor", y = "iPVCAF Correlation") +
theme_classic() +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
axis.title.x = element_text(face = "bold", size = 14.5),
axis.title.y = element_text(face = "bold", size = 12),
axis.text.x = element_text(face = "bold", size = 10),
axis.text.y = element_text(face = "bold", size = 16)) +
scale_x_discrete(name ="Deconvolution", limits = c("PTC_myCAF_median", "pEMT_myCAF_median")) +
scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10),
limits = c(0, 11))
# Not, max and min values need to be restricted for each new gene that is plotted
ggsave("PTC_pEMT_myCAF_Log2_Distance_Boxplots.png",
width = 2.75, height = 3,
plot, dpi = 600, create.dir = TRUE)
######### MEAN plots ################
# Repeat of median plots above but using mean instead of median
# mean does a better job of capturing the variability
mean_distances_plotting <- distances_overview_plotting %>% subset(Distance_Group == "pEMT_myCAF_mean" | Distance_Group == "PTC_myCAF_mean")
plot <- ggplot(mean_distances_plotting, aes(Distance_Group, Distance)) +
geom_boxplot(outlier.size = -1,
aes(fill = Distance_Group),
alpha = 0.9,
show.legend = FALSE) +
geom_point(aes(),
#position = position_jitter(width = 0.1, height = 0),
size = 2,
alpha = 0.7,
show.legend = FALSE) +
geom_line(aes(group = sample), color = "black", linewidth = 0.5, alpha = 0.5, linetype = "dashed") +
scale_fill_manual(values = c("darkgrey", "#FFCC99")) +
labs (x = "Tumor", y = "iPVCAF Correlation") +
theme_classic() +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
axis.title.x = element_text(face = "bold", size = 14.5),
axis.title.y = element_text(face = "bold", size = 12),
axis.text.x = element_text(face = "bold", size = 10),
axis.text.y = element_text(face = "bold", size = 16)) +
scale_x_discrete(name ="Deconvolution", limits = c("PTC_myCAF_mean", "pEMT_myCAF_mean")) +
scale_y_continuous(breaks = c(0, 400, 800, 1200, 1600),
limits = c(0, 1700))
# Not, max and min values need to be restricted for each new gene that is plotted
ggsave("PTC_pEMT_myCAF_Mean_Distance_Boxplots.png",
width = 2.75, height = 3,
plot, dpi = 600, create.dir = TRUE)
plot <- ggplot(mean_distances_plotting, aes(Distance_Group, log2(Distance+1))) +
geom_boxplot(outlier.size = -1,
aes(fill = Distance_Group),
alpha = 0.9,
show.legend = FALSE) +
geom_point(aes(),
#position = position_jitter(width = 0.1, height = 0),
size = 2,
alpha = 0.7,
show.legend = FALSE) +
geom_line(aes(group = sample), color = "black", linewidth = 0.5, alpha = 0.5, linetype = "dashed") +
scale_fill_manual(values = c("darkgrey", "#FFCC99")) +
labs (x = "Tumor", y = "iPVCAF Correlation") +
theme_classic() +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
axis.title.x = element_text(face = "bold", size = 14.5),
axis.title.y = element_text(face = "bold", size = 12),
axis.text.x = element_text(face = "bold", size = 10),
axis.text.y = element_text(face = "bold", size = 16)) +
scale_x_discrete(name ="Deconvolution", limits = c("PTC_myCAF_mean", "pEMT_myCAF_mean")) +
scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10),
limits = c(0, 11))
# Not, max and min values need to be restricted for each new gene that is plotted
ggsave("PTC_pEMT_myCAF_Log2_Mean_Distance_Boxplots.png",
width = 2.75, height = 3,
plot, dpi = 600, create.dir = TRUE)
# stats
distance_wide <- pivot_wider(mean_distances_plotting, names_from = Distance_Group, values_from = Distance)
t.test(distance_wide$pEMT_myCAF_mean, distance_wide$PTC_myCAF_mean, paired = TRUE)
需要source的脚本
distance_comparison <- function(distances, proportions, topic1, topic2, t1_thresh, t2_thresh){
# Filter the desired topics for barcode proportions greater than threshold and only keep barcode and topic column
filtered_props_1 <- proportions[proportions[,topic1] > t1_thresh, ]
rownames_holder <- rownames(filtered_props_1)
filtered_props_1 <- data.frame(filtered_props_1[, c(topic1)])
rownames(filtered_props_1) <- rownames_holder
rm(rownames_holder)
colnames(filtered_props_1) <- c(topic1)
print(filtered_props_1)
# Filter the desired topics for barcode proportions greater than threshold and only keep barcode and topic column
filtered_props_2 <- proportions[proportions[,topic2] > t2_thresh, ]
rownames_holder <- rownames(filtered_props_2)
filtered_props_2 <- data.frame(filtered_props_2[, c(topic2)])
rownames(filtered_props_2) <- rownames_holder
rm(rownames_holder)
colnames(filtered_props_2) <- c(topic2)
print(filtered_props_2)
# Create a new matrix that contains the first topic as rows and the second topic as columns
dist_filtered <- distances
dist_filtered <- dist_filtered[rownames(dist_filtered) %in% rownames(filtered_props_1), ]
# Filter columns
dist_filtered <- dist_filtered[, colnames(dist_filtered) %in% rownames(filtered_props_2)]
print(dist_filtered)
# Apply min function to each row (1=rows, 2=columns)
min_df <- apply(dist_filtered, 1, function(x) min(x))
# Convert to dataframe
min_df <- as.data.frame(min_df)
colnames(min_df) <- c("min")
comp_str <- paste0(topic1, " vs. ", topic2)
min_df$barcodes <- comp_str
print(min_df)
return(min_df)
}
生活很好,有你更好