散点图:菌群与代谢物的相关性
散点图能较好地展示两坐标的相关趋势,再通过线性回归可计算两指标的相关关系,最终结合散点图以及相关关系结果共同在图上展示其结果。
导入所需R包
library(dplyr)
library(tibble)
library(ggplot2)
library(patchwork)
grp <- c("Case", "Control")
grp.col <- c("#283891", "#ED1C24")
读取数据
phen <- read.csv("phenotype.csv")
metabolite <- read.csv("metabolite.csv") %>% column_to_rownames("X")
phylum <- read.csv("phylum.csv") %>% column_to_rownames("phylum")
函数
cor_plot_fun <- function(meta="Putrescine",
taxo="p__Bacteroidetes"){
mdat <- phen %>% inner_join(metabolite %>%
rownames_to_column("SampleID"),
by="SampleID") %>%
tidyr::gather(key="metabolism", value="MetaScore",
-c("SampleID", "Group")) %>%
inner_join(phylum %>% t() %>% data.frame() %>%
select(taxo) %>%
setNames("TaxScore") %>%
rownames_to_column("SampleID"),
by="SampleID")
xlab <- paste(meta, "activity score")
ylab <- paste(taxo, "Abundance")
mdat_cln <- mdat %>% filter(metabolism%in%meta) %>%
mutate(Group=factor(Group, levels = grp))
dat_lab <- c()
fr <- levels(mdat_cln$Group)
for(i in 1:length(fr)){
mdat_cln_fr <- mdat_cln %>% filter(Group%in%fr[i])
fit <- lm(formula = TaxScore ~ MetaScore, data = mdat_cln_fr)
lm.lst <- list(a = as.numeric(format(coef(fit)[1], digits = 4)),
b = as.numeric(format(coef(fit)[2], digits = 4)),
r2 = format(summary(fit)$r.squared, digits = 4),
p = format(summary(fit)$coefficients[2,4], digits = 4))
eq <- substitute(italic(y) == a + b %.% italic(x)~","~italic(R)^2~"="~r2~","~italic(P)~"="~p, lm.lst)
lab <- substitute(~italic(R)^2~"="~r2~","~italic(P)~"="~p, lm.lst)
MScore <- with(mdat_cln_fr, (max(MetaScore)-min(MetaScore))/2+min(MetaScore))
TScore <- with(mdat_cln_fr, max(TaxScore)-(max(TaxScore)-min(TaxScore))/8)
temp <- data.frame(MetaScore=MScore,
TaxScore=TScore,
Group=fr[i],
label=as.character(as.expression(lab)))
dat_lab <- rbind(dat_lab, temp)
}
pl <- ggplot(mdat_cln, aes(x=MetaScore, y=TaxScore))+
geom_point()+
stat_smooth(method='lm', formula = y~x, color="red")+
geom_text(data=dat_lab, aes(x=MetaScore, y=TaxScore, label = label),
size = 4, parse = TRUE) +
labs(x=xlab, y=ylab)+
facet_wrap(~Group, scales = "free")+
theme_bw()+
theme(axis.title = element_text(face = 'bold',color = 'black',size = 10),
axis.text.y = element_text(color = 'black',size = 8),
axis.text.x = element_text(color = 'black',size = 8),
strip.text = element_text(size = 10, face = "bold"))
return(pl)
}
运行函数
- p__Bacteroidetes 和三类代谢物的相关关系
Putrescine_plot <- cor_plot_fun(meta = "Putrescine", taxo="p__Bacteroidetes")
Eicosenoic__plot <- cor_plot_fun(meta = "Eicosenoic_acid", taxo="p__Bacteroidetes")
Glyceric_plot <- cor_plot_fun(meta = "Glyceric_acid", taxo="p__Bacteroidetes")
(Putrescine_plot / Eicosenoic__plot / Glyceric_plot) +
plot_layout(ncol = 1)
结果显示,p__Bacteroidetes与三类代谢物均不存在显著相关关系。
sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19042)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
system code page: 936
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] patchwork_1.0.1 ggplot2_3.3.2 tibble_3.0.3 dplyr_1.0.0
loaded via a namespace (and not attached):
[1] rstudioapi_0.11 knitr_1.29 magrittr_1.5 tidyselect_1.1.0 munsell_0.5.0
[6] colorspace_1.4-1 R6_2.4.1 rlang_0.4.7 tools_4.0.2 grid_4.0.2
[11] gtable_0.3.0 xfun_0.18 withr_2.2.0 htmltools_0.5.0 ellipsis_0.3.1
[16] yaml_2.2.1 digest_0.6.25 lifecycle_0.2.0 crayon_1.3.4 purrr_0.3.4
[21] vctrs_0.3.2 glue_1.4.1 evaluate_0.14 rmarkdown_2.4 compiler_4.0.2
[26] pillar_1.4.6 generics_0.0.2 scales_1.1.1 pkgconfig_2.0.3