论文
Plasma proteome analyses in individuals of European and African ancestry identify cis-pQTLs and models for proteome-wide association studies
https://www.nature.com/articles/s41588-022-01051-w
本地pdf s41588-022-01051-w.pdf
代码链接
https://zenodo.org/record/6332981#.YroV0nZBzic
https://github.com/Jingning-Zhang/PlasmaProtein/tree/v1.2
今天的推文重复一下论文中的Figure4中的一个小图
部分示例数据截图
读取数据
dat01<-read.delim("data/20220627/Fig4.tsv",
header=TRUE,
sep="\t")
dim(dat01)
head(dat01)
根据disease列对数据进行筛选
dat_all<-dat01[dat01$disease=="Urate",]
统计染色体数
nCHR<-length(unique(dat_all$CHR))
nCHR
计算每个染色体中心位置坐标
这个是用来添加横坐标文本标签的
library(tidyverse)
axis.set<-dat_all %>%
group_by(CHR) %>%
summarise(center=(max(BPcum)+min(BPcum))/2)
axis.set
根据tissue列对数据进行筛选
dat <- dat_all[dat_all$tissue=="Plasma",]
作图代码
ggplot(data = dat,
aes(x=BPcum,y=-log10(P),
color=as.factor(CHR),
size=-log10(P)))+
geom_point()+
scale_x_continuous(labels = axis.set$CHR,
breaks = axis.set$center,
limits = c(min(dat_all$BPcum),max(dat_all$BPcum)))+
scale_y_continuous(expand = c(0,0), limits = c(0, 32 )) +
scale_color_manual(values = rep(c("#4292c6", "#08306b"), nCHR)) +
scale_size_continuous(range = c(0.5,3))+
geom_hline(yintercept = -log10(3.7*10^(-5)),
lty="dashed")+
guides(color = "none") +
labs(x = NULL,
title = NULL) +
ylab( TeX("$-log_{10}(p)$") )+
theme_minimal() +
theme(
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 0, size = 5, vjust = 0.5),
axis.text.y = element_text(angle = 0, size = 6, vjust = 0.5),
axis.title = element_text(size=7),
plot.title = element_text(size = 7, face = "bold"),
plot.subtitle = element_text(size = 7),
axis.line = element_line(),
axis.ticks = element_line()
) -> p
p
给选定的基因添加文本标签
label <- c("INHBB","ITIH1","BTN3A3","INHBA","C11orf68","B3GAT3","INHBC(7.95e-63)","SNUPN","NEO1","FASN")
labels_df.pwas <- data.frame(label=label,
logP=-log10(dat$P[match(label,dat$ID)]),
BPcum=dat$BPcum[match(label,dat$ID)],
CHR=dat$CHR[match(label,dat$ID)])
labels_df.pwas <- labels_df.pwas[order(labels_df.pwas$BPcum),]
p +
ggrepel::geom_label_repel(data = labels_df.pwas[1:4,],
aes(x = .data$BPcum,
y = .data$logP,
label = .data$label), col="black",
size = 1.5, segment.size = 0.2,
point.padding = 0.3,
ylim = c(5, 30),
nudge_y=0.2,
direction = "y",
min.segment.length = 0, force = 2,
box.padding = 0.5) +
ggrepel::geom_label_repel(data = labels_df.pwas[7,],
aes(x = .data$BPcum,
y = .data$logP,
label = .data$label), col="black",
size = 1.5, segment.size = 0.2,
point.padding = 0.3,
direction = "y",
ylim = c(20, 30),
min.segment.length = 0, force = 2,
box.padding = 0.5)+
ggrepel::geom_label_repel(data = labels_df.pwas[c(5,6,8:10),],
aes(x = .data$BPcum,
y = .data$logP,
label = .data$label), col="black",
size = 1.5, segment.size = 0.2,
point.padding = 0.3,
ylim = c(6, 25),
min.segment.length = 0, force = 2,
box.padding = 0.5)
拼图
p+
scale_color_manual(values = rep(c("red", "darkgreen"), nCHR))+
theme(legend.direction = "horizontal")+
p2 + theme(legend.direction = "horizontal")+
plot_layout(guides="collect")+
plot_annotation(theme = theme(legend.position = "top"))
示例数据和代码可以自己到论文中获取,或者给本篇推文点赞,点击在看,然后留言获取
欢迎大家关注我的公众号
小明的数据分析笔记本
小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!