# 作者:ahworld
# 链接:GO 富集结果可视化优化笔记
# 来源:微信公众号seqyuan
# 著作权归作者所有,任何形式的转载都请联系作者。
library(clusterProfiler)
library(ggplot2)
library(dplyr)
library(org.Hs.eg.db)
library(viridis)
# Read the input gene list: a single column of gene SYMBOLs. No header is
# declared, so the column is addressed as V1 below.
genes_df <- read.table("genes.txt")

# Translate the gene symbols into Entrez IDs using the human annotation
# database; the enrichment call below is keyed on ENTREZID.
geneID <- bitr(
  genes_df$V1,
  fromType = "SYMBOL",
  toType = c("ENTREZID", "SYMBOL"),
  OrgDb = org.Hs.eg.db
)

# GO enrichment across all ontologies (ont = "ALL") with BH-adjusted p-values.
# readable = TRUE (spelled out; `T` is an ordinary variable that can be
# reassigned) asks enrichGO to report gene symbols instead of Entrez IDs.
all.GO <- enrichGO(
  gene = geneID$ENTREZID,
  OrgDb = org.Hs.eg.db,
  keyType = "ENTREZID",
  ont = "ALL",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.01,
  qvalueCutoff = 0.05,
  readable = TRUE
)

# Flatten the enrichment result into a plain data frame for dplyr/ggplot2.
allgg <- as.data.frame(all.GO)
# Keep the five terms with the smallest p-value within each ONTOLOGY.
# The table is sorted by p-value first, then the head of every group is taken,
# so rows stay ordered by p-value inside each ontology. ungroup() drops the
# grouping so that later steps operate on a plain (ungrouped) tibble.
top5 <- allgg %>%
  arrange(pvalue) %>%
  group_by(ONTOLOGY) %>%
  slice_head(n = 5) %>%
  ungroup()

# Only the CC and MF ontologies are plotted, CC rows first.
df_top5 <- rbind(
  subset(top5, ONTOLOGY == "CC"),
  subset(top5, ONTOLOGY == "MF")
)
df_top5$ONTOLOGY <- factor(df_top5$ONTOLOGY, levels = c("CC", "MF"))

# Fix the display order of the terms: levels are reversed so that the first
# row ends up at the top of the y axis. unique() guards against a description
# that occurs in both ontologies, since factor() rejects duplicated levels.
df_top5$Description <- factor(
  df_top5$Description,
  levels = rev(unique(as.character(df_top5$Description)))
)
# Inline plot size (repr.* options, used by notebook front ends): 7 x 6.5.
options(repr.plot.width = 7, repr.plot.height = 6.5)

# Bar fill colours for the two ontologies, and the palette names that can be
# passed as `option` to the viridis colour scale.
mycol3 <- c("#6BA5CE", "#F5AA5F")
cmap <- c(
  "viridis", "magma", "inferno", "plasma",
  "cividis", "rocket", "mako", "turbo"
)

# Horizontal bar chart: one bar per GO term, bar length is the gene count and
# the fill colour encodes the ontology. The y-axis labels are hidden and the
# term names are drawn inside the panel instead.
p <- ggplot(df_top5, aes(x = Count, y = Description, fill = ONTOLOGY)) +
  geom_col(width = 0.5) +
  # Term description, left-aligned near the origin of each bar.
  geom_text(
    aes(x = 0.1, y = Description, label = Description),
    size = 4.8,
    hjust = 0
  ) +
  # Gene list of each term, shifted below the description (vjust) and coloured
  # by -log10(pvalue). `geneID` here is presumably the gene-list column of the
  # enrichment table; data columns take precedence over same-named globals.
  geom_text(
    aes(x = 0.1, y = Description, label = geneID, color = -log10(pvalue)),
    size = 4,
    fontface = "italic",
    hjust = 0,
    vjust = 2.7
  ) +
  scale_x_continuous(expand = c(0, 0.5)) +
  scale_fill_manual(values = alpha(mycol3, 0.66)) +
  # cmap[7] selects the "mako" palette; direction = -1 reverses it.
  scale_colour_viridis(option = cmap[7], direction = -1) +
  labs(title = "Enriched top 5 CC and MF") +
  theme_classic() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 13),
    axis.text = element_text(size = 11)
  )
# Write the figure to gene_GO.pdf at 7 x 6.5 (ggsave's default units).
# Argument names are spelled out rather than relying on partial matching of
# the abbreviations `w` and `h`.
ggsave("gene_GO.pdf", plot = p, width = 7, height = 6.5)