欢迎关注Bioinfor 生信云!
这一期我们来讲讲富集分析。
构建OrgDB
OrgDB是bioconductor中存储不同数据库基因ID之间的对应关系,以及基因与GO等注释的对应关系的R软件包。共有19个物种,可以在这里下载
没有OrgDB的物种可以通过AnnotationForge自己构建
使用eggnog-mapper的注释结果构建OrgDB(私聊我获取构建OrgDB脚本)
#安装自己构建的OrgDB
dir.create('R_lib', recursive = T)
install.packages('org.My.eg.db_1.0.tar.gz',
repos = NULL,
lib = 'R_lib')
library(org.My.eg.db, lib = 'R_lib')
GO富集(自建OrgDB的物种)
gene <- filter(de_result,
abs(log2FoldChange) > 1 & padj < 0.05) %>%
pull(id)
geneList <- de_result$log2FoldChange
names(geneList) <- de_result$id
geneList <- sort(geneList, decreasing = T)
de_go <- enrichGO(gene = gene,
OrgDb = org.My.eg.db,
keyType = 'GID',
ont = 'ALL',
qvalueCutoff = 0.05,
pvalueCutoff = 0.05)
de_go_df <- as.data.frame(de_go)
barplot(de_ego, showCategory = 10)
dotplot(de_ego, showCategory = 10)
cnetplot(de_ego,
foldChange = geneList,
showCategory = 5,
node_label = "all", # category | gene | all | none
circular = TRUE,
colorEdge = TRUE)
GO富集(已有OrgDB的物种,拟南芥为例)
library(org.At.tair.db)
de_go <- enrichGO(gene = gene,
OrgDb = org.At.tair.db,
keyType = 'TAIR',
ont = 'ALL',
qvalueCutoff = 0.05,
pvalueCutoff = 0.05)
barplot(de_go, showCategory = 10)
dotplot(de_go, showCategory = 10)
test <- pairwise_termsim(de_go)
p1 <- treeplot(test, showCategory = 30)
p2 <- treeplot(test, hclust_method = "average")
plot_grid(p1, p2, lables = c('A', 'B'), nrow = 2)
emapplot(test)
emapplot_cluster(test)
KEGG富集分析(不常见物种)
emapper <- read_delim('query_seqs.fa.emapper.annotations',
"\t", escape_double = FALSE, col_names = FALSE,
comment = "#", trim_ws = TRUE) %>%
dplyr::select(GID = X1,
KO = X9,
Pathway = X10)
pathway2gene <- dplyr::select(emapper, Pathway, GID) %>%
separate_rows(Pathway, sep = ',', convert = F) %>%
filter(str_detect(Pathway, 'ko')) %>%
mutate(Pathway = str_remove(Pathway, 'ko'))
library(magrittr)
get_path2name <- function(){
keggpathid2name.df <- clusterProfiler:::kegg_list("pathway")
keggpathid2name.df[,1] %<>% gsub("path:map", "", .)
colnames(keggpathid2name.df) <- c("path_id","path_name")
return(keggpathid2name.df)
}
pathway2name <- get_path2name()
ibrary(clusterProfiler)
de_kegg <- enricher(gene,
TERM2GENE = pathway2gene,
TERM2NAME = pathway2name,
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
de_kegg_df <- as.data.frame(de_kegg)
barplot(de_kegg, showCategory = 10)
dotplot(de_kegg, showCategory = 10)
cnetplot(de_kegg,
foldChange = geneList,
showCategory = 3,
node_label = "category", # category | gene | all | none
circular = TRUE,
colorEdge = TRUE)
KEGG富集分析(常见物种,拟南芥为例)
library(tidyverse)
library(clusterProfiler)
de_kegg <- enrichKEGG(gene,
keyType = 'kegg',
organism = 'ath',
pvalueCutoff = 0.05,
pAdjustMethod = 'BH')
barplot(de_kegg, showCategory = 10)
dotplot(de_kegg, showCategory = 10)
cnetplot(de_kegg,
foldChange = geneList,
showCategory = 3,
node_label = "category", # category | gene | all | none
circular = TRUE,
colorEdge = TRUE)