参考资料
南方医科大学网红博导Y叔的一本神奇的书
https://yulab-smu.github.io/clusterProfiler-book/chapter12.html#bar-plot
好像一直在更新,没怎么宣传过,不管了,拿来学习。
1.安装和加载包
if(!require(clusterProfiler)) BiocManager::install("clusterProfiler")
if(!require(dplyr)) install.packages("dplyr")
#> Loading required package: dplyr
#>
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:AnnotationDbi':
#>
#> select
#> The following objects are masked from 'package:IRanges':
#>
#> collapse, desc, intersect, setdiff, slice, union
#> The following objects are masked from 'package:S4Vectors':
#>
#> first, intersect, rename, setdiff, setequal, union
#> The following object is masked from 'package:Biobase':
#>
#> combine
#> The following objects are masked from 'package:BiocGenerics':
#>
#> combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
if(!require(ggplot2)) install.packages("ggplot2")
library(clusterProfiler)
library(dplyr)
library(ggplot2)
library(enrichplot)
library(cowplot)
2.示例数据
使用limma差异分析的结果,在生信星球公众号回复“富集输入”即可获得
load("step4output.Rdata")
geneList=deg$logFC
names(geneList)=deg$ENTREZID
geneList=sort(geneList,decreasing = T)
geneList2 = geneList[abs(geneList) > 1.5]
gene <- names(geneList)[abs(geneList) > 1.5]
3.GO富集分析
(1)富集分析
ego <- enrichGO(gene = gene,
universe = names(geneList),
OrgDb = "org.Hs.eg.db",
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
head(ego)
#> ID Description GeneRatio BgRatio
#> GO:0042555 GO:0042555 MCM complex 8/330 11/16112
#> GO:0044454 GO:0044454 nuclear chromosome part 34/330 436/16112
#> GO:0000228 GO:0000228 nuclear chromosome 35/330 472/16112
#> GO:0098687 GO:0098687 chromosomal region 28/330 320/16112
#> GO:0000785 GO:0000785 chromatin 33/330 468/16112
#> GO:0000775 GO:0000775 chromosome, centromeric region 16/330 177/16112
#> pvalue p.adjust qvalue
#> GO:0042555 4.453098e-12 1.705536e-09 1.528116e-09
#> GO:0044454 2.276461e-11 4.359423e-09 3.905928e-09
#> GO:0000228 4.620969e-11 5.899437e-09 5.285740e-09
#> GO:0098687 1.037845e-10 9.937367e-09 8.903619e-09
#> GO:0000785 6.295599e-10 4.822429e-08 4.320769e-08
#> GO:0000775 7.480238e-07 4.774885e-05 4.278171e-05
#> geneID
#> GO:0042555 MCM3/MCM8/MCM2/MCM7/MCM5/MCM4/MCM6/MMS22L
#> GO:0044454 SNAI2/JUN/PPARGC1A/SATB1/ZEB2/MUC1/MCM3/CHAF1A/TIPIN/E2F1/MCM2/MCM7/HIST1H3A/HIST1H3J/SGO1/MCM5/MCM4/CDK1/MCM6/ASF1B/HIST1H3G/MMS22L/HIST1H1A/BRCA2/ORC1/RAD51AP1/HIST1H2BB/UHRF1/CDCA5/HIST1H1B/POLE2/ESCO2/BLM/RAD51
#> GO:0000228 SNAI2/JUN/PPARGC1A/SATB1/ZEB2/MUC1/MCM3/CHAF1A/TIPIN/HMGA2/E2F1/MCM2/MCM7/HIST1H3A/HIST1H3J/SGO1/MCM5/MCM4/CDK1/MCM6/ASF1B/HIST1H3G/MMS22L/HIST1H1A/BRCA2/ORC1/RAD51AP1/HIST1H2BB/UHRF1/CDCA5/HIST1H1B/POLE2/ESCO2/BLM/RAD51
#> GO:0098687 KAT2B/MCM3/ZWINT/TTK/NUF2/SKA3/MCM2/MCM7/CENPK/SGO1/DSCC1/MCM5/MAD2L1/MCM4/CDK1/MCM6/HELLS/SPC25/HJURP/SKA1/BRCA2/ORC1/HIST1H2BB/ERCC6L/CDCA5/ESCO2/BLM/RAD51
#> GO:0000785 SNAI2/JUN/PPARGC1A/MAF/SATB1/ZEB2/MUC1/CHAF1A/TIPIN/HMGA2/PHC2/E2F1/MCM2/MCM7/HIST1H3A/HIST1H3J/DSCC1/HIST1H2AB/HIST1H2AI/ASF1B/HELLS/HIST1H2BM/HIST1H3G/HIST1H2AK/WDR76/HIST1H1A/RAD51AP1/HIST1H2BB/UHRF1/CDCA5/HIST1H1B/ESCO2/RAD51
#> GO:0000775 KAT2B/ZWINT/TTK/NUF2/SKA3/CENPK/SGO1/DSCC1/MAD2L1/HELLS/SPC25/HJURP/SKA1/ERCC6L/CDCA5/ESCO2
#> Count
#> GO:0042555 8
#> GO:0044454 34
#> GO:0000228 35
#> GO:0098687 28
#> GO:0000785 33
#> GO:0000775 16
(2)富集分析可视化
噼里啪啦8张图来了,为了省点地方可以做成拼图
#(1)条带图
p1=barplot(ego,showCategory=20)
#(2)点图
p2=dotplot(ego)
#> wrong orderBy parameter; set to default `orderBy = "x"`
#(3)展示top5通路的共同基因
#Gene-Concept Network
p3=cnetplot(ego, categorySize="pvalue", foldChange=geneList,colorEdge = TRUE)
p4=cnetplot(ego, foldChange=geneList, circular = TRUE, colorEdge = TRUE)
gg <- ggdraw() +
draw_plot(p1, x=0, y=0.5, width=0.5, height=0.5) +
draw_plot(p2, 0, 0, 0.5, 0.5) +
draw_plot(p3, 0.5, 0, 0.5, 0.5) +
draw_plot(p4,0.5, 0.5, 0.5, 0.5)
gg
#Enrichment Map
emapplot(ego)
#(4)展示通路关系
goplot(ego)
#(5)Heatmap-like functional classification
heatplot(ego,foldChange = geneList)
像最后的热图,不调整比例的话不好看
pdf("heatplot.pdf",width = 14,height = 5)
heatplot(ego,foldChange = geneList)
dev.off()
#> RStudioGD
#> 2
4.gsea-GO分析
ego2 <- gseGO(geneList = geneList2,
OrgDb = org.Hs.eg.db,
ont = "CC",
nPerm = 1000,
minGSSize = 100,
maxGSSize = 500,
pvalueCutoff = 0.05,
verbose = FALSE)
#gsea
ridgeplot(ego2)
#> Picking joint bandwidth of 0.131
#gesa图
gseaplot2(ego2, geneSetID = 1, title = ego2$Description[1])
#多条
gseaplot2(ego2, geneSetID = 1:3)
#加table
gseaplot2(ego2, geneSetID = 2:4, pvalue_table = TRUE)
#上下三联
pp <- lapply(1:3, function(i) {
anno <- ego2[i, c("NES", "pvalue", "p.adjust")]
lab <- paste0(names(anno), "=", round(anno, 3), collapse="\n")
gsearank(ego2, i, ego2[i, 2]) + xlab(NULL) +ylab(NULL) +
annotate("text", 0, ego2[i, "enrichmentScore"] * .9, label = lab, hjust=0, vjust=0)
})
plot_grid(plotlist=pp, ncol=1)
保存时调比例
ggsave("try.pdf",width = 15,height = 18)
公众号生信星球同步更新我的文章,欢迎大家关注!