R绘制三维PCA散点图,scatterplot3d
# 1 PCA分析
plink --vcf snp.flt.vcf --pca 6 --out PCA_out --allow-extra-chr --set-missing-var-ids @:#
# 结果文件: PCA_out.eigenvec 各个样品各个成分的值。
# 2 在Rstudio中绘图
#(1)准备输入文件
# plink产生的结果文件PCA_out.eigenvec,改变数据格式并另存到pca_3d.txt。
# pca_3d.txt: 一个样品为一列,三个主成分即可,例如:
# A1 A2 A3 A...
# PC1 0.0905844 0.0908204 0.0907446 ...
# PC2 0.145941 0.147013 0.147025 ...
# PC3 0.177598 0.178904 0.177893 ...
#(2)R包的安装与加载
install.packages('gmodels')
library(gmodels)
install.packages('scatterplot3d') #安装R包
library(scatterplot3d) #加载R包
example(scatterplot3d) #查看案例结果
# help(scatterplot3d) #查看帮助文件
#(3)在Rstudio中绘图
library(scatterplot3d) #加载R包
library(gmodels)
data <- read.table("pca_3d.txt",header = T,row.names = 1,sep="\t",check.names = F) #读取数据
pca.info <- fast.prcomp(data) #计算PCA
# 新建一个data.frame,包含所有样本信息和PC信息,前12行为分组A,接着11行为分组B......
pca.data <- data.frame(sample = rownames(pca.info$rotation),
Type = c(rep("A",12),rep("B",11),rep("C",5),rep("D",8),rep("E",3),rep("F",6)),
pca.info$rotation)
# 不同分组的备选颜色:black,grey,purple,orange,magenta,red,cyan,blue,lawngreen,green3,green,gold,deepskyblue
colors.lib <- c("magenta","blue","purple","red","orange","green3")
colors <- colors.lib[as.numeric(pca.data$Type)]
# 1空心圆,2空心三角形,3加号,4叉,5空心菱形,6倒空心三角形,7正方形+叉,8加号+叉,9菱形+加号,10圆圈+加号,11线画五角星,12线画田字格,13圆圈+叉,14四方形+三角形,15实体四方形,16实体圆形,17实体三角形,18实体菱形,19加粗实体圆形
shapes.lib = c(16,16,16,16,17,16) #不同分组可以设置不同的形状,也可以统一设置颜色pch=16。
shapes <- shapes.lib[as.numeric(pca.data$Type)]
# 增加图注,topright为右上方,bottom为底部中间
s3d <- scatterplot3d(pca.data[,c("PC1","PC2","PC3")],
pch = shapes,color = colors,
angle = 60,cex.symbols = 1,
main = "3D PCA plot")
legend("topright",legend = levels(pca.data$Type),
col = colors.lib,pch = shapes.lib,
inset = -0.12,xpd = TRUE,horiz = F)
# 保存图片:Plots, Export, Save as PDF
### 标注样本名称
s3d <- scatterplot3d(pca.data[,c("PC1","PC2","PC3")],
pch = shapes,color = colors,
angle = 60,cex.symbols = 1,
main = "3D PCA plot")
legend("topright",legend = levels(pca.data$Type),
col = colors.lib,pch = shapes.lib,
inset = -0.12,xpd = TRUE,horiz = F)
text(s3d$xyz.convert(pca.data[,c("PC1","PC2","PC3")] + 0.02),
labels = pca.data$sample,
cex = 0.5,col = "black")