我是一个很菜的鸟,为了计算一个基因与其他基因的相关分析,在网上找了很多资料!我看不懂脚本,就只能一个个死看,一个个尝试!最后,柳暗花明,找到这个朋友推送的方法。虽然我不懂他写了什么,也不懂每一步到底什么意思,但跟着这个朋友的分享的脚本跑了出来!非常感谢他!!(留念第一篇简书,希望大家批评指正!也希望给需要的朋友带来帮助!!)
备注:1、我的数据是自然群体的FPKM矩阵,横向为基因表达量,纵列为样本,第一行是样本名,第一列为基因名;
2、以下唯一要改的是将“gene A”修改成自己的目标基因,随后就是计算其他所有基因与目标基因的相关系数及p值!!
https://mp.weixin.qq.com/s?src=11×tamp=1675943258&ver=4340&signature=L2Ybm0h5bdumwp798BLbTQOi655Ehu0gG7ck5sDUA39ZpyxwVHvtxVneSc9EJYrvTme5-rYJdT-95bsj7lMByZlKLDpipE7DL1GyY5y2udZaDj3-T2ergQsY-bT08G0H&new=1
> rm(list = ls())
> options(stringsAsFactors = F)
> tcga_foxo1 <- read.table(file ="gwas.txt",header = T, sep = "\t", quote = "", check.names = F)
> class(tcga_foxo1)
[1] "data.frame"
> row.names(tcga_foxo1) <- tcga_foxo1[,1]
> exprSet1 <- tcga_foxo1 [,-1]
> test1 <- exprSet1[1:10,1:10]
> test1
B1 B10 B11 B12 B13 B14
LG0101766 0.0000000 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
LG0101798 0.0000000 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
LG0101802 0.0000000 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
LG0101812 0.0000000 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
LG0101832 0.3205862 3.6745567 0.67330880 0.5555175 1.100432 0.4851674
LG0101836 0.1826809 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
LG0101844 0.0000000 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
LG0101875 0.0000000 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
LG0101909 0.1714331 0.0963218 0.07501063 0.0825173 0.000000 0.0000000
LG0101924 0.0000000 0.0000000 0.00000000 0.0000000 0.000000 0.0000000
B15 B16 B17 B18
LG0101766 0.00000000 0.00000000 0.000000 0.00000000
LG0101798 0.00000000 0.00000000 0.000000 0.00000000
LG0101802 0.00000000 0.00000000 0.000000 0.00000000
LG0101812 0.00000000 0.00000000 0.000000 0.00000000
LG0101832 0.19367102 0.18983320 1.517738 0.76362680
LG0101836 0.18393392 0.00000000 0.000000 0.77703675
LG0101844 0.09542112 0.00000000 0.000000 0.00000000
LG0101875 0.00000000 0.00000000 0.000000 0.00000000
LG0101909 0.25891335 0.08459422 0.000000 0.07291937
LG0101924 0.00000000 0.00000000 0.000000 0.00000000
> exprSet2<- t(exprSet1 )
> class(exprSet1 )
[1] "data.frame"
> class(exprSet2)
[1] "matrix" "array"
> exprSet <- as.data.frame(exprSet2)
> class(exprSet)
[1] "data.frame"
> test2 <- exprSet[1:10,1:10]
> test2
LG0101766 LG0101798 LG0101802 LG0101812 LG0101832 LG0101836 LG0101844
B1 0 0 0 0 0.3205862 0.1826809 0.00000000
B10 0 0 0 0 3.6745567 0.0000000 0.00000000
B11 0 0 0 0 0.6733088 0.0000000 0.00000000
B12 0 0 0 0 0.5555175 0.0000000 0.00000000
B13 0 0 0 0 1.1004324 0.0000000 0.00000000
B14 0 0 0 0 0.4851674 0.0000000 0.00000000
B15 0 0 0 0 0.1936710 0.1839339 0.09542112
B16 0 0 0 0 0.1898332 0.0000000 0.00000000
B17 0 0 0 0 1.5177376 0.0000000 0.00000000
B18 0 0 0 0 0.7636268 0.7770368 0.00000000
LG0101875 LG0101909 LG0101924
B1 0 0.17143306 0
B10 0 0.09632180 0
B11 0 0.07501063 0
B12 0 0.08251730 0
B13 0 0.00000000 0
B14 0 0.00000000 0
B15 0 0.25891335 0
B16 0 0.08459422 0
B17 0 0.00000000 0
B18 0 0.07291937 0
> y <-as.numeric(exprSet[,"gene A"])
> colnames <- colnames(exprSet)
> cor_data_df <- data.frame(colnames)
> for (i in 1:length(colnames)){
+ test <- cor.test(as.numeric(exprSet[,i]),y,type="spearman")
+
+ cor_data_df[i,2] <- test$estimate
+
+ cor_data_df[i,3] <- test$p.value
+
+ }
There were 50 or more warnings (use warnings() to see the first 50) #忽略不用管这个
> names(cor_data_df) <- c("symbol","correlation","pvalue")
> head(cor_data_df)
symbol correlation pvalue
1 LG0101766 NA NA
2 LG0101798 NA NA
3 LG0101802 0.108265485 0.06947002
4 LG0101812 0.006733649 0.91036654
5 LG0101832 -0.144771832 0.01496744
6 LG0101836 0.004308967 0.94257104
> write.table(cor_data_df, file = "cor_data_df", sep = "\t",row.names = TRUE,col.names = NA)