建立索引。最新版的 bcftools 1.11 新增了 --no-index 选项,可以直接合并未建索引的文件,不过各 vcf 中染色体的顺序必须相同,并且要与 header 中序列的顺序一致
New --no-index option, which allows merging unindexed files.
It requires the input files to have chromosomes in the same order, consistent with the order of sequences in the header. (PR #1253; samtools/htslib#1089)
# Compress every *.raw.vcf in the current directory (-Oz output) and index the
# resulting .gz file. The names of the processed inputs are recorded in
# all.vcf.lst, one per line.
#
# The loop iterates over the glob directly instead of re-reading `ls` output,
# so file names are never word-split or have backslashes mangled by `read`.
: > all.vcf.lst
for vcf in *.raw.vcf; do
  # When nothing matches, the glob stays as the literal pattern; skip it.
  [[ -f "$vcf" ]] || continue
  printf '%s\n' "$vcf" >> all.vcf.lst
  # Do not index an output that was not written successfully.
  if ! bcftools view -o "${vcf}.gz" -Oz "$vcf"; then
    printf 'bcftools view failed for %s; skipping index\n' "$vcf" >&2
    continue
  fi
  bcftools index "${vcf}.gz"
done
合并
# Build the list of per-sample VCFs to merge: the first column of every line in
# A.sample.lst and B.sample.lst, with ".raw.snv_indel.vcf.gz" appended.
# An array (one element per file) replaces the space-joined string so the names
# reach bcftools without relying on unquoted word splitting or globbing.
# NOTE(review): the compression step earlier in these notes produces
# *.raw.vcf.gz, while this step expects *.raw.snv_indel.vcf.gz - confirm that
# the file names match.
mapfile -t vcf_files < <(
  awk '{ print $1 ".raw.snv_indel.vcf.gz" }' A.sample.lst B.sample.lst
)
#printf '%s\n' "${vcf_files[@]}"
# Merge all listed files into a single uncompressed VCF (-Ov).
bcftools merge -Ov -o all_sample.merge.vcf "${vcf_files[@]}"
转plink
# Step 1: export the VCF in PLINK format; outputs use the prefix "snp_indel".
# NOTE(review): the input here is all_sample.merge.allele.uniq.vcf, not the
# all_sample.merge.vcf written by the merge step; the step that produces it is
# not shown in these notes - confirm.
vcftools \
  --vcf all_sample.merge.allele.uniq.vcf \
  --plink \
  --out snp_indel
# Step 2: convert the "snp_indel" fileset to a binary fileset (--make-bed),
# written with the prefix "snp_indel.binary".
plink \
  --file snp_indel \
  --allow-no-sex \
  --make-bed \
  --noweb \
  --out snp_indel.binary
# If the input has no family ID, parental ID, sex, and/or phenotype columns,
# use the options below (they apply to .fam and .ped files):
#   --no-fid
#   --no-parents
#   --no-sex
#   --no-pheno