# 1000genomes-to-Annovar



# Download the 1000 Genomes phase 1 (release 20110521) sites-only VCF.
# The directory and the file name form ONE URL; with a space between them
# wget is handed two separate (and both useless) URLs.
wget "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20110521/ALL.wgs.phase1_release_v3.20101123.snps_indels_sv.sites.vcf.gz"

#######################################
# Split the data lines of a gzipped VCF into three row-aligned files in the
# current directory (header lines starting with '#' are skipped):
#   part1.txt - columns 1,2,4,5 (CHROM, POS, REF, ALT), tab-separated
#   part2.txt - column 8 (INFO) wrapped as "tmp;<INFO>;tmp" so that every
#               key=value pair is delimited by ';' on both sides
#   part3.txt - column 3 (ID)
# The archive is decompressed once instead of three times.
# Arguments: $1 - path to the .vcf.gz file
# Outputs:   writes part1.txt, part2.txt, part3.txt (truncated first)
#######################################
split_vcf_columns() {
  local vcf=$1
  # gzip -dc is the portable spelling of zcat.
  gzip -dc -- "$vcf" | awk -F'\t' -v OFS='\t' '
    BEGIN {
      # Create/truncate all outputs even when there are no data lines.
      printf "" > "part1.txt"
      printf "" > "part2.txt"
      printf "" > "part3.txt"
    }
    /^#/ { next }
    {
      print $1, $2, $4, $5   > "part1.txt"
      print "tmp;" $8 ";tmp" > "part2.txt"
      print $3               > "part3.txt"
    }
  '
}

split_vcf_columns ALL.wgs.phase1_release_v3.20101123.snps_indels_sv.sites.vcf.gz

#######################################
# Print, for every line of an INFO file produced as "tmp;<INFO>;tmp", the
# value of one INFO key, or "NA" when the key is absent or its value does
# not end in a digit. Exactly one output line is written per input line so
# the result stays row-aligned with part1.txt / part3.txt for paste.
# Arguments: $1 - INFO key without '=' (e.g. AF, AMR_AF)
#            $2 - input file (default: part2.txt)
# Outputs:   one value per line on stdout
#######################################
extract_af() {
  local key=$1
  local info_file=${2:-part2.txt}
  awk -v key="$key" '
    # Delete everything up to and including ";<key>=" and everything from
    # the next ";" onwards; what remains is the bare value (or "tmp" when
    # the key does not occur in the line).
    BEGIN { strip = ".*;" key "=|;.*" }
    {
      gsub(strip, "")
      # Same acceptance test as before: the value must end in a digit
      # (optionally followed by dots).
      if ($1 ~ "[0-9][.]*$") print $1
      else print "NA"
    }
  ' "$info_file"
}

extract_af AF     > ALL.AF.txt
extract_af AMR_AF > AMR.AF.txt
extract_af EUR_AF > EUR.AF.txt
extract_af AFR_AF > AFR.AF.txt
extract_af ASN_AF > ASN.AF.txt

#######################################
# Rewrite tab-separated rows "chrom pos ref alt af id" read from stdin:
#   - ref longer than one base: pos is incremented, the first base of ref
#     is dropped, and alt becomes the number of removed bases
#     (length(ref) - 1, taken before ref is trimmed);
#   - otherwise, alt longer than one base: its first character is replaced
#     by "0" (so "<DEL>" becomes "0DEL>", which is then mapped to "1").
# Rows whose allele-frequency column is "NA" are dropped. Only that column
# is checked, so an "NA" substring in an allele or an ID no longer removes
# the row (the previous whole-line `grep -v NA` did).
# NOTE(review): presumably this is the layout ANNOVAR expects - confirm.
# Outputs: the rewritten rows, tab-separated, on stdout
#######################################
to_annovar() {
  awk -F'\t' '
    $5 == "NA" { next }
    {
      if (length($3) > 1) {
        $2 = $2 + 1
        $4 = length($3) - 1
        $3 = substr($3, 2)
      } else if (length($4) > 1) {
        $4 = "0" substr($4, 2)
      }
      if ($4 == "0DEL>") {
        $4 = "1"
      }
      print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6
    }
  '
}

# One table per population: coordinates/alleles + that population's AF + ID.
for i in ALL AMR AFR ASN EUR; do
  paste part1.txt "$i.AF.txt" part3.txt \
    | to_annovar > "$i.wgs.phase1_release_v3.20101123.txt"
done