Gene-burden analysis

= Gene Burden Analysis =

01-generate-genes
&getgene; `perl -0777 -i -pe "s/\n\t\n/\n/g" genes-freq5-f*/*`; exit(0); sub getgene{ my $synaptome_file = "r3-freq5func-short-v2.txt"; #"$ARGV[0]"; my $freq = "freq5"; #"$ARGV[1]"; my $func = "func"; #"$ARGV[2]"; open(SFILE,$synaptome_file); my @syndata = ; my @lines = split(/\t/,$syndata[0]); for(my $i=0; $i<=scalar(@syndata)-1; $i++){ @lines = split(/\t/,$syndata[$i]); open FILE, ">>genes-$freq-$func/$lines[1]" or die $!; $, = "\t"; print FILE @lines,"\n"; close FILE; }	close SFILE; }
 * 1) !/usr/bin/perl
 * 1) $line[1] is the Gene column
 * 2) sub###

02-split.pl

 * 1) !/usr/bin/perl

&getsplit; exit(0);

sub getsplit{ my $synaptome_file = "$ARGV[0]"; my $allele1 = "$ARGV[1]"; my $allele2 = "$ARGV[2]"; my $totalcases   = "$ARGV[3]"; my $totalcontrols = "$ARGV[4]"; my $totalcases_w    = 0; my $totalcases_wo   = 0; my $totalcontrols_w = 0; my $totalcontrols_wo = 0; my $totalmissing = 0;

open(SFILE,"${synaptome_file}-tmp1"); open(OFILECASEW, ">>${synaptome_file}-index-casew"); open(OFILECASEWO,">>${synaptome_file}-index-casewo"); open(OFILECONTW, ">>${synaptome_file}-index-contw"); open(OFILECONTWO,">>${synaptome_file}-index-contwo"); open(OFILECASE, ">>${synaptome_file}-missing-index-case"); open(OFILECONT, ">>${synaptome_file}-missing-index-cont"); my @syndata = ; chomp $syndata[0]; my @lines = split(/ /, $syndata[0]); for(my $i=0; $i<=($totalcases*2)-1; $i++){ if (($lines[$i] eq $allele1) && ($lines[$i+1] eq $allele1)) {$totalcases_wo++; ; print OFILECASEWO (($i/2)+1),"\n";} if ( (($lines[$i] eq $allele1) && ($lines[$i+1] eq $allele2)) || (($lines[$i] eq $allele2) && ($lines[$i+1] eq $allele1)) ) {$totalcases_w++; ;print OFILECASEW (($i/2)+1),"\n";} if (($lines[$i] eq $allele2) && ($lines[$i+1] eq $allele2)) {$totalcases_w++;; print OFILECASEW (($i/2)+1),"\n";} if (($lines[$i] eq ".") || ($lines[$i+1] eq ".") ) {$totalmissing++; print OFILECASE (($i/2)+1),"\n";} $i++; }	for(my $i=($totalcases*2); $i<=scalar(@lines); $i++){ if (($lines[$i] eq $allele1) && ($lines[$i+1] eq $allele1)) {$totalcontrols_wo++;; print OFILECONTWO (($i/2)+1),"\n";} if ( (($lines[$i] eq $allele1) && ($lines[$i+1] eq $allele2)) || (($lines[$i] eq $allele2) && ($lines[$i+1] eq $allele1)) ) {$totalcontrols_w++; print OFILECONTW (($i/2)+1),"\n";} if (($lines[$i] eq $allele2) && ($lines[$i+1] eq $allele2)) {$totalcontrols_w++; print OFILECONTW (($i/2)+1),"\n";} if (($lines[$i] eq ".") || ($lines[$i+1] eq ".") ) {$totalmissing++; print OFILECONT (($i/2)+1),"\n";} $i++; }	close SFILE; }

03-run-cast

 * 1) !/bin/bash


 * 1) sh 03-run-cast.sh genes-freq5-func genes-freq5-func.lst   genes-freq5-func-burden.txt    251  361
 * 2) 	 genedir		gene-list		gene-burden-out-file		cases#   controls#

genedir=$1 totalcases=$4 totalcontrols=$5

for j in `cat $2` do

countvarintingene=`cat $genedir/$j | wc -l`

for i in `cat $genedir/$j | cut -f 1` do

bp=`echo $i | cut -f 3 -d'_' | awk '{split($1,array,""); print array[1]" "array[2]}'` bp1=`echo $i | cut -f 3 -d'_' | awk '{split($1,array,""); print array[1]}'` bp2=`echo $i | cut -f 3 -d'_' | awk '{split($1,array,""); print array[2]}'` grep -n -w $i r3.tped | cut -d' '  -f 5-   > $j-tmp1 perl 02-split.pl $j $bp1 $bp2 $totalcases $totalcontrols

done

countcase=`cat $j-index-casew | sort | uniq | wc -l`; countcont=`cat $j-index-contw | sort | uniq | wc -l`; countcasenotgeno=`cat $j-missing-index-case | sort | uniq -c | awk -v search="$countvarintingene"  \ '{if ($1==search) print }' | wc -l`; countcontnotgeno=`cat $j-missing-index-cont | sort | uniq -c | awk -v search="$countvarintingene"   \ '{if ($1==search) print }' | wc -l`;

tmp11=$countcase; tmp12=$countcont; tmp21=$(($totalcases-($countcasenotgeno+$countcase))); tmp22=$(($totalcontrols-($countcontnotgeno+$countcont)));

tmpnum=0.5; if [ $tmp11 == "0" ] ; then { tmp11="0.5"; tmp12=`echo "$tmp12+$tmpnum" | bc`; tmp21=`echo \ "$tmp21+$tmpnum" | bc`; tmp22=`echo "$tmp22+$tmpnum" | bc`; } fi if [ $tmp12 == "0" ] ; then { tmp12="0.5"; tmp11=`echo "$tmp11+$tmpnum" | bc`; tmp21=`echo \ "$tmp21+$tmpnum" | bc`; tmp22=`echo "$tmp22+$tmpnum" | bc`; } fi if [ $tmp21 == "0" ] ; then { tmp21="0.5"; tmp12=`echo "$tmp12+$tmpnum" | bc`; tmp11=`echo \ "$tmp11+$tmpnum" | bc`; tmp22=`echo "$tmp22+$tmpnum" | bc`; } fi if [ $tmp22 == "0" ] ; then { tmp22="0.5"; tmp12=`echo "$tmp12+$tmpnum" | bc`; tmp21=`echo \ "$tmp21+$tmpnum" | bc`; tmp11=`echo "$tmp11+$tmpnum" | bc`; } fi mp11=$countcase; mp12=$countcont; mp21=$(($totalcases-($countcasenotgeno+$countcase))); mp22=$(($totalcontrols-($countcontnotgeno+$countcont))); echo $j myor=`perl    stats/oddratio.pl     $tmp11 $tmp12 $tmp21 $tmp22 `; mychi=`perl   stats/chisquared.pl   $mp11  $mp12  $mp21  $mp22 `; myfexact=`perl stats/fisherexact.pl $mp11  $mp12  $mp21  $mp22 `; echo "$j $countcase $(($totalcases-($countcasenotgeno+$countcase))) \ $countcont $(($totalcontrols-($countcontnotgeno+$countcont))) $myor $mychi $myfexact" >> $3 rm -f $j-* done perl -0777 -i -pe "s/ /\t/g"  $3

04-gene_burden_v3

 * 1) !/bin/bash


 * 1) command:
 * 2) sh gene_burden.sh  gene_name   path_to_gene   out_filename    working_directory    tped_file    total_cases   total_control

j=$1 genedir=$2 geneburdenout=$3 wdir=$4 tpedfile=$5 totalcases=$6 totalcontrols=$7

countvarintingene=`cat $genedir/$j | wc -l`

for i in `cat $genedir/$j | cut -f 1` do bp=`echo $i | cut -f 3 -d'_' | awk '{split($1,array,""); print array[1]" "array[2]}'` bp1=`echo $i | cut -f 3 -d'_' | awk '{split($1,array,""); print array[1]}'` bp2=`echo $i | cut -f 3 -d'_' | awk '{split($1,array,""); print array[2]}'` grep -n -w $i $tpedfile | cut -d' '  -f 5-   > $genedir-tmp1/$j-tmp1 chr_bp=`echo $i | cut -f -2 -d'_'` check_relalt=`cat $genedir-tmp1/$j-tmp1 | wc -l ` if [ "$check_relalt" == "0" ] ; then { `grep -n -w "${chr_bp}_${bp2}${bp1}" $tpedfile |  cut -d' '  -f 5-   >> $genedir-tmp1/$j-tmp1` ;  `perl $wdir/02-split.pl $genedir-tmp1/$j $bp2 $bp1 $totalcases $totalcontrols ` ; } fi 	if [ "$check_relalt" != "0" ] ; then {  `perl $wdir/02-split.pl $genedir-tmp1/$j $bp1 $bp2 $totalcases $totalcontrols` ; } fi done

countcase=`cat $genedir-tmp1/$j-index-casew | sort | uniq | wc -l`; countcont=`cat $genedir-tmp1/$j-index-contw | sort | uniq | wc -l`;

countcasenotgeno=0 countcontnotgeno=0

tmp11=$countcase; tmp12=$countcont; tmp21=$(($totalcases-($countcasenotgeno+$countcase))); tmp22=$(($totalcontrols-($countcontnotgeno+$countcont)));

tmpnum=0.5; if [ $tmp11 == "0" ] ; then { tmp11="0.5"; tmp12=`echo "$tmp12+$tmpnum" | bc`; tmp21=`echo "$tmp21+$tmpnum" | bc`; tmp22=`echo "$tmp22+$tmpnum" | bc`; } fi; if [ $tmp12 == "0" ] ; then { tmp12="0.5"; tmp11=`echo "$tmp11+$tmpnum" | bc`; tmp21=`echo "$tmp21+$tmpnum" | bc`; tmp22=`echo "$tmp22+$tmpnum" | bc`; } fi; if [ $tmp21 == "0" ] ; then { tmp21="0.5"; tmp12=`echo "$tmp12+$tmpnum" | bc`; tmp11=`echo "$tmp11+$tmpnum" | bc`; tmp22=`echo "$tmp22+$tmpnum" | bc`; } fi; if [ $tmp22 == "0" ] ; then { tmp22="0.5"; tmp12=`echo "$tmp12+$tmpnum" | bc`; tmp21=`echo "$tmp21+$tmpnum" | bc`; tmp11=`echo "$tmp11+$tmpnum" | bc`; } fi;

mp11=$countcase; mp12=$countcont; mp21=$(($totalcases-($countcasenotgeno+$countcase))); mp22=$(($totalcontrols-($countcontnotgeno+$countcont)));

myor=`perl    /home/cluster/nas1/mpiroozn/apps/bin/stats/oddratio.pl     $tmp11 $tmp12 $tmp21 $tmp22 `; mychi=`perl   /home/cluster/nas1/mpiroozn/apps/bin/stats/chisquared.pl   $mp11  $mp12  $mp21  $mp22 `; myfexact=`perl /home/cluster/nas1/mpiroozn/apps/bin/stats/fisherexact.pl $mp11  $mp12  $mp21  $mp22 `;

echo "$j $countcase $mp21 $countcont $mp22 $myor $mychi $myfexact" >> $geneburdenout

rm -f $genedir-tmp1/$j-*

< Main Page