/usr/bin/bp_heterogeneity_test is in bioperl 1.6.901-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | #!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
# -*-Perl-*- (for my emacs)
use strict;
=head1 NAME
heterogeneity_test - a test for distinguishing between selection and population expansion.
=head1 SYNOPSIS
heterogenetity_test -mut_1/--mutsyn synonymous_mut_count -mut_2/--mutnon nonsyn_mut_count -s/--smaplesize sample_size [-i/--iterations iterations] [-o/--observed observed_D] [-v/--verbose] [--silent ] [-m/--method tajimaD or fuD] [--precision]
=head2 DESCRIPTION
This is an implementation of the Heterogenetity test as described in
Hahn MW, Rausher MD, and Cunningham CW. 2002. Genetics 161(1):11-20.
=head2 OPTIONS
Options in brackets above are optional
-s or --samplesize samplesize
-mut_1 or --mutsyn synonymous mutation count
-mut_2 or --mutnon nonsynonmous mutation count
-i or --iterations number of iterations
-o or --observed observed D
-m or --method tajimaD or fuD for Tajima's D or Fu and Li's D
-v or --verbose print out extra verbose messages
--silent Be extra quiet
--precision Level of precision - specify the number of digits
(default 4)
=head2 AUTHOR Matthew Hahn E<lt>matthew.hahn-at-duke.eduE<gt>
For more information contact:
Matthew Hahn, E<lt>matthew.hahn-at-duke.eduE<gt>
Jason Stajich E<lt>jason-at-bioperl-dot-orgE<gt>
=cut
use Getopt::Long;
use Bio::PopGen::Simulation::Coalescent;
use Bio::PopGen::Statistics;
use Bio::PopGen::Individual;
use Bio::PopGen::Genotype;
my $sample_size = 4;
my $mut_count_1 = 10; # synonymous
my $mut_count_2 = 20; # non-synonymous
my $iterations = 1;
my $verbose = 0;
my $observedD = undef;
my $method = 'fuD';
my $help = 0;
my $precision = '4'; # Let's make the random precision between
# 0->1 to 1000th digits
GetOptions(
's|samplesize|samp_size:i' => \$sample_size,
'mut_1|mutsyn:i' => \$mut_count_1,
'mut_2|mutnon:i' => \$mut_count_2,
'i|iterations:i' => \$iterations,
'o|obsered|observedD:f' => \$observedD,
'v|verbose' => \$verbose,
'm|method:s' => \$method,
'h|help' => \$help,
'silent' => sub { $verbose = -1; },
'p|precision:i' => \$precision,
);
if( $help ) {
system("perldoc",$0);
exit(0);
}
if( $method ne 'fuD' and $method ne 'tajimaD' ) {
die("available methods are [fu and li's D] (fuD) and Tajima's D (tajimaD)");
}
my @D_distribution;
printf("sample size is %d iteration count = %d\n", $sample_size,
$iterations);
my $sim = new Bio::PopGen::Simulation::Coalescent
(-sample_size => $sample_size);
for(my $iter = 0; $iter < $iterations; $iter++ ) {
my $tree = $sim->next_tree($sample_size);
my $f1 = 0;
if( $mut_count_1 > 0 ) {
$sim->add_Mutations($tree,$mut_count_1,$precision);
my @leaves = $tree->get_leaf_nodes;
# the outgroup is just an individual with the ancestral state
# (no mutations)
my $outgroup = new Bio::PopGen::Individual();
foreach my $m ( $leaves[0]->get_marker_names ) {
$outgroup->add_Genotype(Bio::PopGen::Genotype->new
(-marker_name=> $m,
-alleles => [ 0 ]));
}
if( $method eq 'fuD' ) {
$f1 = Bio::PopGen::Statistics->fu_and_li_D(\@leaves,[$outgroup]);
} elsif( $method eq 'tajimaD' ) {
$f1 = Bio::PopGen::Statistics->tajima_D(\@leaves);
}
print "(mutation count = $mut_count_1) D=$f1\n"
if( $verbose >= 0);
}
my $f2 = 0;
if( $mut_count_2 > 0 ) {
$sim->add_Mutations($tree,$mut_count_2,$precision);
my @leaves = $tree->get_leaf_nodes;
# the outgroup is just an individual with the ancestral state
# (no mutations)
my $outgroup = new Bio::PopGen::Individual();
foreach my $m ( $leaves[0]->get_marker_names ) {
$outgroup->add_Genotype(Bio::PopGen::Genotype->new
(-marker_name=> $m,
-alleles => [ 0 ]));
}
if( $method eq 'fuD' ) {
$f2 = Bio::PopGen::Statistics->fu_and_li_D(\@leaves,[$outgroup]);
} elsif( $method eq 'tajimaD' ) {
$f2 = Bio::PopGen::Statistics->tajima_D(\@leaves);
}
print "(mutation count = $mut_count_2) D=$f2\n" if( $verbose >= 0);
}
my $deltaD = ( $f1 - $f2 );
push @D_distribution, $deltaD;
if( $iter % 10 == 0 && $iter > 0 ) {
print STDERR "iter = $iter\n";
}
}
if( defined $observedD && $iterations > 1 ) {
my @sortedD = sort { $a <=> $b } @D_distribution;
my $i;
for($i = 0; $i < scalar @sortedD; $i++ ) {
if( $sortedD[$i] > $observedD ) {
last;
}
}
printf( "index %d value=%.4f out of %d total (obs=%.4f)\n",
$i, $sortedD[$i], scalar @sortedD, $observedD);
}
|