/usr/bin/bp_mutate is in bioperl 1.6.924-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 | #!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
=head1 NAME
bp_mutate.pl - randomly mutagenize a single protein or DNA sequence
=head1 SYNOPSIS
./bp_mutate.pl -p 25 -i test.fa -n 5 -f swiss -o muts.swiss
#or
./bp_mutate.pl --percent=25 --input=test.fa --number=5 -output=x.fa
=head1 DESCRIPTION
Randomly mutagenize a single protein or DNA sequence one or more times.
Specify percentage mutated and number of resulting mutant sequences.
Print mutagenized sequences to STDOUT or write to an output file.
-h|--help Help
-p|--percent Percent mutagenized
-n|--number Number of mutant sequences created
-o|--output Output file (optional)
-f|--format Output format (default: fasta)
-i|--input Input file
=head1 FEEDBACK
User feedback is an integral part of the evolution of this and other
Bioperl scripts. Send your comments and suggestions to the Bioperl
mailing list. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via the
web:
https://github.com/bioperl/bioperl-live/issues
=head1 AUTHOR
Brian Osborne, bosborne at alum.mit.edu
=cut
use strict;
use warnings;
use Getopt::Long;
use Bio::SeqIO;
my ( $help, $in_file, $percent, $out_file, $number );
my $format = "fasta";
my @dna = qw(a g c t);
my @amino = qw(a c d e f g h i k l m n p q r s t v w y);
GetOptions(
"h|help" => \$help,
"p|percent:i" => \$percent,
"n|number:i" => \$number,
"o|output:s" => \$out_file,
"f|format:s" => \$format,
"i|input:s" => \$in_file
);
usage() if ( $help || !$percent || !$in_file || !$number || $percent > 100 );
# Seed the random number generator. "time|$$" combines the
# current time with the current process id
srand( time | $$ );
my $seqio = Bio::SeqIO->new( -file => $in_file );
my $seqobj = $seqio->next_seq;
my $num_mut = percent_to_num($percent);
my @seq_arr = ();
# don't keep a mutant that's already been made
while ( $number > $#seq_arr + 1 ) {
my $mut_seq = mutate_all( $seqobj, $num_mut );
push @seq_arr, $mut_seq unless ( grep /$mut_seq/, @seq_arr );
}
foreach my $mut_seq (@seq_arr) {
my $name = $seqobj->display_id . "-${percent}_percent-$number";
my $outseq = Bio::Seq->new(
-seq => $mut_seq,
-display_id => $name,
-desc => $seqobj->desc
);
my %args = ( -format => $format );
$args{-file} = ">>$out_file" if $out_file;
my $seqio = Bio::SeqIO->new(%args);
$seqio->write_seq($outseq);
$number--;
}
# mutagenize the sequence, one-by-one
sub mutate_all {
my ( $seq_obj, $num ) = @_;
my $type = $seq_obj->alphabet;
my $str = $seq_obj->seq;
# store the mutagenized positions in $positions
my $positions = "";
for ( my $i = 0 ; $i < $num_mut ; ++$i ) {
( $str, $positions ) = mutate_one( $str, $type, $positions );
}
$str;
}
# mutagenize one position
sub mutate_one {
my ( $str, $type, $positions ) = @_;
my ( $position, $new_char );
# pick a random position in the sequence, checking
# that the position isn't already mutagenized
do {
$position = random_position($str);
} until ( $positions !~ /\b$position\b/ );
$positions .= "$position ";
my $current_char = substr( $str, $position, 1 );
# pick a random char that's not the existing char
do {
$new_char = random_char($type);
} until ( $new_char ne $current_char );
substr( $str, $position, 1, $new_char );
( $str, $positions );
}
# randomly select a position in the sequence
sub random_position {
my $string = shift;
int( rand( length($string) ) );
}
# randomly select one of the chars depending on alphabet
sub random_char {
my $type = shift;
$type eq "protein"
? return $amino[ rand @amino ]
: return $dna[ rand @dna ];
}
sub percent_to_num {
my $percent = shift;
int( $percent / 100 * length( $seqobj->seq ) );
}
sub usage {
exec( 'perldoc', $0 );
exit(0);
}
__END__
|