/usr/lib/WigeoN/PerlLib/CdbTools.pm is in wigeon 20101212+dfsg-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
=head1 NAME
package CdbTools
=cut
=head1 DESCRIPTION
routines for extracting entries from Fasta file using the CDBtools cdbfasta and cdbyank.
=cut
;
# Declare $SEE while still in package main: "our" makes the unqualified name
# $SEE used later in this file an alias for $main::SEE, even after the
# package statement below switches to CdbTools. cdbyank() reads it as a
# verbosity flag and prints the command it runs when it is true.
package main;
our $SEE;
package CdbTools;
use strict;
use warnings;
require Exporter;
our @ISA = qw (Exporter);
# These subs are exported by default to any code that imports this module.
our @EXPORT = qw (cdbyank linearize cdbyank_linear);
## cdbfasta and cdbyank must be in the PATH; otherwise cdbyank() dies when it tries to run them.
=over 4
=item cdbyank()
B<Description:> Retrieves a fasta sequence entry from a fasta database
B<Parameters:> accession, fastaFilename
B<Returns:> fastaEntry
use the linearize method to extract the fasta entry components
=back
=cut
;
sub cdbyank {
    # Retrieve one fasta entry from $fastaFile by accession using the external
    # "cdbyank" program. If "$fastaFile.cidx" is missing or empty, the index
    # is first (re)built with "cdbfasta -C".
    #
    # Parameters: ($accession, $fastaFile)
    # Returns:    the raw text printed by cdbyank for that accession
    # Dies:       on missing arguments, if either external command fails, or
    #             if cdbyank prints nothing.
    #
    # Both commands are run in list form so that the accession and filename
    # are passed as single arguments and never interpreted by a shell.
    my ($accession, $fastaFile) = @_;

    unless (defined $accession && length $accession) {
        die "Error, cdbyank() requires an accession\n";
    }
    unless (defined $fastaFile && length $fastaFile) {
        die "Error, cdbyank() requires a fasta filename\n";
    }

    my $indexFile = "$fastaFile.cidx";

    unless (-s $indexFile) {
        ## regenerate index file:
        my @index_cmd = ('cdbfasta', '-C', $fastaFile);
        my $ret = system @index_cmd;
        if ($ret) {
            die "Error, couldn't create index file: @index_cmd, ret($ret)\n";
        }
    }

    my @yank_cmd = ('cdbyank', '-a', $accession, $indexFile);

    # Human-readable form of the command, used only in messages.
    my $cmd = "cdbyank -a '$accession' $indexFile";
    if ($SEE) {
        print "CMD: $cmd\n";
    }

    # List-form pipe open: runs cdbyank directly, without a shell.
    open(my $yank_fh, '-|', @yank_cmd)
        or die "Error, couldn't run cdbyank: $cmd, ret($!)\n";
    my $fastaEntry = do { local $/; <$yank_fh> };
    close $yank_fh;    # waits for cdbyank and sets $? to its exit status
    if ($?) {
        die "Error, couldn't run cdbyank: $cmd, ret($?)\n";
    }

    unless (defined $fastaEntry && length $fastaEntry) {
        die "Error, no fasta entry retrieved by accession: $accession\n";
    }

    return ($fastaEntry);
}
=over 4
=item linearize()
B<Description:> breaks down a fasta sequence into its components
B<Parameters:> fastaEntry
B<Returns:> (accession, header, linearSequence)
=back
=cut
;
sub linearize {
    # Split a single fasta entry into its components.
    #
    # Parameters: ($fastaEntry) - text beginning with a '>' header line
    # Returns:    ($accession, $header, $sequence)
    #               $accession - first run of non-whitespace characters
    #                            directly after the leading '>', or undef
    #                            when the header has none
    #               $header    - the full first line, including the '>'
    #               $sequence  - remaining lines joined, all whitespace removed
    # Dies:       if the entry is undefined or does not start with '>'.
    my ($fastaEntry) = @_;

    $fastaEntry = '' unless defined $fastaEntry;
    unless ($fastaEntry =~ /^>/) {
        die "Error, fasta entry lacks expected format starting with header '>' character.\nHere's the entry\n$fastaEntry\n\n";
    }

    # Accept both LF and CRLF line endings so the header carries no stray "\r".
    my ($header, @seq_lines) = split (/\r?\n/, $fastaEntry);

    my $sequence = join ("", @seq_lines);
    $sequence =~ s/\s+//g;

    # Anchor at the start of the header: an unanchored match could pick up
    # text following a later '>' inside the description.
    my ($accession) = $header =~ /^>(\S+)/;

    return ($accession, $header, $sequence);
}
=over 4
=item cdbyank_linear()
B<Description:> same as calling cdbyank (), and chasing it with linearize(), but only the sequence is returned
B<Parameters:> accession, fasta_db
B<Returns:> linearSequence
=back
=cut
;
sub cdbyank_linear {
    # Convenience wrapper: fetch the entry for $acc from $fasta_db with
    # cdbyank(), break it apart with linearize(), and hand back only the
    # third component (the linear sequence).
    my ($acc, $fasta_db) = @_;

    my $entry = cdbyank($acc, $fasta_db);
    my @parts = linearize($entry);    # (accession, header, sequence)

    return $parts[2];
}
1; #EOM
|