/usr/bin/load_genbank is in gbrowse 2.54+dfsg-6build1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
# Shell-compatibility preamble. The "if 0" modifier means perl itself never
# evaluates the string; the exec command inside it is only meaningful when
# this file is handed to a shell instead of perl, in which case the shell
# re-launches the script under /usr/bin/perl with the original arguments.
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
# $Id: load_genbank.pl,v 1.1 2008-10-16 17:01:27 lstein Exp $
use strict;
use Bio::DB::GFF;
use Getopt::Long;
=head1 NAME
load_genbank.pl - Load a Bio::DB::GFF database from GENBANK files.
=head1 SYNOPSIS
% load_genbank.pl -d genbank -f localfile.gb
% load_genbank.pl -d genbank -a AP003256
NOTE: The script bp_genbank2gff.pl in the BioPerl distribution is the
same as this script.
=head1 DESCRIPTION
This script loads a Bio::DB::GFF database with the features contained
in either a local genbank file or an accession that is fetched from
genbank. Various command-line options allow you to control which
database to load and whether to allow an existing database to be
overwritten.
This script currently only uses MySQL, though it is a proof-of-
principle and could easily be extended to work with other RDBMS
that are supported by GFF through adaptors.
=head1 COMMAND-LINE OPTIONS
Command-line options can be abbreviated to single-letter options.
e.g. -d instead of --dsn.
--create Force creation and initialization of database
--dsn <dsn> Data source (default dbi:mysql:test)
--user <user> Username for mysql authentication
--pass <password> Password for mysql authentication
--proxy <proxy> Proxy server to use for remote access
--file Arguments that follow are Genbank/EMBL file names (default)
--accession Arguments that follow are genbank accession numbers
--stdout Write converted GFF file to stdout rather than loading
=head1 SEE ALSO
L<Bio::DB::GFF>, L<bulk_load_gff.pl>, L<load_gff.pl>
=head1 AUTHOR
Scott Cain, cain@cshl.org
Lincoln Stein, lstein@cshl.org
Copyright (c) 2003 Cold Spring Harbor Laboratory
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. See DISCLAIMER.txt for
disclaimers of warranty.
=cut
package Bio::DB::GFF::Adaptor::biofetch_to_stdout;
use CGI 'escape';
use Bio::DB::GFF::Util::Rearrange;
use Bio::DB::GFF::Adaptor::biofetch;
use vars '@ISA';
@ISA = 'Bio::DB::GFF::Adaptor::biofetch';
# Constructor for the stdout-writing adaptor.
#
# Blesses an empty hash into $class; no other setup is performed here.
# The only recognised argument is PROXY (extracted with rearrange()). It may be
#   * an array reference, whose elements are used as-is, or
#   * a string, which is evaluated as Perl code to obtain the list.
# A non-empty resulting list is stored as an array ref in $self->{_proxy}.
#
# NOTE(review): the string form is passed to string-eval.  In this script the
# value comes from the --proxy command-line option, so whoever runs the script
# can execute arbitrary Perl through it.  A failing eval yields an empty list
# and is silently ignored.  Consider replacing this with explicit parsing.
sub new {
    my $class = shift;
    my $self  = bless {}, $class;

    my ($proxy) = rearrange(['PROXY'], @_);
    return $self unless $proxy;

    my @proxy_settings = ref($proxy) ? @{$proxy} : eval $proxy;
    $self->{_proxy} = \@proxy_settings if @proxy_settings;

    return $self;
}
# Print one feature as a GFF3 line on the currently selected output handle.
#
# Arguments:
#   $self    - the adaptor object (not used here)
#   $options - hash ref describing the feature.  Keys read by this method:
#              ref, source, method, start, stop, score, strand, phase,
#              gname, tstart, tstop, and attributes (an array ref of
#              [tag, value] pairs).
#
# Column 9 is assembled in this order: Parent (omitted when the method is
# "gene"), ID, Target, then the tag=value attributes sorted by tag so that
# repeated runs produce identical output.  Values that share a tag are
# joined with commas.  Tags and values are escaped with escape().
#
# A feature whose method is "origin" additionally emits a
# "##sequence-region" directive before its own line.
#
# Side effect: missing (undefined or empty) source, score, strand and phase
# entries of %$options are filled in place with their defaults.
sub load_gff_line {
    my ($self, $options) = @_;

    # synthesize GFF3-compatible attribute column
    my @attributes;
    if (my $group = $options->{gname}) {
        push @attributes, 'Parent=' . escape($group)
            unless $options->{method} =~ /^gene$/;
        push @attributes, 'ID=' . escape($group);
    }

    if (my $tstart = $options->{tstart}) {
        my $tstop  = $options->{tstop};
        my $target = escape($options->{gname});
        push @attributes, "Target=$target+$tstart+$tstop";
    }

    if (my $pairs = $options->{attributes}) {
        my %values_for;
        for my $pair (@$pairs) {
            my ($tag, $value) = @$pair;
            push @{ $values_for{ escape($tag) } }, escape($value);
        }
        # Sorted so the output does not depend on Perl's hash ordering.
        for my $tag (sort keys %values_for) {
            push @attributes, "$tag=" . join(',', @{ $values_for{$tag} });
        }
    }
    my $last_column = join ';', @attributes;

    if ($options->{method} eq 'origin') {
        print "##sequence-region $options->{gname} $options->{start} $options->{stop}\n";
    }

    # Fill in defaults only when a value is really absent.  A plain "||="
    # would also replace a legitimate score or phase of 0.
    my %default_for = (
        source => 'genbank',
        score  => '.',
        strand => '.',
        phase  => '.',
    );
    for my $column (keys %default_for) {
        my $value = $options->{$column};
        $options->{$column} = $default_for{$column}
            unless defined $value && length $value;
    }

    print join("\t",
               @{$options}{qw(ref source method start stop score strand phase)},
               $last_column), "\n";
}
# Print a sequence as a FASTA record on the currently selected output handle.
#
# Arguments:
#   $self      - the adaptor object (not used here)
#   $accession - text placed after ">" on the header line
#   $residues  - the sequence string
#
# The sequence is broken into lines of at most 60 characters, lower-cased,
# and followed by one extra newline, so every record ends with a blank line.
sub load_sequence_string {
    my ($self, $accession, $residues) = @_;

    # Insert a newline after every run of up to 60 characters.
    (my $wrapped = $residues) =~ s{(.{1,60})}{$1\n}g;

    print ">$accession\n" . lc($wrapped) . "\n";
}
# Emit the GFF3 version pragma on the currently selected output handle.
# Takes only the adaptor object, which is not used.
sub setup_load {
    my ($self) = @_;
    my $version_pragma = "##gff-version 3\n";
    print $version_pragma;
}
# Deliberate no-op: this method performs no work and returns nothing.
sub finish_load {
    return;
}
1;
package main;
# ---------------------------------------------------------------------------
# Main program: parse options, open the Bio::DB::GFF database (or the
# stdout-writing adaptor when --stdout is given) and load every remaining
# command-line argument, either as an accession or as a local file.
# ---------------------------------------------------------------------------

# Option values; each stays undef unless the option is given.
my ($DSN, $ADAPTOR, $CREATE, $USER, $PASSWORD, $ACC, $FILE, $PROXY, $STDOUT);

GetOptions(
    'dsn:s'      => \$DSN,
    'user:s'     => \$USER,
    'password:s' => \$PASSWORD,
    'accession'  => \$ACC,
    'file'       => \$FILE,
    'proxy:s'    => \$PROXY,
    'stdout'     => \$STDOUT,
    'create'     => \$CREATE,
) or die <<USAGE;
Usage: $0 [options] <file or accession 1> <file or accession 2> ...
Load a Bio::DB::GFF database from Genbank/EMBL files or accessions.

 Options:
   --create              Force creation and initialization of database
   --dsn    <dsn>        Data source (default dbi:mysql:test)
   --user   <user>       Username for mysql authentication
   --pass   <password>   Password for mysql authentication
   --proxy  <proxy>      Proxy server to use for remote access
   --file                Arguments that follow are Genbank/EMBL file names (default)
   --accession           Arguments that follow are genbank accession numbers
   --stdout              Write converted GFF file to stdout rather than loading

This script loads a Bio::DB::GFF database with the features contained
in a either a local genbank file or an accession that is fetched from
genbank.  Various command-line options allow you to control which
database to load and whether to allow an existing database to be
overwritten.

This script currently only uses MySQL, though it is a proof-of-
principle and could easily be extended to work with other RDMS
that are supported by GFF through adaptors.
USAGE

# Check the arguments before touching the database.  --create re-initializes
# the database, so doing that first and only then discovering there is nothing
# to load would wipe it for no reason.
die "you must specify either an accession to retrieve from\nembl or a local file containing data in embl format\n"
    unless @ARGV;

# some local defaults
$DSN   ||= 'dbi:mysql:test';
$ADAPTOR = $STDOUT ? 'biofetch_to_stdout' : 'biofetch';

# Only pass on the connection options that were actually supplied.
my @auth;
push @auth, (-user  => $USER)     if defined $USER;
push @auth, (-pass  => $PASSWORD) if defined $PASSWORD;
push @auth, (-proxy => $PROXY)    if defined $PROXY;

my $db = Bio::DB::GFF->new(-adaptor => $ADAPTOR, -dsn => $DSN, @auth)
    or die "Can't open database: ", Bio::DB::GFF->error, "\n";

if ($CREATE) {
    $db->initialize(1);
}

# --file is the default and wins when both --file and --accession are given.
my $by_accession = $ACC && !$FILE;

# Consume @ARGV one item at a time.  The defined() test keeps an argument such
# as "0" from ending the loop early, and the lexical loop variable leaves the
# global $_ alone.
while (defined(my $item = shift @ARGV)) {
    my $result;
    if ($by_accession) {
        print STDERR "Loading $item...";
        # Accessions that start with NC_ are requested as 'refseq',
        # everything else as 'embl'.
        my $namespace = $item =~ /^NC_/ ? 'refseq' : 'embl';
        $result = $db->load_from_embl($namespace => $item);
    }
    else {
        print STDERR "Loading $item...\n";
        $result = $db->load_from_file($item);
    }
    print STDERR $result ? "ok\n" : "failed\n";
}
|