/usr/bin/bp_taxonomy2tree is in bioperl 1.6.901-2.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | #!/usr/bin/perl
# Shell fallback: if a shell (rather than perl) ends up interpreting this
# file, it runs the exec below and re-launches the script under perl with
# the original arguments. Perl itself reads both lines as one statement
# guarded by "if 0", so perl never executes the exec.
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
=head1 NAME
taxonomy2tree - Building a taxonomic tree based on the full lineages of a set of species names
=head1 DESCRIPTION
taxonomy2tree.PLS: -s Orangutan -s Gorilla -s Chimpanzee -s Human
taxonomy2tree.PLS: -s Orangutan -s Gorilla -s Chimpanzee -s "Homo Sapiens"
You can also provide -d to specify the directory to store index files in, -o to
specify the location of your nodes file, and -a for the names file.
Alternatively, use the option -e to query the web-based Entrez taxonomy database
if you don't have the flatfiles installed.
This script requires that the bioperl-run package also be installed.
Providing the nodes.dmp and names.dmp files from the NCBI Taxonomy
dump (see Bio::DB::Taxonomy::flatfile for more info) is only necessary
the first time the script is run. That run creates the local indexes and may
take quite a long time. However, once created, these indexes
allow fast species-name-to-taxon-id and taxon-id-to-species-name
lookups.
=head1 AUTHOR - Gabriel Valiente, reimplemented by Sendu Bala
Email valiente@lsi.upc.edu
Email bix@sendu.me.uk
=cut
use strict;
use Bio::DB::Taxonomy;
use Bio::TreeIO;
use Bio::Tree::Compatible;
use Getopt::Long;
# Command-line settings and their defaults.
my @species;                      # -s: species names (option may be repeated)
my $index_dir  = "./db/";         # -d: directory to store the index files in
my $nodesfile  = "nodes.dmp";     # -o: location of the nodes file
my $namesfile  = "names.dmp";     # -a: location of the names file
my $use_entrez = 0;               # -e: use the web-based entrez database

my $usage = "Usage: $0 -s <species> [-s <species> ...] "
          . "[-d <index dir>] [-o <nodes file>] [-a <names file>] [-e]\n";

# the input to the script is an array of species names
# GetOptions returns false when it meets an unknown or malformed option, so
# stop here instead of carrying on with a partially parsed command line.
GetOptions(
    's|species=s'   => \@species,
    'd|dir:s'       => \$index_dir,
    'o|nodesfile:s' => \$nodesfile,
    'a|namesfile:s' => \$namesfile,
    'e|entrez'      => \$use_entrez,
) or die $usage;

# Without at least one species there is nothing to build a tree from.
@species or die "no species given\n", $usage;

# Taxonomy source: entrez when -e was given, otherwise the local flatfile
# indexes built from the nodes/names dump files.
my $db = Bio::DB::Taxonomy->new(
    -source    => $use_entrez ? 'entrez' : 'flatfile',
    -directory => $index_dir,
    -nodesfile => $nodesfile,
    -namesfile => $namesfile,
);
# the full lineages of the species are merged into a single tree
my $tree;
for my $name (@species) {
    # Resolve the species name to a taxon id; names without one are
    # reported and skipped so the remaining species are still processed.
    my $ncbi_id = $db->get_taxonid($name);
    if (!$ncbi_id) {
        warn "no NCBI Taxonomy node for species ",$name,"\n";
        next;
    }

    # Guard against the node lookup coming back empty, so an undefined
    # value is never handed to the tree constructor or to merge_lineage.
    my $node = $db->get_taxon(-taxonid => $ncbi_id);
    if (!defined $node) {
        warn "no NCBI Taxonomy node for taxon id ",$ncbi_id," (species ",$name,")\n";
        next;
    }

    # The first resolved taxon seeds the tree; each later one has its
    # lineage merged into that same tree.
    if ($tree) {
        $tree->merge_lineage($node);
    }
    else {
        $tree = Bio::Tree::Tree->new(-node => $node);
    }
}
# $tree is still undefined when no species was given or none of the names
# could be resolved; report that instead of failing on a method call on an
# undefined value.
die "no taxonomic tree could be built: no species were given or none could be found in the taxonomy database\n"
    unless $tree;

# simple paths are contracted by removing degree one nodes
$tree->contract_linear_paths;

# convert tree ids to their names for nice output with TreeIO
for my $node ($tree->get_nodes) {
    $node->id($node->node_name);
}

# the tree is output in Newick format
my $output = Bio::TreeIO->new(-format => 'newick');
$output->write_tree($tree);

1;
|