/usr/lib/python2.7/dist-packages/cogent/app/formatdb.py is in python-cogent 1.9-9.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 | #!/usr/bin/env python
# Author: Greg Caporaso (gregcaporaso@gmail.com)
# formatdb.py
""" Application controller and convenience functions for building BLAST
databases with the formatdb command.

File created on 16 Sep 2009.
"""
from __future__ import division
from optparse import OptionParser
from os.path import split, splitext
from os import remove
from glob import glob
from cogent.app.util import CommandLineApplication, ResultPath, get_tmp_filename
from cogent.app.parameters import ValuedParameter, FilePath
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2007-2016, The Cogent Project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.9"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Production"
class FormatDb(CommandLineApplication):
    """ ApplicationController for formatting blast databases

        Currently contains a minimal parameter set:
         -i: path to the input (fasta) file
         -l: path to the log file
         -o: passed through to formatdb, defaults to 'T'
         -p: 'T' to build a protein database, 'F' (default) for nucleotide
         -n: name of the database to build

        The input handler derives -l and -n from the file name given as
        input, so callers normally only supply the fasta filepath.
    """

    _command = 'formatdb'
    _parameters = {
        '-i': ValuedParameter(Prefix='-', Name='i', Delimiter=' ', IsPath=True),
        '-l': ValuedParameter(Prefix='-', Name='l', Delimiter=' ', IsPath=True),
        '-o': ValuedParameter(Prefix='-', Name='o', Delimiter=' ', Value='T'),
        '-p': ValuedParameter(Prefix='-', Name='p', Delimiter=' ', Value='F'),
        '-n': ValuedParameter(Prefix='-', Name='n', Delimiter=' ')
    }
    _input_handler = '_input_as_parameter'
    _suppress_stdout = True
    _suppress_stderr = True

    def _input_as_parameter(self, data):
        """ Set the input path, log path and db name based on data

            data: a fasta filepath

            Turns on -i with data, and names both the log file (-l) and
            the database (-n) after the file name component of data.
            Returns the empty string; the input itself travels through
            the -i parameter.
        """
        self.Parameters['-i'].on(data)
        # access data through self.Parameters so we know it's been cast
        # to a FilePath
        input_filepath = self.Parameters['-i'].Value
        input_filename = split(input_filepath)[1]
        # Only the file name (no directory part) is used for -l and -n.
        # Original author's note: formatdb ignores the working directory
        # if no name is passed.
        # Format first, then wrap, so the value handed over is a FilePath
        # rather than the plain str that FilePath('%s.log') % name yields.
        self.Parameters['-l'].on(FilePath('%s.log' % input_filename))
        self.Parameters['-n'].on(FilePath(input_filename))
        return ''

    def _get_result_paths(self, data):
        """ Build the dict of result filepaths

            data: unused; paths are derived from self.WorkingDir and the
             current -n, -l and -p parameter values.

            Returns a dict holding a ResultPath under 'log' plus one
            ResultPath per database file found on disk, keyed by whatever
            follows '<db_name>.' in the path (e.g. 'nsq' or '01.psd').
        """
        # access data through self.Parameters so we know it's been cast
        # to a FilePath
        wd = self.WorkingDir
        db_name = self.Parameters['-n'].Value
        log_name = self.Parameters['-l'].Value

        result = {}
        result['log'] = ResultPath(Path=wd + log_name, IsWritten=True)

        if self.Parameters['-p'].Value == 'F':
            extensions = ['nhr', 'nin', 'nsq', 'nsd', 'nsi']
        else:
            extensions = ['phr', 'pin', 'psq', 'psd', 'psi']

        # Anchor the pattern on '<db_name>.' so that files belonging to a
        # different database whose name merely starts with db_name (e.g.
        # 'foo_bar.nhr' when db_name is 'foo') are not picked up; such a
        # match has no '<db_name>.' to split on and would raise IndexError.
        db_prefix = db_name + '.'
        for extension in extensions:
            for file_path in glob(wd + (db_prefix + '*' + extension)):
                # this will match e.g. nr.01.psd and nr.psd
                key = file_path.split(db_prefix)[1]
                result[key] = ResultPath(Path=file_path, IsWritten=True)
        return result

    def _accept_exit_status(self, exit_status):
        """ Return True when the exit status was 0
        """
        return exit_status == 0
def build_blast_db_from_fasta_path(fasta_path, is_protein=False,
                                   output_dir=None, HALT_EXEC=False):
    """Build blast db from fasta_path; return db name and list of files created

        **If using to create temporary blast databases, you can call
        cogent.util.misc.remove_files(db_filepaths) to clean up all the
        files created by formatdb when you're done with the database.

        fasta_path: path to fasta file of sequences to build database from
        is_protein: True if working on protein seqs (default: False)
        output_dir: directory where output should be written
         (default: directory containing fasta_path)
        HALT_EXEC: halt just before running the formatdb command and
         print the command -- useful for debugging

        The returned db name is output_dir joined with the file name of
        fasta_path; the returned list holds the .name of every value in
        the application result that has one.
    """
    source_dir, source_name = split(fasta_path)

    if not output_dir:
        # No explicit destination: write next to the input file.
        output_dir = source_dir or '.'
        # The application changes into output_dir, so hand it the bare
        # file name to keep relative paths from confusing it.
        fasta_path = source_name

    # Join directory and file name with exactly one '/' between them.
    db_name = (output_dir + source_name if output_dir.endswith('/')
               else output_dir + '/' + source_name)

    # Configure and run the application controller.
    formatter = FormatDb(WorkingDir=output_dir, HALT_EXEC=HALT_EXEC)
    formatter.Parameters['-p'].on('T' if is_protein else 'F')
    run_result = formatter(fasta_path)

    created_paths = []
    for handle in run_result.values():
        try:
            path = handle.name
        except AttributeError:
            # not a file object, so no path to return
            continue
        created_paths.append(path)

    return db_name, created_paths
def build_blast_db_from_fasta_file(fasta_file, is_protein=False,
                                   output_dir=None, HALT_EXEC=False):
    """Build blast db from fasta_file; return db name and list of files created

        **If using to create temporary blast databases, you can call
        cogent.util.misc.remove_files(db_filepaths) to clean up all the
        files created by formatdb when you're done with the database.

        fasta_file: iterable of fasta lines (e.g. an open file); each line
         is stripped and written to a temporary fasta file in output_dir
        is_protein: True if working on protein seqs (default: False)
        output_dir: directory where output should be written
         (default: current directory)
        HALT_EXEC: halt just before running the formatdb command and
         print the command -- useful for debugging

        The temporary fasta file is appended to the returned list of
        filepaths so that it is cleaned up along with the database files.
    """
    output_dir = output_dir or '.'
    fasta_path = get_tmp_filename(
        tmp_dir=output_dir, prefix="BLAST_temp_db_", suffix=".fasta")

    # The with-block guarantees the handle is closed even if iterating
    # fasta_file or writing a line raises.
    with open(fasta_path, 'w') as fasta_f:
        for line in fasta_file:
            fasta_f.write('%s\n' % line.strip())

    # output_dir=None makes the database land in the directory containing
    # fasta_path, which is the output_dir the temp file was created in.
    blast_db, db_filepaths = build_blast_db_from_fasta_path(
        fasta_path, is_protein=is_protein, output_dir=None,
        HALT_EXEC=HALT_EXEC)

    db_filepaths.append(fasta_path)

    return blast_db, db_filepaths
def build_blast_db_from_seqs(seqs, is_protein=False,
                             output_dir='./', HALT_EXEC=False):
    """Build blast db from seqs; return db name and list of files created

        **If using to create temporary blast databases, you can call
        cogent.util.misc.remove_files(db_filepaths) to clean up all the
        files created by formatdb when you're done with the database.

        seqs: sequence collection or alignment object (must provide a
         toFasta() method)
        is_protein: True if working on protein seqs (default: False)
        output_dir: directory where output should be written
         (default: current directory)
        HALT_EXEC: halt just before running the formatdb command and
         print the command -- useful for debugging

        The sequences are written to a temporary fasta file which is
        removed once the database has been built. If building the
        database raises, the temporary file is left in place.
    """
    # Build a temp filepath
    tmp_fasta_filepath = get_tmp_filename(
        prefix='Blast_tmp_db', suffix='.fasta')

    # write the sequence collection to the temp file; the with-block
    # closes the handle even if toFasta() or the write raises
    with open(tmp_fasta_filepath, 'w') as tmp_fasta_file:
        tmp_fasta_file.write(seqs.toFasta())

    # build the blast database
    db_name, db_filepaths = build_blast_db_from_fasta_path(
        tmp_fasta_filepath, is_protein=is_protein,
        output_dir=output_dir, HALT_EXEC=HALT_EXEC)

    # clean-up the temporary file
    remove(tmp_fasta_filepath)

    # return the results
    return db_name, db_filepaths
def parse_command_line_parameters():
    """ Parse the command line; return (options, positional arguments)

        Recognized options:
         -p/--is_protein: flag, build a protein database
         -o/--output_dir: directory to write the database to

        Exactly one positional argument (the fasta filepath) is required;
        any other count is reported through the parser's error method.
    """
    option_parser = OptionParser(
        usage='usage: %prog [options] fasta_filepath',
        version='Version: %prog 0.1')

    # Flag selecting a protein (rather than nucleotide) database
    option_parser.add_option(
        '-p', '--is_protein',
        dest='is_protein',
        action='store_true',
        default=False,
        help='Pass if building db of protein sequences '
             '[default: False, nucleotide db]')

    # Destination directory for the database files
    option_parser.add_option(
        '-o', '--output_dir',
        dest='output_dir',
        action='store',
        type='string',
        default=None,
        help='the output directory '
             '[default: directory containing input fasta_filepath]')

    options, positional = option_parser.parse_args()

    if len(positional) != 1:
        option_parser.error(
            'Must provide single filepath to build database from.')

    return options, positional
if __name__ == "__main__":
    # Script entry point: build a blast database from the single fasta
    # filepath given on the command line, honouring -p and -o.
    opts, args = parse_command_line_parameters()
    db_name, db_filepaths = build_blast_db_from_fasta_path(
        args[0],
        is_protein=opts.is_protein,
        output_dir=opts.output_dir)
|