This file is indexed.

/usr/lib/python2.7/dist-packages/cogent/app/formatdb.py is in python-cogent 1.9-9.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#!/usr/bin/env python
# Author: Greg Caporaso (gregcaporaso@gmail.com)
# formatdb.py

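""" Application controller for the formatdb program, plus convenience
functions for building BLAST databases from fasta files or sequence objects.

File created on 16 Sep 2009.

"""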
from __future__ import division
from optparse import OptionParser
from os.path import split, splitext
from os import remove
from glob import glob
from cogent.app.util import CommandLineApplication, ResultPath, get_tmp_filename
from cogent.app.parameters import ValuedParameter, FilePath

__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2007-2016, The Cogent Project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.9"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Production"

class FormatDb(CommandLineApplication):
    """ ApplicationController for formatting blast databases
    
        Currently contains a minimal parameter set:
         -i: input fasta filepath (set from the data passed on call)
         -l: log file name (derived from the input file name)
         -o: defaults to 'T' (see the formatdb documentation)
         -p: 'T' for a protein database, 'F' (default) for nucleotide
         -n: database name (derived from the input file name)
    """

    _command = 'formatdb'
    _parameters = {\
     '-i':ValuedParameter(Prefix='-',Name='i',Delimiter=' ',IsPath=True),\
     '-l':ValuedParameter(Prefix='-',Name='l',Delimiter=' ',IsPath=True),\
     '-o':ValuedParameter(Prefix='-',Name='o',Delimiter=' ',Value='T'),\
     '-p':ValuedParameter(Prefix='-',Name='p',Delimiter=' ',Value='F'),\
     '-n':ValuedParameter(Prefix='-',Name='n',Delimiter=' ')
     }
    _input_handler = '_input_as_parameter'
    _suppress_stdout = True
    _suppress_stderr = True

    def _input_as_parameter(self,data):
        """ Set the input path, log path and db name based on data

            data: path to the fasta file the database is built from

            Returns the empty string; the input is conveyed entirely
            through self.Parameters.
        """
        self.Parameters['-i'].on(data)
        # access data through self.Parameters so we know it's been cast
        # to a FilePath
        input_filepath = self.Parameters['-i'].Value
        input_filename = split(input_filepath)[1]
        # FIXME: the following overrides any -l/-n values set by the caller.
        # Both names are always passed explicitly (file name only, no
        # directory) because formatdb ignores the working directory if no
        # name is passed.
        self.Parameters['-l'].on(FilePath(input_filename + '.log'))
        self.Parameters['-n'].on(FilePath(input_filename))
        return ''

    def _get_result_paths(self,data):
        """ Build the dict of result filepaths

            data: unused here; the paths are derived from self.Parameters
             and self.WorkingDir.

            Returns a dict holding a ResultPath under 'log' for the log
            file, plus one ResultPath per database file found on disk,
            keyed by the part of the file name that follows '<db name>.'
            (e.g. 'nhr', or '01.nhr' for a multi-volume database).
        """
        # access data through self.Parameters so we know it's been cast
        # to a FilePath
        wd = self.WorkingDir
        db_name = self.Parameters['-n'].Value
        log_name = self.Parameters['-l'].Value
        result = {}
        result['log'] = ResultPath(Path=wd + log_name, IsWritten=True)
        if self.Parameters['-p'].Value == 'F':
            extensions = ['nhr','nin','nsq','nsd','nsi']
        else:
            extensions = ['phr','pin','psq','psd','psi']
        # Keys are computed from the file name alone, so a working directory
        # whose path happens to contain '<db name>.' cannot corrupt them.
        key_prefix = split(db_name)[1] + '.'
        for extension in extensions:
            for file_path in glob(wd + (db_name + '*' + extension)):
                # this will match e.g. nr.01.psd and nr.psd
                file_name = split(file_path)[1]
                if not file_name.startswith(key_prefix):
                    # the glob also matches e.g. nr2.psd, which belongs to
                    # a different database -- skip it
                    continue
                key = file_name[len(key_prefix):]
                result[key] = ResultPath(Path=file_path, IsWritten=True)
        return result

    def _accept_exit_status(self,exit_status):
        """ Return True when the exit status was 0
        """
        return exit_status == 0
        
def build_blast_db_from_fasta_path(fasta_path,is_protein=False,
    output_dir=None,HALT_EXEC=False):
    """Run formatdb on a fasta file; return (db name, list of files created)

        The returned db name is output_dir joined with the fasta file name.
        The returned list holds the .name of every entry in the application
        result that has one.

        **When the database is only needed temporarily, pass the returned
        list to cogent.util.misc.remove_files to delete everything
        formatdb wrote once you are finished with it.

        fasta_path: path to fasta file of sequences to build database from
        is_protein: True if working on protein seqs (default: False)
        output_dir: directory where output should be written
         (default: directory containing fasta_path)
        HALT_EXEC: halt just before running the formatdb command and
         print the command -- useful for debugging
    """
    source_dir, fasta_filename = split(fasta_path)
    if not output_dir:
        # No destination given: work in the fasta file's own directory.
        # The app will cd there, so hand it the bare file name to keep
        # relative paths from confusing it.
        output_dir = source_dir or '.'
        fasta_path = fasta_filename

    # Join directory and file name with exactly one separating slash
    separator = '' if output_dir.endswith('/') else '/'
    db_name = output_dir + separator + fasta_filename

    # Configure and run the application controller
    formatter = FormatDb(WorkingDir=output_dir,HALT_EXEC=HALT_EXEC)
    formatter.Parameters['-p'].on('T' if is_protein else 'F')
    outputs = formatter(fasta_path)

    # Only file-like results carry a .name; anything else has no path
    # to report and is left out.
    db_filepaths = [entry.name for entry in outputs.values()
                    if hasattr(entry, 'name')]
    return db_name, db_filepaths
    
def build_blast_db_from_fasta_file(fasta_file,is_protein=False,
    output_dir=None,HALT_EXEC=False):
    """Build blast db from fasta_file; return db name and list of files created
    
        The lines of fasta_file are first copied (stripped of surrounding
        whitespace) to a temporary fasta file, whose path is requested from
        get_tmp_filename with tmp_dir=output_dir. The database is then built
        from that copy. The temporary fasta path is appended to the returned
        list of files so it can be cleaned up together with the database.
    
        **If using to create temporary blast databases, you can call
        cogent.util.misc.remove_files(db_filepaths) to clean up all the
        files created by formatdb when you're done with the database.
    
        fasta_file: open fasta file, or any other iterable of fasta lines
        is_protein: True if working on protein seqs (default: False)
        output_dir: directory where the temporary fasta file and the
         database should be written (default: current directory)
        HALT_EXEC: halt just before running the formatdb command and
         print the command -- useful for debugging
    """
    output_dir = output_dir or '.'
    fasta_path = get_tmp_filename(
     tmp_dir=output_dir, prefix="BLAST_temp_db_", suffix=".fasta")
    
    # 'with' closes the handle even if reading fasta_file or writing fails
    with open(fasta_path,'w') as fasta_f:
        for line in fasta_file:
            fasta_f.write('%s\n' % line.strip())
    
    # output_dir=None makes the database land in the directory containing
    # fasta_path, i.e. next to the temporary fasta file written above
    blast_db, db_filepaths = build_blast_db_from_fasta_path(
     fasta_path, is_protein=is_protein, output_dir=None, HALT_EXEC=HALT_EXEC)
     
    db_filepaths.append(fasta_path)
    
    return blast_db, db_filepaths
    
def build_blast_db_from_seqs(seqs,is_protein=False,
    output_dir='./',HALT_EXEC=False):
    """Build blast db from seqs; return db name and list of files created
    
        seqs is written in fasta format (via seqs.toFasta()) to a temporary
        file, the database is built from that file, and the temporary file
        is removed again before returning.
    
        **If using to create temporary blast databases, you can call
        cogent.util.misc.remove_files(db_filepaths) to clean up all the
        files created by formatdb when you're done with the database.
    
        seqs: sequence collection or alignment object
        is_protein: True if working on protein seqs (default: False)
        output_dir: directory where output should be written
         (default: current directory)
        HALT_EXEC: halt just before running the formatdb command and
         print the command -- useful for debugging
    """
    
    # Build a temp filepath
    tmp_fasta_filepath = get_tmp_filename(
     prefix='Blast_tmp_db',suffix='.fasta')
    # write the sequence collection to the temp file; 'with' closes the
    # handle even if seqs.toFasta() or the write raises
    with open(tmp_fasta_filepath,'w') as tmp_fasta_file:
        tmp_fasta_file.write(seqs.toFasta())
    
    # build the blast database
    db_name, db_filepaths = build_blast_db_from_fasta_path(
     tmp_fasta_filepath,is_protein=is_protein,
     output_dir=output_dir,HALT_EXEC=HALT_EXEC)
     
    # clean-up the temporary file
    # NOTE(review): deliberately not in a 'finally' -- if the build raises
    # (presumably including a HALT_EXEC halt) the fasta file is left in
    # place, as before, so the failing command can be re-run -- confirm
    remove(tmp_fasta_filepath)
    
    # return the results
    return db_name, db_filepaths


def parse_command_line_parameters():
    """ Parse the command line; return (options, positional arguments)

        Exactly one positional argument -- the fasta filepath -- must be
        given; otherwise the parser reports an error.
    """
    parser = OptionParser(usage='usage: %prog [options] fasta_filepath',
                          version='Version: %prog 0.1')

    # Flag selecting a protein database instead of a nucleotide one
    parser.add_option(
        '-p', '--is_protein',
        dest='is_protein', action='store_true', default=False,
        help=('Pass if building db of protein sequences '
              '[default: False, nucleotide db]'))

    # Directory the database files are written to
    parser.add_option(
        '-o', '--output_dir',
        dest='output_dir', action='store', type='string', default=None,
        help=('the output directory '
              '[default: directory containing input fasta_filepath]'))

    options, positional = parser.parse_args()
    if len(positional) != 1:
        parser.error('Must provide single filepath to build database from.')

    return options, positional


if __name__ == "__main__":
    # Script mode: build a blast database from the single fasta filepath
    # given on the command line, honouring the -p and -o options.
    options, positional = parse_command_line_parameters()

    db_name, db_filepaths = build_blast_db_from_fasta_path(
        positional[0],
        is_protein=options.is_protein,
        output_dir=options.output_dir)