This file is indexed.

/usr/lib/qiime/bin/denoise_wrapper.py is in qiime 1.3.0-3.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/python
# File created on 09 Feb 2010
"""Denoising of 454 *.sff.txt files"""

# NOTE: the docstring has to be the first statement of the module to be
# picked up as __doc__; a __future__ import may legally follow it.
from __future__ import division

__author__ = "Jens Reeder"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Jens Reeder","Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.3.0"
__maintainer__ = "Jens Reeder"
__email__ = "jens.reeder@gmail.com"
__status__ = "Release"
 
from os.path import exists, splitext, split
from qiime.util import make_option
from numpy import array

from cogent.core.alignment import SequenceCollection
from cogent.app.util import ApplicationError

from qiime.util import parse_command_line_parameters, create_dir,\
    handle_error_codes
from qiime.denoise_wrapper import fast_denoiser
from qiime.parse import parse_mapping_file
from qiime.format import  write_Fasta_from_name_seq_pairs

# Description of the command line interface (texts, usage examples and
# options). main() hands this dict to parse_command_line_parameters().
script_info = {
    'brief_description': "Denoise a flowgram file",

    'script_description':
        "This script will denoise a flowgram file in .sff.txt format, which is the output of sffinfo.",

    # Each usage entry is a (title, description, command line) triple.
    'script_usage': [
        ("Example:",
         "Denoise flowgrams in file 454Reads.sff.txt, discard flowgrams not in seqs.fna, and extract primer from map.txt:",
         "%prog -i 454Reads.sff.txt -f seqs.fna -m map.txt"),
        ("Multi-core Example:",
         "Denoise flowgrams in file 454Reads.sff.txt using 2 cores on your machine in parallel:",
         "%prog -n 2 -i 454Reads.sff.txt -f seqs.fna -m map.txt"),
    ],

    'output_description':
        "This script results in a OTU like mapping file along with a sequence file of denoised (FASTA-format). Note that the sequences coming from denoising are no real OTUs, and have to be sent to pick_otus.py if the users wishes to have a defined similarity threshold.",

    'required_options': [
        # Flowgram input; main() splits this value on commas.
        make_option('-i', '--input_file',
                    dest='sff_fp', type='string', action='store',
                    help='path to flowgram files (.sff.txt), '
                         'comma separated'),
        make_option('-f', '--fasta_file',
                    dest='fasta_fp', type='string', action='store',
                    help='path to fasta file from split_libraries.py'),
    ],

    'optional_options': [
        make_option('-o', '--output_dir',
                    dest='output_dir', type='string', action='store',
                    default="denoised_seqs/",
                    help='path to output directory '
                         '[default: %default]'),
        make_option('-n', '--num_cpus',
                    dest='num_cpus', type='int', action='store',
                    default=1,
                    help='number of CPUs '
                         '[default: %default]'),
        make_option('--force_overwrite',
                    dest='force', action='store_true', default=False,
                    help='Overwrite files in output directory '
                         '[default: %default]'),
        # -m and -p are alternatives: main() insists on at least one of them.
        make_option('-m', '--map_fname',
                    dest='map_fname', type='string', action='store',
                    help='name of mapping file, Has to contain '
                         'field LinkerPrimerSequence. '
                         '[REQUIRED unless --primer specified]'),
        make_option('-p', '--primer',
                    dest='primer', type='string', action='store',
                    default=None,
                    help='primer sequence '
                         '[REQUIRED unless --map_fname specified]'),
        make_option('--titanium',
                    dest='titanium', action='store_true', default=False,
                    help='Select Titanium defaults for denoiser, '
                         'otherwise use FLX defaults '
                         '[default: %default]'),
    ],

    'version': __version__,
}

def _primer_from_mapping_file(map_fname):
    """Return the one primer listed in the mapping file map_fname.

    map_fname: path to a mapping file whose header contains the field
     LinkerPrimerSequence.

    Raises ValueError if the LinkerPrimerSequence column does not hold
    exactly one distinct value, or if that primer does not end in one of
    A, C, G or T.
    """
    # Close the handle explicitly instead of leaking it; the parsed
    # result is indexed right below, so nothing is read after closing.
    map_fh = open(map_fname, "U")
    try:
        mapping_data, header, comments = parse_mapping_file(map_fh)
    finally:
        map_fh.close()

    index = header.index("LinkerPrimerSequence")
    all_primers = set(array(mapping_data)[:,index])

    if len(all_primers) != 1:
        raise ValueError(
            "Currently only data sets with one primer are allowed.\n"
            "Make separate mapping files with only one primer, re-run split_libraries and\n"
            "denoise with each split_library output separately.")

    primer = list(all_primers)[0]
    if primer[-1] not in "ACGT":
        raise ValueError("We currently do not support primer with "
                         "degenerate bases at it's 3' end.")
    return primer


def main():
    """run denoiser on input flowgrams

    Parses the command line, determines the primer (from --primer or, if
    that is not given, from the mapping file), runs fast_denoiser and
    writes two files into the output directory:
     denoised_clusters.txt: one line per cluster, tab separated, the
      cluster key followed by the cluster members
     denoised_seqs.fasta: the centroids returned by fast_denoiser

    Raises ApplicationError if neither primer nor mapping file is given,
    or if the output directory exists and --force_overwrite is not set.
    Raises ValueError if no usable primer can be taken from the mapping
    file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # -i may hold several comma separated paths, check each of them
    for sff_fp in opts.sff_fp.split(','):
        if not exists(sff_fp):
            option_parser.error(('Flowgram file path does not exist:\n %s \n'
                                 'Pass a valid one via -i.') % sff_fp)

    # Settle the primer before the output directory is touched. Otherwise
    # a run that fails here would leave the directory behind and the
    # corrected re-run would be refused unless --force_overwrite is given.
    if not (opts.primer or opts.map_fname):
        raise ApplicationError("Either mapping file or primer required")
    if opts.primer:
        primer = opts.primer
    else:
        # Read primer from Meta data file if not set on command line
        primer = _primer_from_mapping_file(opts.map_fname)

    outdir = opts.output_dir
    ret_val = create_dir(outdir, handle_errors_externally=True)
    if ret_val == 1:  # dir exists
        # with --force_overwrite the existing content is simply overwritten
        if not opts.force:
            raise ApplicationError("Directory exists. Use --force to overwrite.")
    else:
        handle_error_codes(outdir, error_code=ret_val)

    centroids, cluster_mapping = fast_denoiser(opts.sff_fp, opts.fasta_fp,
                                               outdir, opts.num_cpus, primer,
                                               titanium=opts.titanium)

    # store mapping file and centroids
    result_otu_path = '%s/denoised_clusters.txt' % outdir
    of = open(result_otu_path, 'w')
    try:
        for i, cluster in cluster_mapping.iteritems():
            of.write('%s\t%s\n' % (str(i), '\t'.join(cluster)))
    finally:
        of.close()

    result_fasta_path = '%s/denoised_seqs.fasta' % outdir
    oh = open(result_fasta_path, 'w')
    try:
        write_Fasta_from_name_seq_pairs(centroids, oh)
    finally:
        # make sure the sequences are flushed to disk and the handle is freed
        oh.close()

if __name__ == "__main__":
    main()