/usr/lib/qiime/bin/denoise_wrapper.py is in qiime 1.3.0-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | #!/usr/bin/python
# File created on 09 Feb 2010
"""Denoising of 454 *.sff.txt files"""
# NOTE: the docstring has to be the first statement of the module (it may
# legally precede a __future__ import); placed after the import it is just a
# discarded string expression and the module's __doc__ stays None.
from __future__ import division

# Module metadata; __version__ is re-exported through script_info['version'].
__author__ = "Jens Reeder"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Jens Reeder","Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.3.0"
__maintainer__ = "Jens Reeder"
__email__ = "jens.reeder@gmail.com"
__status__ = "Release"
from os.path import exists, splitext, split
from qiime.util import make_option
from numpy import array
from cogent.core.alignment import SequenceCollection
from cogent.app.util import ApplicationError
from qiime.util import parse_command_line_parameters, create_dir,\
handle_error_codes
from qiime.denoise_wrapper import fast_denoiser
from qiime.parse import parse_mapping_file
from qiime.format import write_Fasta_from_name_seq_pairs
# Command line interface description. The whole dict is handed to
# parse_command_line_parameters(**script_info) by main().
script_info = {}

script_info['brief_description'] = "Denoise a flowgram file"

script_info['script_description'] = "This script will denoise a flowgram file in .sff.txt format, which is the output of sffinfo."

# Each usage entry is a (title, description, command line) triple.
script_info['script_usage'] = [
    (
        "Example:",
        "Denoise flowgrams in file 454Reads.sff.txt, discard flowgrams not in seqs.fna, and extract primer from map.txt:",
        "%prog -i 454Reads.sff.txt -f seqs.fna -m map.txt",
    ),
    (
        "Multi-core Example:",
        "Denoise flowgrams in file 454Reads.sff.txt using 2 cores on your machine in parallel:",
        "%prog -n 2 -i 454Reads.sff.txt -f seqs.fna -m map.txt",
    ),
]

script_info['output_description'] = "This script results in a OTU like mapping file along with a sequence file of denoised (FASTA-format). Note that the sequences coming from denoising are no real OTUs, and have to be sent to pick_otus.py if the users wishes to have a defined similarity threshold."

# Options that must always be given on the command line.
script_info['required_options'] = [
    make_option(
        '-i', '--input_file',
        dest='sff_fp', type='string', action='store',
        help=('path to flowgram files (.sff.txt), '
              'comma separated')),
    make_option(
        '-f', '--fasta_file',
        dest='fasta_fp', type='string', action='store',
        help='path to fasta file from split_libraries.py'),
]

# Options with defaults. One of -m / -p has to be supplied as well; that
# constraint is enforced in main(), not by the option parser.
script_info['optional_options'] = [
    make_option(
        '-o', '--output_dir',
        dest='output_dir', type='string', action='store',
        default="denoised_seqs/",
        help=('path to output directory '
              '[default: %default]')),
    make_option(
        '-n', '--num_cpus',
        dest='num_cpus', type='int', action='store',
        default=1,
        help=('number of CPUs '
              '[default: %default]')),
    make_option(
        '--force_overwrite',
        dest='force', action='store_true',
        default=False,
        help=('Overwrite files in output directory '
              '[default: %default]')),
    make_option(
        '-m', '--map_fname',
        dest='map_fname', type='string', action='store',
        help=('name of mapping file, Has to contain '
              'field LinkerPrimerSequence. '
              '[REQUIRED unless --primer specified]')),
    make_option(
        '-p', '--primer',
        dest='primer', type='string', action='store',
        default=None,
        help=('primer sequence '
              '[REQUIRED unless --map_fname specified]')),
    make_option(
        '--titanium',
        dest='titanium', action='store_true',
        default=False,
        help=('Select Titanium defaults for denoiser, '
              'otherwise use FLX defaults '
              '[default: %default]')),
]

script_info['version'] = __version__
def _primer_from_mapping_file(map_fname):
    """Return the single LinkerPrimerSequence listed in a mapping file.

    map_fname: path to the mapping file. It must contain a
     LinkerPrimerSequence column (header.index fails otherwise).

    Raises ValueError if the file lists more than one distinct primer, if
    the primer is empty, or if its last base is not one of A, C, G, T.
    """
    # The context manager closes the mapping file even when parsing fails.
    with open(map_fname, "U") as map_fh:
        mapping_data, header, comments = parse_mapping_file(map_fh)

    index = header.index("LinkerPrimerSequence")
    all_primers = set(array(mapping_data)[:,index])
    if len(all_primers) != 1:
        raise ValueError(
            "Currently only data sets with one primer are allowed.\n"+
            "Make separate mapping files with only one primer, re-run split_libraries and\n"+
            "denoise with each split_library output separately.")

    primer = list(all_primers)[0]
    # Guard the [-1] lookup below: an empty primer would otherwise surface
    # as an unhelpful IndexError.
    if not primer:
        raise ValueError("LinkerPrimerSequence in mapping file is empty.")
    if primer[-1] not in "ACGT":
        raise ValueError("We currently do not support primer with "+
                         "degenerate bases at it's 3' end.")
    return primer


def main():
    """Run the denoiser on the input flowgrams and store the results.

    Parses the command line, validates all inputs, creates the output
    directory, calls fast_denoiser and writes two files into the output
    directory:
      denoised_clusters.txt - one tab separated line per cluster:
                              cluster id followed by its members
      denoised_seqs.fasta   - the centroids returned by fast_denoiser

    Raises ApplicationError if neither a primer nor a mapping file was
    given, or if the output directory exists and --force_overwrite was not
    set. Raises ValueError for unusable primers read from the mapping file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # -i may hold several comma separated paths; every one has to exist.
    for sff_fp in opts.sff_fp.split(','):
        if not exists(sff_fp):
            option_parser.error(('Flowgram file path does not exist:\n %s \n'+
                                 'Pass a valid one via -i.')% sff_fp)

    # Validate the primer source *before* touching the file system, so a
    # failed invocation does not leave an output directory behind that
    # makes the corrected re-run abort with "Directory exists".
    if not (opts.primer or opts.map_fname):
        raise ApplicationError("Either mapping file or primer required")

    # A primer given on the command line takes precedence over the
    # mapping file.
    if opts.primer:
        primer = opts.primer
    else:
        primer = _primer_from_mapping_file(opts.map_fname)

    outdir = opts.output_dir
    ret_val = create_dir(outdir, handle_errors_externally=True)
    if ret_val == 1:
        # Return code 1: directory already exists. Only continue (and
        # overwrite its content) when explicitly requested.
        if not opts.force:
            raise ApplicationError("Directory exists. Use --force to overwrite.")
    else:
        handle_error_codes(outdir, error_code=ret_val)

    # fast_denoiser receives the original comma separated path string,
    # not the split list.
    centroids, cluster_mapping = fast_denoiser(opts.sff_fp, opts.fasta_fp,
                                               outdir, opts.num_cpus, primer,
                                               titanium=opts.titanium)

    # store mapping file and centroids
    result_otu_path = '%s/denoised_clusters.txt' % outdir
    with open(result_otu_path, 'w') as otu_fh:
        # items() instead of iteritems(): valid on Python 2 and 3.
        for cluster_id, members in cluster_mapping.items():
            otu_fh.write('%s\t%s\n' % (str(cluster_id), '\t'.join(members)))

    result_fasta_path = '%s/denoised_seqs.fasta' % outdir
    # Close (and thereby flush) the FASTA file deterministically; the
    # handle used to be left open until interpreter exit.
    with open(result_fasta_path, 'w') as fasta_fh:
        write_Fasta_from_name_seq_pairs(centroids, fasta_fh)
# Run the denoiser only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|