This file is indexed.

/usr/lib/python2.7/dist-packages/cogent/app/fastq_join.py is in python-cogent 1.9-9.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#!/usr/bin/env python
# file: fastq_join.py

# Application controller for ea-utils v1.1.2-537 
# fastq processing utilities
# http://code.google.com/p/ea-utils/
# 

from cogent.app.parameters import ValuedParameter, FlagParameter
from cogent.app.util import CommandLineApplication, ResultPath, \
    ApplicationError
import os 
import tempfile
import shutil

__author__ = "Michael Robeson"
__copyright__ = "Copyright 2007-2013, The Cogent Project"
__credits__ = ["Michael Robeson"]
__license__ = "GPL"
__version__ = "1.9"
__maintainer__ = "Michael Robeson"
__email__ = "robesonms@ornl.gov"
__status__ = "Development"

class FastqJoin(CommandLineApplication):
    """fastq-join (v1.1.2) application controller for joining paired-end reads.

    The controller is configured through the ``-o``, ``-v``, ``-p``, ``-m``
    and ``-r`` parameters declared in ``_parameters``. Input files are
    handled by the ``_input_as_paths`` input handler. The output base
    path is read from ``-o`` and is required by ``_get_result_paths``.
    """

    _command = 'fastq-join'

    _parameters = {
        # Description copied from 'fastq-join'
        # Usage: fastq-join [options] <read1.fq> <read2.fq> [mate.fq] -o <read.%.fq>

        # Output:
        # You can supply 3 -o arguments, for un1, un2, join files, or one
        # argument as a file name template.  The suffix 'un1, un2, or join' is
        # appended to the file, or they replace a %-character if present.
        # If a 'mate' input file is present (barcode read), then the files
        # 'un3' and 'join2' are also created.

        # we'll only handle one output base path / file name
        # -o FIL:  See 'Output' above
        '-o': ValuedParameter(Prefix='-', Delimiter=' ', Name='o'),

        # -v C:  Verifies that the 2 files probe id's match up to char C
        # use ' ' (space) for Illumina reads
        '-v': ValuedParameter(Prefix='-', Delimiter=' ', Name='v'),

        # -p N:  N-percent maximum difference (8)
        '-p': ValuedParameter(Prefix='-', Delimiter=' ', Name='p'),

        # -m N:  N-minimum overlap (6)
        '-m': ValuedParameter(Prefix='-', Delimiter=' ', Name='m'),

        # -r FIL:  Verbose stitch length report
        '-r': ValuedParameter(Prefix='-', Delimiter=' ', Name='r'),
    }

    _input_handler = '_input_as_paths'

    def _get_output_path(self):
        """Return the absolute output base path taken from the '-o' parameter.

        Raises ValueError if '-o' has not been switched on.
        """
        if not self.Parameters['-o'].isOn():
            # Call form of raise is valid on both Python 2 and Python 3.
            raise ValueError("No output path specified.")
        return self._absolute(str(self.Parameters['-o'].Value))

    def _get_stitch_report_path(self):
        """Return the absolute stitch report path ('-r'), or None if unset."""
        if self.Parameters['-r'].isOn():
            return self._absolute(str(self.Parameters['-r'].Value))
        return None

    def _get_result_paths(self, data):
        """Capture fastq-join output.

        Three output files are always registered, named by appending a
        suffix to the output base path:
            outputjoin : assembled paired reads
            outputun1 : unassembled reads_1
            outputun2 : unassembled reads_2

        If a barcode / mate-pairs file was also provided then the following
        additional files are registered when both exist on disk:
            outputjoin2
            outputun3

        If a verbose stitch length report (-r) was requested, the user
        specified file name is registered under 'Report'.

        data : unused here; part of the method signature.

        Returns a dict mapping result names to ResultPath objects.
        Raises ValueError (from _get_output_path) if '-o' is not set.
        """
        output_path = self._get_output_path()

        result = {}

        # always output:
        result['Assembled'] = ResultPath(Path=output_path + 'join',
                                         IsWritten=True)
        result['UnassembledReads1'] = ResultPath(Path=output_path + 'un1',
                                                 IsWritten=True)
        result['UnassembledReads2'] = ResultPath(Path=output_path + 'un2',
                                                 IsWritten=True)

        # check if stitch report is requested:
        stitch_path = self._get_stitch_report_path()
        if stitch_path:
            result['Report'] = ResultPath(Path=stitch_path,
                                          IsWritten=True)

        # Check if mate file / barcode file is present.
        # We need to check this way because there are no infile parameters:
        # the only evidence of a mate input is that both mate outputs exist.
        mate_path_string = output_path + 'join2'
        mate_unassembled_path_string = output_path + 'un3'
        if os.path.exists(mate_path_string) and \
                os.path.exists(mate_unassembled_path_string):
            result['Mate'] = ResultPath(Path=mate_path_string,
                                        IsWritten=True)
            result['MateUnassembled'] = ResultPath(
                Path=mate_unassembled_path_string,
                IsWritten=True)
        return result

    def getHelp(self):
        """Return a short help text pointing to the fastq-join resources."""
        help_str = """
        For issues with the actual program 'fastq-join', see the following:
    
        For basic help, type the following at the command line:
            'fastq-join'

        Website:
           http://code.google.com/p/ea-utils/

        For questions / comments subit an issue to:
        http://code.google.com/p/ea-utils/issues/list
        """
        return help_str


def join_paired_end_reads_fastqjoin(
    reads1_infile_path,
    reads2_infile_path,
    perc_max_diff=None, # typical default is 8
    min_overlap=None, # typical default is 6
    outfile_label = 'fastqjoin',
    params={},
    working_dir=tempfile.gettempdir(),
    SuppressStderr=True,
    SuppressStdout=True,
    HALT_EXEC=False):
    """Run fastq-join on two read files to assemble paired-end reads.

        -reads1_infile_path : reads1.fastq infile path
        -reads2_infile_path : reads2.fastq infile path
        -perc_max_diff : maximum % diff of overlap differences allowed;
                         an int in 0-100, or None to leave '-p' unset
        -min_overlap : minimum allowed overlap required to assemble reads;
                       an int > 0, or None to leave '-m' unset
        -outfile_label : base name for output files; '.' is appended and
                         the result is passed as '-o'. None leaves '-o'
                         unset.
        -params : dictionary of application controller parameters; it is
                  only forwarded to FastqJoin by this function
        -working_dir : passed to FastqJoin as WorkingDir
        -SuppressStderr, SuppressStdout, HALT_EXEC : passed to FastqJoin

        Returns a dict with the keys 'Assembled', 'UnassembledReads1' and
        'UnassembledReads2', each mapped to the path of the corresponding
        output file after it has been renamed to end in '.fastq'.

        Raises IOError if an input file, or an expected output file, does
        not exist. Raises ValueError if perc_max_diff, min_overlap or
        outfile_label is invalid.

    """
    # make absolute infile paths and check that they exist
    infile_paths = [os.path.abspath(reads1_infile_path),
                    os.path.abspath(reads2_infile_path)]
    for p in infile_paths:
        if not os.path.exists(p):
            raise IOError('File not found at: %s' % p)

    # Validate arguments before building the application controller so
    # that bad input fails fast.
    if perc_max_diff is not None:
        if not (isinstance(perc_max_diff, int) and
                0 <= perc_max_diff <= 100):
            raise ValueError("perc_max_diff must be int between 0-100!")

    if min_overlap is not None:
        # Zero is rejected, so the message states the bound that is
        # actually enforced.
        if not (isinstance(min_overlap, int) and 0 < min_overlap):
            raise ValueError("min_overlap must be an int > 0!")

    if outfile_label is not None and not isinstance(outfile_label, str):
        raise ValueError("outfile_label must be a string!")

    fastq_join_app = FastqJoin(params=params,
                               WorkingDir=working_dir,
                               SuppressStderr=SuppressStderr,
                               SuppressStdout=SuppressStdout,
                               HALT_EXEC=HALT_EXEC)

    # set param. Helps with QIIME integration to have these values
    # set to None by default. This way we do not have to worry
    # about changes in default behaviour of the wrapped
    # application
    if perc_max_diff is not None:
        fastq_join_app.Parameters['-p'].on(perc_max_diff)

    if min_overlap is not None:
        fastq_join_app.Parameters['-m'].on(min_overlap)

    if outfile_label is not None:
        fastq_join_app.Parameters['-o'].on(outfile_label + '.')

    # run assembler
    result = fastq_join_app(infile_paths)

    # Store output file path data to dict
    path_dict = {}
    for key in ('Assembled', 'UnassembledReads1', 'UnassembledReads2'):
        path_dict[key] = result[key].name

    # sanity check that files actually exist in path locations
    for path in path_dict.values():
        if not os.path.exists(path):
            raise IOError('Output file not found at: %s' % path)

    # fastq-join automatically appends: 'join', 'un1', or 'un2'
    # to the end of the file names. But we want to rename them so
    # they end in '.fastq'. So, we iterate through a snapshot of
    # path_dict, rename the files and overwrite the dict values.
    for key, file_path in list(path_dict.items()):
        new_file_path = file_path + '.fastq'
        shutil.move(file_path, new_file_path)
        path_dict[key] = new_file_path

    # sanity check that the renamed files actually exist
    for path in path_dict.values():
        if not os.path.exists(path):
            raise IOError('Output file not found at: %s' % path)

    return path_dict