This file is indexed.

/usr/share/pyshared/cogent/parse/comrna.py is in python-cogent 1.5.3-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
#!/usr/bin/env python
#file comrna_parser.py
"""Parser for comRNA output format

To reduce number of structures that parser report set first to True, then parser
will only report structures from first block (Maximum stem similarity score block). 

The function common can be used to have the most common occuring structure be 
reported first, if all structure only occurs once the structure with the most pairs 
will be reported as the most common. 
"""
from cogent.util.transform import make_trans
from cogent.struct.rna2d   import Pairs
from cogent.struct.knots   import opt_single_random
from string                import index

__author__ = "Shandy Wikman"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__contributors__ = ["Shandy Wikman"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Shandy Wikman"
__email__ = "ens01svn@cs.umu.se"
__status__ = "Development"

def comRNA_parser(lines=None,pseudo=True,first=False):
    """Parsed comRNA output.

    pseudo - if True, report results with pseudoknots; if flag
             is False pseudoknots will be removed.
    """
    names = get_names(lines)   
    result = []
    
    for block in minimalComrnaParser(lines):
        for structure in blockParser(block):
            for struct in structParser(structure):
                for pairs,seq in pairsParser(struct,names):
                    result.append([seq,pairs])
                if first:    
                    break
            if first:
                break
        if first:
            break

    if not pseudo:
        tmp = []
        for block in result:
            tmp.append([block[0],opt_single_random(block[-1])])
        result = tmp
    return result

def common(structs):
    """
    Will return a list of sequences and structures with the most common structure 
    first in the list. (rest or list unordered!)
    Don't care which of the sequences for the "winning" sequence that is reported
    since the are not ranked amongst them self.
    """
    
    frequency = {}
    v = 0
    indx = 0
    result = []
    tmp_list = [] #lookup the seq for the structures,dont care which winner seq 
    key = []
    for block in structs:
        tmp_list.extend(block)
        p = tuple(block[-1])
        if frequency.__contains__(p): #everytime struct p appears count up by 1
            frequency[p]+=1
        else:
            frequency[p]=1
        nr = frequency[p]
        if nr > v: #Which struct appears most times
            v = nr
            key = p

    #if winning structure has frequency == 1 all structure apper only once
    if frequency[key]==1:
        longest = 0
        for block in structs:
            l = len(block[-1])
            if l > longest: #pick longest sequence as the winner
                key = tuple(block[-1])
    
    winner = Pairs(key)
    indx = tmp_list.index(winner)-1
    result.append([tmp_list[indx],winner]) #adds the most common structure first
    del frequency[key]
    for i in frequency.keys(): #rest of structures added
        i = Pairs(i)
        indx = tmp_list.index(i)-1
        result.append([tmp_list[indx],i])
        
    return result

def get_names(lines):
    """
    Retrieves the names of the sequences in the output.
    """ 
    next = False 
    names = []
    for line in lines:
        if next:
            if len(line) == 1:
                break
            else:
                tmp = line.split()
                names.append(tmp[1])
        if line.startswith('Sequences loaded ...'):
            next = True
    return names

def minimalComrnaParser(lines):
    """
    Parses the output file in to blocks depending on the S score
    S score is the Maximum stem similarity score.
    """
    block = []
    first = True
    record = False
    for line in lines:
        if line.startswith('===========================  S ='):
            record = True
            if not first:
                yield block
                block = []
            first = False
        if record:    
            block.append(line)
    yield block

def blockParser(block):
    """
    Parses every block of S scores in to blocks of structures
    every S score block has 10 or less structures
    """
    struct = []
    first = True
    record = False
    for line in block:
        if line.startswith('Structure #'):
            record = True
            if not first:
                yield struct
                struct = []
            first = False
        if record:
            struct.append(line)
    yield struct

def structParser(lines):
    """
    Parses a structure block into a block containing the sequens and structures
    lines.
    """
    blc = 0 #blank line counter
    bc = 0 #block counter
    struct = []
    record  = False
    for line in lines:
        if len(line) == 1:
            blc +=1
            record = False
        if blc == 2:
            blc = 0
            bc +=1
            record = True
        if record and bc < 3:
            struct.append(line)

    yield struct

            
def pairsParser(seqBlock,names):
    """
    Takes a structure block and parse that into structures
    """
    for name in names:
        seq = []
        sIndx = [] #start index, where in the line the sequence start
        struct = [] #structure lines
        record = False
        for line in seqBlock:
            if line.startswith(name+' '):
                tmp = line.split()
                #if seq length is shorter then 80 for one seq and longer
                #for another seq the following block will be empty for the
                #shorter sequence. this if statement protects against that
                if len(tmp) == 4: 
                    try:
                        seq.append(tmp[2])#[name,start nr,seq,end nr]
                    except:
                        print 'LINE',line
                        print 'BLOCK', seqBlock
                    sIndx.append(index(line,tmp[2]))            
                    record = True
                else:
                    continue
            else:
                if record:
                    record = False
                    struct.append(line)

###############################################################################
# Construction of the full sequence and structure and then mapping each letter
#in structure to a position

        Fseq = '' #full sequence
        Fstruct = '' #full structure
        for i in range(len(seq)):
            # slice out corresponding structure to sequence
            #so you can get the same index for structure and sequence
            tmpStruct = struct[i][sIndx[i]:(sIndx[i]+len(seq[i]))]
            Fseq = ''.join([Fseq,seq[i]])
            Fstruct = ''.join([Fstruct,tmpStruct])
        #Applies a position to every letter in structure sequence    
        letterPos = zip(range(len(Fseq)),Fstruct)
        
###############################################################################
#Cunstruction of dictionary for where every letter in structure has a list of
#positions corresponding to that of that letter in respect to the sequence

        alphabet = {}
        for pos, letter in letterPos:
            indices = []
            #if the dict contains the letter you want to add to that list
            if alphabet.__contains__(letter): 
                indices = alphabet[letter]
                indices.append(pos)
                alphabet[letter] = indices
            #else you want to create a new list for that letter
            elif not letter==' ':
                indices.append(pos)
                alphabet[letter] = indices
                
###############################################################################
#Each list in alphabet needs to be split in two,
#oL and cL (open and close list), to be able to fold the positions into pairs

        pairs = []
        for value in alphabet.values():
            middle = len(value)/2
            oL = value[:middle]
            cL = value[middle:]
            #pairs are created by making a tuple of the first in oL to
            #the last in cl, second in oL to second last in cL and so on
            pairs.extend(zip(oL,cL.__reversed__()))

        yield Pairs(pairs),Fseq