/usr/share/pyshared/cogent/parse/gff.py is in python-cogent 1.5.1-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
# Module metadata: authorship, copyright, licence, version and maintainer
# contact details for this module.
__author__ = "Peter Maxwell"
__copyright__ = "Copyright 2007-2011, The Cogent Project"
__credits__ = ["Peter Maxwell", "Matthew Wakefield", "Gavin Huttley"]
__license__ = "GPL"
__version__ = "1.5.1"
__maintainer__ = "Peter Maxwell"
__email__ = "pm67nz@gmail.com"
__status__ = "Production"
def GffParser(f):
    """Generate one tuple per feature record found in GFF-formatted lines.

    f is an iterable of text lines (for example an open file); passing a
    single string is rejected by an assertion.

    Each yielded tuple is
    (seqname, source, feature, start, end, score, strand, frame,
    attributes, comments).

    - Text following the first '#' on a line is returned, unstripped, as
      ``comments`` (``None`` when the line has no '#').  Lines that are
      empty once the comment and surrounding whitespace are removed are
      skipped.
    - Records are tab-separated.  A record with 8 columns is given an empty
      ``attributes`` column; any other column count than 9 fails an
      assertion.
    - ``start`` and ``end`` are returned as ints with ``start`` shifted to
      0-based indexing.  Negative values are replaced by their absolute
      value and the pair is put in ascending order, then swapped again when
      ``strand`` is '-' so that reverse-strand features have start > end.
    - All other columns are returned as the raw strings from the line; the
      attributes column is not parsed further.

    Raises AssertionError for a str argument or a malformed column count,
    and ValueError when the start/end columns are not integers.
    """
    assert not isinstance(f, str)
    for raw in f:
        # Separate the record from any trailing comment.
        record, hash_mark, trailing = raw.partition("#")
        comments = trailing if hash_mark else None
        record = record.strip()
        if not record:
            # blank line, or a line holding nothing but a comment
            continue

        # Split into the nine GFF columns, tolerating a missing attributes
        # column.
        fields = record.split('\t')
        if len(fields) == 8:
            fields.append('')
        assert len(fields) == 9, record
        seqname, source, feature, start_text, end_text = fields[:5]
        score, strand, frame, attributes = fields[5:]

        # GFF positions are 1-based; convert the start to a 0-based index.
        first = int(start_text) - 1
        last = int(end_text)

        # In GFF v2.0 features extending beyond the sequence carry negative
        # indices; only their magnitude is kept.
        if min(first, last) < 0:
            first = abs(first)
            last = abs(last)

        # GFF expects start <= end, so normalise to ascending order, then
        # flip the pair for features on the opposite strand.
        span = sorted((first, last))
        if strand == '-':
            span.reverse()
        start, end = span

        yield (seqname, source, feature, start, end, score,
               strand, frame, attributes, comments)
def parse_attributes(attribute_string):
    """Return the text enclosed by the first pair of double quotes.

    When the string holds a single double quote, everything after it is
    returned; when it holds none, the string is returned unchanged.
    """
    # Splitting on at most two quotes gives [before, inside, rest],
    # [before, after] or [whole string] depending on how many quotes exist.
    pieces = attribute_string.split('"', 2)
    if len(pieces) == 1:
        return pieces[0]
    return pieces[1]
|