/usr/share/pyshared/Bio/GenBank/LocationParser.py is in python-biopython 1.58-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Copyright 2001 Brad Chapman.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Code used for parsing GenBank/EMBL feature location strings (DEPRECATED)."""
# Don't issue a deprecation warning here, but via Bio.Parsers instead
# This avoids the user seeing multiple deprecation warnings.
# First pass at a parser for the location fields of a feature table.
# Everything likely to change.
#
# This does NOT cope with the Gap(), Gap(X), or Gap(unkXXX) tokens used
# in CONTIG lines, which are otherwise similar to feature locations.
#
# Based on the DDBJ/EMBL/GenBank Feature Table Definition Version 2.2
# Dec 15 1999 available from EBI, but the documentation is not
# completely internally consistent much less agree with real-life
# examples. Conflicts resolved to agree with real examples.
#
# This does NOT cope with the Gap(), Gap(X), or Gap(unkXXX) tokens used
# in CONTIG lines, which are otherwise similar to feature locations.
#
# Uses John Aycock's SPARK for parsing
from Bio.Parsers.spark import GenericScanner, GenericParser
class Token(object):
    """Plain token identified only by its type name.

    The scanner in this module creates these for punctuation, e.g.
    ``Token("double_dot")`` or ``Token("comma")``.

    type -- string naming the kind of token.
    """

    def __init__(self, type):
        self.type = type

    def __cmp__(self, other):
        # Compare by type name only, so a token can be matched against a
        # plain string such as "comma" (presumably how the SPARK parser
        # matches grammar terminals -- confirm against Bio.Parsers.spark).
        return cmp(self.type, other)

    def __repr__(self):
        # Previously printed "Tokens(...)", which names no existing class;
        # use the real class name like every other __repr__ in this module.
        return "Token(%r)" % (self.type,)
# "38"
class Integer(object):
    """Token carrying an integer value taken from a location string.

    val -- the value wrapped by this token.
    """

    # Token type name; __cmp__ compares this against the other operand.
    type = "integer"

    def __init__(self, val):
        self.val = val

    def __repr__(self):
        text = "Integer(%s)" % self.val
        return text

    def __str__(self):
        value = self.val
        return str(value)

    def __cmp__(self, other):
        # Ordering/equality is decided by the type name, not by the value.
        kind = self.type
        return cmp(kind, other)
# From the BNF definition, this isn't needed. Does that mean
# that bases can be referred to with negative numbers?
class UnsignedInteger(Integer):
    """Integer token variant whose type name is "unsigned_integer".

    Everything except the type name and the repr is inherited from Integer.
    """

    type = "unsigned_integer"

    def __repr__(self):
        text = "UnsignedInteger(%s)" % self.val
        return text
class Symbol(object):
    """Token carrying a name (identifier-like text) from a location string.

    name -- the text wrapped by this token.
    """

    # Token type name; __cmp__ compares this against the other operand.
    type = "symbol"

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        quoted = repr(self.name)
        return "Symbol(%s)" % quoted

    def __str__(self):
        label = self.name
        return str(label)

    def __cmp__(self, other):
        # Ordering/equality is decided by the type name, not by the name held.
        kind = self.type
        return cmp(kind, other)
# ">38" -- The BNF says ">" is for the lower bound.. seems wrong to me
class LowBound(object):
    """Position written with a leading ">" (e.g. ">38").

    base -- the position that followed the ">" sign.
    """

    def __init__(self, base):
        self.base = base

    def __repr__(self):
        template = "LowBound(%r)"
        return template % self.base
# "<38"
class HighBound(object):
    """Position written with a leading "<" (e.g. "<38").

    base -- the position that followed the "<" sign.
    """

    def __init__(self, base):
        self.base = base

    def __repr__(self):
        template = "HighBound(%r)"
        return template % self.base
# 12.34
class TwoBound(object):
    """Pair of positions joined by a single dot (e.g. "12.34").

    low  -- the position before the dot.
    high -- the position after the dot.
    """

    def __init__(self, low, high):
        self.low = low
        self.high = high

    def __repr__(self):
        bounds = (self.low, self.high)
        return "TwoBound(%r, %r)" % bounds
# 12^34
class Between(object):
    """Pair of positions joined by a caret (e.g. "12^34").

    low  -- the position before the caret.
    high -- the position after the caret.
    """

    def __init__(self, low, high):
        self.low = low
        self.high = high

    def __repr__(self):
        bounds = (self.low, self.high)
        return "Between(%r, %r)" % bounds
# 12..34
class Range(object):
    """Pair of positions joined by a double dot (e.g. "12..34").

    low  -- the item before the "..".
    high -- the item after the "..".
    """

    def __init__(self, low, high):
        self.low = low
        self.high = high

    def __repr__(self):
        bounds = (self.low, self.high)
        return "Range(%r, %r)" % bounds
class Function(object):
    """Named operator applied to a list of locations, i.e. "name(arg, ...)".

    name -- the operator name.
    args -- the parsed arguments.
    """

    def __init__(self, name, args):
        self.name = name
        self.args = args

    def __repr__(self):
        fields = (self.name, self.args)
        return "Function(%r, %r)" % fields
class AbsoluteLocation(object):
    """Local location together with an optional path prefix.

    path           -- the path part, or None when the location had none.
    local_location -- the location itself.
    """

    def __init__(self, path, local_location):
        self.path = path
        self.local_location = local_location

    def __repr__(self):
        fields = (self.path, self.local_location)
        return "AbsoluteLocation(%r, %r)" % fields
class Path(object):
    """Reference made of an optional database name and an accession.

    database  -- the database name, or None when only an accession was given.
    accession -- the accession.
    """

    def __init__(self, database, accession):
        self.database = database
        self.accession = accession

    def __repr__(self):
        fields = (self.database, self.accession)
        return "Path(%r, %r)" % fields
class FeatureName(object):
    """Feature label together with an optional path prefix.

    path  -- the path part, or None when the name had none.
    label -- the feature label.
    """

    def __init__(self, path, label):
        self.path = path
        self.label = label

    def __repr__(self):
        fields = (self.path, self.label)
        return "FeatureName(%r, %r)" % fields
class LocationScanner(GenericScanner):
    """Scanner that turns a location string into a list of token objects.

    Every ``t_*`` method carries its regular expression as its docstring
    (the SPARK GenericScanner convention), so those docstrings and the
    method names are part of the program's behaviour and must not be edited
    casually.  Each handler appends one token object to ``self.rv``.
    """

    def __init__(self):
        GenericScanner.__init__(self)

    def tokenize(self, input):
        # Start every run with a fresh result list; the t_* handlers fill it
        # while the base class walks over the input.
        self.rv = []
        GenericScanner.tokenize(self, input)
        return self.rv

    def t_double_colon(self, input):
        r" :: "
        token = Token("double_colon")
        self.rv.append(token)

    def t_double_dot(self, input):
        r" \.\. "
        token = Token("double_dot")
        self.rv.append(token)

    def t_dot(self, input):
        r" \.(?!\.) "
        # The lookahead keeps a lone dot from matching the start of "..".
        token = Token("dot")
        self.rv.append(token)

    def t_caret(self, input):
        r" \^ "
        token = Token("caret")
        self.rv.append(token)

    def t_comma(self, input):
        r" \, "
        token = Token("comma")
        self.rv.append(token)

    def t_integer(self, input):
        r" -?[0-9]+ "
        number = int(input)
        self.rv.append(Integer(number))

    def t_unsigned_integer(self, input):
        r" [0-9]+ "
        number = int(input)
        self.rv.append(UnsignedInteger(number))

    def t_colon(self, input):
        r" :(?!:) "
        # The lookahead keeps a lone colon from matching the start of "::".
        token = Token("colon")
        self.rv.append(token)

    def t_open_paren(self, input):
        r" \( "
        token = Token("open_paren")
        self.rv.append(token)

    def t_close_paren(self, input):
        r" \) "
        token = Token("close_paren")
        self.rv.append(token)

    def t_symbol(self, input):
        r" [A-Za-z0-9_'*-][A-Za-z0-9_'*.-]* "
        # Needed an extra '.'
        token = Symbol(input)
        self.rv.append(token)

    def t_less_than(self, input):
        r" < "
        token = Token("less_than")
        self.rv.append(token)

    def t_greater_than(self, input):
        r" > "
        token = Token("greater_than")
        self.rv.append(token)

    # punctuation .. hmm, isn't needed for location
    # r''' [ !#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~] '''
class LocationParser(GenericParser):
    """Parser that builds location objects from the scanner's tokens.

    Every ``p_*`` method carries its grammar rules as its docstring (the
    SPARK GenericParser convention) and receives the matched right-hand
    side items as ``args``; whatever it returns stands for the left-hand
    side.  The docstrings are therefore code, not documentation.
    """

    def __init__(self, start='location'):
        GenericParser.__init__(self, start)
        self.begin_pos = 0

    def p_location(self, args):
        """
        location ::= absolute_location
        location ::= feature_name
        location ::= function
        """
        # Pure pass-through of the single matched alternative.
        return args[0]

    def p_function(self, args):
        """
        function ::= functional_operator open_paren location_list close_paren
        """
        operator_name = args[0].name
        return Function(operator_name, args[2])

    def p_absolute_location(self, args):
        """
        absolute_location ::= local_location
        absolute_location ::= path colon local_location
        """
        # A lone local_location has no path part.
        path = None if len(args) == 1 else args[0]
        return AbsoluteLocation(path, args[-1])

    def p_path(self, args):
        """
        path ::= database double_colon primary_accession
        path ::= primary_accession
        """
        if len(args) != 3:
            return Path(None, args[0])
        return Path(args[0], args[2])

    def p_feature_name(self, args):
        """
        feature_name ::= path colon feature_label
        feature_name ::= feature_label
        """
        if len(args) != 3:
            return FeatureName(None, args[0])
        return FeatureName(args[0], args[2])

    # NOTE(review): the rule below defines "label", while p_feature_name
    # refers to "feature_label"; no rule in this class defines
    # "feature_label".  Left untouched because changing it would alter what
    # the grammar accepts -- confirm the intent before editing.
    def p_feature_label(self, args):
        """
        label ::= symbol
        """
        return args[0].name

    def p_local_location(self, args):
        """
        local_location ::= base_position
        local_location ::= between_position
        local_location ::= base_range
        """
        # Pure pass-through of the single matched alternative.
        return args[0]

    def p_location_list(self, args):
        """
        location_list ::= location
        location_list ::= location_list comma location
        """
        if len(args) != 1:
            # args is [list built so far, comma token, new location].
            return args[0] + [args[2]]
        # A single location: args already is the one-element list we want.
        return args

    def p_functional_operator(self, args):
        """
        functional_operator ::= symbol
        """
        return args[0]

    def p_base_position(self, args):
        """
        base_position ::= integer
        base_position ::= low_base_bound
        base_position ::= high_base_bound
        base_position ::= two_base_bound
        """
        # Pure pass-through of the single matched alternative.
        return args[0]

    def p_low_base_bound(self, args):
        """
        low_base_bound ::= greater_than integer
        """
        position = args[1]
        return LowBound(position)

    def p_high_base_bound(self, args):
        """
        high_base_bound ::= less_than integer
        """
        position = args[1]
        return HighBound(position)

    def p_two_base_bound_1(self, args):
        """
        two_base_bound ::= open_paren base_position dot base_position close_paren
        """
        # main example doesn't have parens but others do.. (?)
        low, high = args[1], args[3]
        return TwoBound(low, high)

    def p_two_base_bound_2(self, args):
        """
        two_base_bound ::= base_position dot base_position
        """
        # two_base_bound with no parentheses like 1.6
        low, high = args[0], args[2]
        return TwoBound(low, high)

    def p_between_position(self, args):
        """
        between_position ::= base_position caret base_position
        """
        low, high = args[0], args[2]
        return Between(low, high)

    def p_base_range(self, args):
        """
        base_range ::= base_position double_dot base_position
        base_range ::= function double_dot base_position
        base_range ::= base_position double_dot function
        base_range ::= function double_dot function
        """
        low, high = args[0], args[2]
        return Range(low, high)

    def p_database(self, args):
        """
        database ::= symbol
        """
        symbol = args[0]
        return symbol.name

    def p_primary_accession(self, args):
        """
        primary_accession ::= symbol
        """
        symbol = args[0]
        return symbol.name
# A single scanner is created when the module is imported and shared by
# every scan() call instead of building a new one per call.
_cached_scanner = LocationScanner()


def scan(input):
    """Tokenize a location string with the shared scanner.

    Returns whatever LocationScanner.tokenize() returns for the input.
    """
    tokens = _cached_scanner.tokenize(input)
    return tokens
# A single parser is created when the module is imported and shared by
# every parse() call instead of building a new one per call.
_cached_parser = LocationParser()


def parse(tokens):
    """Turn a token sequence into its object representation.

    Returns whatever the shared LocationParser's parse() returns for the
    given tokens.
    """
    result = _cached_parser.parse(tokens)
    return result
|