/usr/share/pyshared/PyMetrics/lexer.py is in pymetrics 0.8.1-6.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
""" Parsing classes.
$Id: lexer.py,v 1.2 2005/09/17 04:28:12 rcharney Exp $
"""
__version__ = "$Revision: 1.2 $"[11:-2]
__author__ = 'Reg. Charney <pymetrics@charneyday.com>'
import sys
import string
import cStringIO
import mytoken
import keyword
from globals import *
class ParseError(Exception):
    """ Raised by Lexer when the source text cannot be tokenized. """
class Lexer:
""" Parse python source."""
def __init__( self ):
self.prevToktype = None
self.prevSemtype = None
self.prevToktext = None
def parse( self, inFileName ):
""" Read and parse the source. """
fd = open( inFileName )
try:
srcLines = fd.read()
self.srcLines = string.expandtabs( srcLines )
finally:
fd.close()
self.tokenlist = []
self.__computeOffsets()
self.__parseSource()
def __parseSource(self):
""" Parse the source in file."""
self.pos = 0
text = cStringIO.StringIO( self.srcLines )
try:
tokenize.tokenize( text.readline, self )
except tokenize.TokenError, ex:
msg = ex[0]
line = ex[1][0]
print line, self.srcLines[self.offset[line]:]
raise ParseError("ERROR %s\nLine %d:%s" % (
msg, line, self.srcLines[self.offset[line]:]))
def __computeOffsets(self):
""" Compute and store line offsets in self.offset. """
self.offset = [0, 0]
self.lineCount = 0
pos = 0
while pos < len( self.srcLines ):
self.lineCount += 1
pos = string.find( self.srcLines, '\n', pos ) + 1
if not pos: break
self.offset.append( pos )
self.offset.append( len( self.srcLines ) )
def __push(self, toktype, semtype, toktext, srow, scol, line):
"Append given token to final list of tokens."
self.tokenlist.append(mytoken.MyToken(type=toktype, semtype=semtype, text=toktext, row=srow, col=scol, line=line))
if toktype in [NEWLINE,INDENT,DEDENT,EMPTY,ENDMARKER]:
self.prevToktype = None
self.prevSemtype = None
self.prevToktext = None
elif toktype != WS:
self.prevToktype = toktype
self.prevSemtype = semtype
self.prevToktext = toktext
def __call__(self, toktype, toktext, (srow,scol), (erow,ecol), line):
""" MyToken handler."""
semtype = None
# calculate new positions
oldpos = self.pos
newpos = self.offset[srow] + scol
self.pos = newpos + len(toktext)
# check for extraneous '\r', usually produced in Windows and Mac systems
if toktype == ERRORTOKEN: # Python treats a '\r' as an error
if toktext in ['\r']:
toktext = ' '
toktype = WS
else:
msg = "Invalid character %s in line %d column %d\n" % (str.__repr__(toktext), srow, scol+1)
sys.stderr.writelines( msg )
sys.stdout.writelines( msg )
# next line is commented out so that invalid tokens are not output
# self.__push(toktype, None, toktext, srow, scol, line)
return
# handle newlines
if toktype in [NEWLINE, EMPTY]:
self.__push(toktype, None, '\n', srow, scol, line)
return
# send the original whitespace, if needed
# this is really a reconstruction based on last
# and current token positions and lengths.
if newpos > oldpos:
# srow scol is the starting position for the current
# token that follows the whitespace.
# srow sws is the computed starting position of the
# whitespace
sws = scol - ( newpos - oldpos )
self.__push(WS, None, self.srcLines[oldpos:newpos], srow, sws, line)
# skip tokens that indent/dedent
if toktype in [INDENT, DEDENT]:
self.pos = newpos
self.__push(toktype, None, '', srow, scol, line)
return
# map token type to one of ours and set semantic type, if possible
if token.LPAR <= toktype and toktype <= OP:
toktype = OP
if toktext == '@':
semtype = DECORATOR
elif toktype == NAME:
if keyword.iskeyword(toktext) or toktext == "self":
semtype = KEYWORD
else:
semtype = VARNAME
if self.prevToktext == "def":
semtype = FCNNAME
elif self.prevToktext == "class":
semtype = CLASSNAME
# add token
self.__push(toktype, semtype, toktext, srow, scol, line)
|