/usr/lib/python3/dist-packages/sphinx/pycode/__init__.py is in python3-sphinx 1.3.6-2ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 | # -*- coding: utf-8 -*-
"""
sphinx.pycode
~~~~~~~~~~~~~
Utilities parsing and analyzing Python code.
:copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from __future__ import print_function
import re
import sys
from os import path
from six import iteritems, text_type, BytesIO, StringIO
from sphinx import package_dir
from sphinx.errors import PycodeError
from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
from sphinx.util import get_module_source, detect_encoding
from sphinx.util.pycompat import TextIOWrapper
from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc
# load the Python grammar
_grammarfile = path.join(package_dir, 'pycode',
'Grammar-py%d.txt' % sys.version_info[0])
pygrammar = driver.load_grammar(_grammarfile)
pydriver = driver.Driver(pygrammar, convert=nodes.convert)
# an object with attributes corresponding to token and symbol names
class sym:
pass
for k, v in iteritems(pygrammar.symbol2number):
setattr(sym, k, v)
for k, v in iteritems(token.tok_name):
setattr(sym, v, k)
# a dict mapping terminal and nonterminal numbers to their names
number2name = pygrammar.number2symbol.copy()
number2name.update(token.tok_name)
_eq = nodes.Leaf(token.EQUAL, '=')
emptyline_re = re.compile('^\s*(#.*)?$')
class AttrDocVisitor(nodes.NodeVisitor):
"""
Visitor that collects docstrings for attribute assignments on toplevel and
in classes (class attributes and attributes set in __init__).
The docstrings can either be in special '#:' comments before the assignment
or in a docstring after it.
"""
def init(self, scope, encoding):
self.scope = scope
self.in_init = 0
self.encoding = encoding
self.namespace = []
self.collected = {}
self.tagnumber = 0
self.tagorder = {}
def add_tag(self, name):
name = '.'.join(self.namespace + [name])
self.tagorder[name] = self.tagnumber
self.tagnumber += 1
def visit_classdef(self, node):
"""Visit a class."""
self.add_tag(node[1].value)
self.namespace.append(node[1].value)
self.generic_visit(node)
self.namespace.pop()
def visit_funcdef(self, node):
"""Visit a function (or method)."""
# usually, don't descend into functions -- nothing interesting there
self.add_tag(node[1].value)
if node[1].value == '__init__':
# however, collect attributes set in __init__ methods
self.in_init += 1
self.generic_visit(node)
self.in_init -= 1
def visit_expr_stmt(self, node):
"""Visit an assignment which may have a special comment before (or
after) it.
"""
if _eq not in node.children:
# not an assignment (we don't care for augmented assignments)
return
# look *after* the node; there may be a comment prefixing the NEWLINE
# of the simple_stmt
parent = node.parent
idx = parent.children.index(node) + 1
while idx < len(parent):
if parent[idx].type == sym.SEMI:
idx += 1
continue # skip over semicolon
if parent[idx].type == sym.NEWLINE:
prefix = parent[idx].get_prefix()
if not isinstance(prefix, text_type):
prefix = prefix.decode(self.encoding)
docstring = prepare_commentdoc(prefix)
if docstring:
self.add_docstring(node, docstring)
return # don't allow docstrings both before and after
break
# now look *before* the node
pnode = node[0]
prefix = pnode.get_prefix()
# if the assignment is the first statement on a new indentation
# level, its preceding whitespace and comments are not assigned
# to that token, but the first INDENT or DEDENT token
while not prefix:
pnode = pnode.get_prev_leaf()
if not pnode or pnode.type not in (token.INDENT, token.DEDENT):
break
prefix = pnode.get_prefix()
if not isinstance(prefix, text_type):
prefix = prefix.decode(self.encoding)
docstring = prepare_commentdoc(prefix)
self.add_docstring(node, docstring)
def visit_simple_stmt(self, node):
"""Visit a docstring statement which may have an assignment before."""
if node[0].type != token.STRING:
# not a docstring; but still need to visit children
return self.generic_visit(node)
prev = node.get_prev_sibling()
if not prev:
return
if prev.type == sym.simple_stmt and \
prev[0].type == sym.expr_stmt and _eq in prev[0].children:
# need to "eval" the string because it's returned in its
# original form
docstring = literals.evalString(node[0].value, self.encoding)
docstring = prepare_docstring(docstring)
self.add_docstring(prev[0], docstring)
def add_docstring(self, node, docstring):
# add an item for each assignment target
for i in range(0, len(node) - 1, 2):
target = node[i]
if self.in_init and self.number2name[target.type] == 'power':
# maybe an attribute assignment -- check necessary conditions
if ( # node must have two children
len(target) != 2 or
# first child must be "self"
target[0].type != token.NAME or target[0].value != 'self' or
# second child must be a "trailer" with two children
self.number2name[target[1].type] != 'trailer' or
len(target[1]) != 2 or
# first child must be a dot, second child a name
target[1][0].type != token.DOT or
target[1][1].type != token.NAME):
continue
name = target[1][1].value
elif target.type != token.NAME:
# don't care about other complex targets
continue
else:
name = target.value
self.add_tag(name)
if docstring:
namespace = '.'.join(self.namespace)
if namespace.startswith(self.scope):
self.collected[namespace, name] = docstring
class ModuleAnalyzer(object):
# cache for analyzer objects -- caches both by module and file name
cache = {}
@classmethod
def for_string(cls, string, modname, srcname='<string>'):
if isinstance(string, bytes):
return cls(BytesIO(string), modname, srcname)
return cls(StringIO(string), modname, srcname, decoded=True)
@classmethod
def for_file(cls, filename, modname):
if ('file', filename) in cls.cache:
return cls.cache['file', filename]
try:
fileobj = open(filename, 'rb')
except Exception as err:
raise PycodeError('error opening %r' % filename, err)
obj = cls(fileobj, modname, filename)
cls.cache['file', filename] = obj
return obj
@classmethod
def for_module(cls, modname):
if ('module', modname) in cls.cache:
entry = cls.cache['module', modname]
if isinstance(entry, PycodeError):
raise entry
return entry
try:
type, source = get_module_source(modname)
if type == 'string':
obj = cls.for_string(source, modname)
else:
obj = cls.for_file(source, modname)
except PycodeError as err:
cls.cache['module', modname] = err
raise
cls.cache['module', modname] = obj
return obj
def __init__(self, source, modname, srcname, decoded=False):
# name of the module
self.modname = modname
# name of the source file
self.srcname = srcname
# file-like object yielding source lines
self.source = source
# cache the source code as well
pos = self.source.tell()
if not decoded:
self.encoding = detect_encoding(self.source.readline)
self.source.seek(pos)
self.code = self.source.read().decode(self.encoding)
self.source.seek(pos)
self.source = TextIOWrapper(self.source, self.encoding)
else:
self.encoding = None
self.code = self.source.read()
self.source.seek(pos)
# will be filled by tokenize()
self.tokens = None
# will be filled by parse()
self.parsetree = None
# will be filled by find_attr_docs()
self.attr_docs = None
self.tagorder = None
# will be filled by find_tags()
self.tags = None
def tokenize(self):
"""Generate tokens from the source."""
if self.tokens is not None:
return
try:
self.tokens = list(tokenize.generate_tokens(self.source.readline))
except tokenize.TokenError as err:
raise PycodeError('tokenizing failed', err)
self.source.close()
def parse(self):
"""Parse the generated source tokens."""
if self.parsetree is not None:
return
self.tokenize()
try:
self.parsetree = pydriver.parse_tokens(self.tokens)
except parse.ParseError as err:
raise PycodeError('parsing failed', err)
def find_attr_docs(self, scope=''):
"""Find class and module-level attributes and their documentation."""
if self.attr_docs is not None:
return self.attr_docs
self.parse()
attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
attr_visitor.visit(self.parsetree)
self.attr_docs = attr_visitor.collected
self.tagorder = attr_visitor.tagorder
# now that we found everything we could in the tree, throw it away
# (it takes quite a bit of memory for large modules)
self.parsetree = None
return attr_visitor.collected
def find_tags(self):
"""Find class, function and method definitions and their location."""
if self.tags is not None:
return self.tags
self.tokenize()
result = {}
namespace = []
stack = []
indent = 0
defline = False
expect_indent = False
emptylines = 0
def tokeniter(ignore = (token.COMMENT,)):
for tokentup in self.tokens:
if tokentup[0] not in ignore:
yield tokentup
tokeniter = tokeniter()
for type, tok, spos, epos, line in tokeniter:
if expect_indent and type != token.NL:
if type != token.INDENT:
# no suite -- one-line definition
assert stack
dtype, fullname, startline, _ = stack.pop()
endline = epos[0]
namespace.pop()
result[fullname] = (dtype, startline, endline - emptylines)
expect_indent = False
if tok in ('def', 'class'):
name = next(tokeniter)[1]
namespace.append(name)
fullname = '.'.join(namespace)
stack.append((tok, fullname, spos[0], indent))
defline = True
elif type == token.INDENT:
expect_indent = False
indent += 1
elif type == token.DEDENT:
indent -= 1
# if the stacklevel is the same as it was before the last
# def/class block, this dedent closes that block
if stack and indent == stack[-1][3]:
dtype, fullname, startline, _ = stack.pop()
endline = spos[0]
namespace.pop()
result[fullname] = (dtype, startline, endline - emptylines)
elif type == token.NEWLINE:
# if this line contained a definition, expect an INDENT
# to start the suite; if there is no such INDENT
# it's a one-line definition
if defline:
defline = False
expect_indent = True
emptylines = 0
elif type == token.NL:
# count up if line is empty or comment only
if emptyline_re.match(line):
emptylines += 1
else:
emptylines = 0
self.tags = result
return result
if __name__ == '__main__':
import time
import pprint
x0 = time.time()
# ma = ModuleAnalyzer.for_file(__file__.rstrip('c'), 'sphinx.builders.html')
ma = ModuleAnalyzer.for_file('sphinx/environment.py',
'sphinx.environment')
ma.tokenize()
x1 = time.time()
ma.parse()
x2 = time.time()
# for (ns, name), doc in iteritems(ma.find_attr_docs()):
# print '>>', ns, name
# print '\n'.join(doc)
pprint.pprint(ma.find_tags())
x3 = time.time()
# print nodes.nice_repr(ma.parsetree, number2name)
print("tokenizing %.4f, parsing %.4f, finding %.4f" % (x1-x0, x2-x1, x3-x2))
|