/usr/lib/python2.7/dist-packages/html5lib/treewalkers/_base.py is in python-html5lib 0.999-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | from __future__ import absolute_import, division, unicode_literals
from six import text_type, string_types
import gettext
_ = gettext.gettext
from xml.dom import Node
DOCUMENT = Node.DOCUMENT_NODE
DOCTYPE = Node.DOCUMENT_TYPE_NODE
TEXT = Node.TEXT_NODE
ELEMENT = Node.ELEMENT_NODE
COMMENT = Node.COMMENT_NODE
ENTITY = Node.ENTITY_NODE
UNKNOWN = "<#UNKNOWN#>"
from ..constants import voidElements, spaceCharacters
spaceCharacters = "".join(spaceCharacters)
def to_text(s, blank_if_none=True):
"""Wrapper around six.text_type to convert None to empty string"""
if s is None:
if blank_if_none:
return ""
else:
return None
elif isinstance(s, text_type):
return s
else:
return text_type(s)
def is_text_or_none(string):
"""Wrapper around isinstance(string_types) or is None"""
return string is None or isinstance(string, string_types)
class TreeWalker(object):
def __init__(self, tree):
self.tree = tree
def __iter__(self):
raise NotImplementedError
def error(self, msg):
return {"type": "SerializeError", "data": msg}
def emptyTag(self, namespace, name, attrs, hasChildren=False):
assert namespace is None or isinstance(namespace, string_types), type(namespace)
assert isinstance(name, string_types), type(name)
assert all((namespace is None or isinstance(namespace, string_types)) and
isinstance(name, string_types) and
isinstance(value, string_types)
for (namespace, name), value in attrs.items())
yield {"type": "EmptyTag", "name": to_text(name, False),
"namespace": to_text(namespace),
"data": attrs}
if hasChildren:
yield self.error(_("Void element has children"))
def startTag(self, namespace, name, attrs):
assert namespace is None or isinstance(namespace, string_types), type(namespace)
assert isinstance(name, string_types), type(name)
assert all((namespace is None or isinstance(namespace, string_types)) and
isinstance(name, string_types) and
isinstance(value, string_types)
for (namespace, name), value in attrs.items())
return {"type": "StartTag",
"name": text_type(name),
"namespace": to_text(namespace),
"data": dict(((to_text(namespace, False), to_text(name)),
to_text(value, False))
for (namespace, name), value in attrs.items())}
def endTag(self, namespace, name):
assert namespace is None or isinstance(namespace, string_types), type(namespace)
assert isinstance(name, string_types), type(namespace)
return {"type": "EndTag",
"name": to_text(name, False),
"namespace": to_text(namespace),
"data": {}}
def text(self, data):
assert isinstance(data, string_types), type(data)
data = to_text(data)
middle = data.lstrip(spaceCharacters)
left = data[:len(data) - len(middle)]
if left:
yield {"type": "SpaceCharacters", "data": left}
data = middle
middle = data.rstrip(spaceCharacters)
right = data[len(middle):]
if middle:
yield {"type": "Characters", "data": middle}
if right:
yield {"type": "SpaceCharacters", "data": right}
def comment(self, data):
assert isinstance(data, string_types), type(data)
return {"type": "Comment", "data": text_type(data)}
def doctype(self, name, publicId=None, systemId=None, correct=True):
assert is_text_or_none(name), type(name)
assert is_text_or_none(publicId), type(publicId)
assert is_text_or_none(systemId), type(systemId)
return {"type": "Doctype",
"name": to_text(name),
"publicId": to_text(publicId),
"systemId": to_text(systemId),
"correct": to_text(correct)}
def entity(self, name):
assert isinstance(name, string_types), type(name)
return {"type": "Entity", "name": text_type(name)}
def unknown(self, nodeType):
return self.error(_("Unknown node type: ") + nodeType)
class NonRecursiveTreeWalker(TreeWalker):
def getNodeDetails(self, node):
raise NotImplementedError
def getFirstChild(self, node):
raise NotImplementedError
def getNextSibling(self, node):
raise NotImplementedError
def getParentNode(self, node):
raise NotImplementedError
def __iter__(self):
currentNode = self.tree
while currentNode is not None:
details = self.getNodeDetails(currentNode)
type, details = details[0], details[1:]
hasChildren = False
if type == DOCTYPE:
yield self.doctype(*details)
elif type == TEXT:
for token in self.text(*details):
yield token
elif type == ELEMENT:
namespace, name, attributes, hasChildren = details
if name in voidElements:
for token in self.emptyTag(namespace, name, attributes,
hasChildren):
yield token
hasChildren = False
else:
yield self.startTag(namespace, name, attributes)
elif type == COMMENT:
yield self.comment(details[0])
elif type == ENTITY:
yield self.entity(details[0])
elif type == DOCUMENT:
hasChildren = True
else:
yield self.unknown(details[0])
if hasChildren:
firstChild = self.getFirstChild(currentNode)
else:
firstChild = None
if firstChild is not None:
currentNode = firstChild
else:
while currentNode is not None:
details = self.getNodeDetails(currentNode)
type, details = details[0], details[1:]
if type == ELEMENT:
namespace, name, attributes, hasChildren = details
if name not in voidElements:
yield self.endTag(namespace, name)
if self.tree is currentNode:
currentNode = None
break
nextSibling = self.getNextSibling(currentNode)
if nextSibling is not None:
currentNode = nextSibling
break
else:
currentNode = self.getParentNode(currentNode)
|