/usr/lib/python3/dist-packages/html5lib/filters/whitespace.py is in python3-html5lib 0.999-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from __future__ import absolute_import, division, unicode_literals
import re
from . import _base
from ..constants import rcdataElements, spaceCharacters
# Rebind the imported collection of space characters as a single string so
# that it can be interpolated into a regex character class below.
# NOTE(review): presumably these are the HTML whitespace characters defined
# in ..constants -- confirm against that module.
spaceCharacters = "".join(spaceCharacters)
# Pre-compiled pattern matching one or more consecutive space characters.
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
class Filter(_base.Filter):
    """Token filter that collapses whitespace outside preserving elements.

    While iterating, runs of space characters in text tokens are reduced to
    a single space. Tokens encountered inside an element whose name is in
    ``spacePreserveElements`` (or inside anything nested within such an
    element) are passed through untouched.
    """

    # Element names whose content must keep its whitespace verbatim:
    # "pre", "textarea", plus every name in ``rcdataElements``.
    spacePreserveElements = frozenset(
        ("pre", "textarea") + tuple(rcdataElements)
    )

    def __iter__(self):
        """Yield every token from the base filter, normalising whitespace.

        Tokens are mutated in place (their ``"data"`` entry is rewritten)
        and then yielded; no token is dropped or added.
        """
        # Nesting depth inside whitespace-preserving elements. Zero means
        # we are outside all of them and may collapse whitespace.
        depth = 0
        for token in _base.Filter.__iter__(self):
            kind = token["type"]
            if kind == "StartTag":
                # Once inside a preserving element every nested start tag
                # deepens the count, so the matching end tags balance out.
                if depth or token["name"] in self.spacePreserveElements:
                    depth += 1
            elif kind == "EndTag":
                if depth:
                    depth -= 1
            elif not depth:
                if kind == "SpaceCharacters":
                    # Only rewrite non-empty data, so that no space is
                    # introduced where there was none.
                    if token["data"]:
                        token["data"] = " "
                elif kind == "Characters":
                    token["data"] = collapse_spaces(token["data"])
            yield token
def collapse_spaces(text):
    """Return *text* with each run of space characters replaced by one space.

    A "run" is whatever ``SPACES_REGEX`` matches: one or more consecutive
    characters from the module's space-character set.
    """
    collapsed = SPACES_REGEX.sub(" ", text)
    return collapsed
|