This file is indexed.

/usr/lib/python3/dist-packages/tidylib/tidy.py is in python3-tidylib 0.3.2~dfsg-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# Copyright 2009-2015 Jason Stitt
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import ctypes
import ctypes.util
import threading
import platform
import warnings
from contextlib import contextmanager
from .sink import create_sink, destroy_sink

__all__ = ['Tidy', 'PersistentTidy']

# Default search order for library names if nothing is passed in. Each name
# is handed to load_library() in turn until one of them loads.
LIB_NAMES = ['libtidy', 'libtidy.so', 'libtidy-0.99.so.0', 'cygtidy-0-99-0',
             'tidylib', 'libtidy.dylib', 'tidy']

# Error code from library; tidySaveString returns it when the output buffer
# passed in is too small.
ENOMEM = -12

# Default options; can be overridden (or extended) per call through the
# `options` argument of tidy_document() / tidy_fragment().
BASE_OPTIONS = {
    "indent": 1,           # Pretty; not too much of a performance hit
    "tidy-mark": 0,        # No tidy meta tag in output
    "wrap": 0,             # No wrapping
    "alt-text": "",        # Help ensure validation
    "doctype": 'strict',   # Little sense in transitional for tool-generated markup...
    "force-output": 1,     # May not get what you expect but you will get something
}

# Message emitted (as a DeprecationWarning) by the module-level helpers when
# the legacy keep_doc / release_tidy_doc API is used.
KEEP_DOC_WARNING = "keep_doc and release_tidy_doc are no longer used. Create a PersistentTidy object instead."

# Fix for Windows b/c tidy uses stdcall on Windows
if "Windows" == platform.system():
    load_library = ctypes.windll.LoadLibrary
else:
    load_library = ctypes.cdll.LoadLibrary

# -------------------------------------------------------------------------- #
# 3.x/2.x cross-compatibility

try:
    _text_type = unicode  # 2.x: dedicated unicode type exists
except NameError:
    _text_type = str  # 3.x: str is the text type


def is_unicode(obj):
    """ Return True when obj is a text string (py2 unicode / py3 str). """
    return isinstance(obj, _text_type)


def encode_key_value(k, v):
    """ Coerce an option name and value to text and return both encoded
    as UTF-8 bytes, in (key, value) order. """
    encoded_key = _text_type(k).encode('utf-8')
    encoded_value = _text_type(v).encode('utf-8')
    return encoded_key, encoded_value

# -------------------------------------------------------------------------- #
# The main python interface


class Tidy(object):

    """ Wrapper around the HTML Tidy library for cleaning up possibly invalid
    HTML and XHTML.

    Every call to tidy_document / tidy_fragment creates its own Tidy document
    and error sink and releases both when the call finishes. """

    def __init__(self, lib_names=None):
        """ Load the libtidy shared library.

        lib_names: a single library name/path (str) or an iterable of names
        that are tried in order. When omitted, the result of
        ctypes.util.find_library('tidy') is used, falling back to LIB_NAMES.

        Raises OSError if none of the names can be loaded. """
        self._tidy = None
        if lib_names is None:
            lib_names = ctypes.util.find_library('tidy') or LIB_NAMES
        if isinstance(lib_names, str):
            lib_names = [lib_names]
        else:
            # Materialize so a one-shot iterable can still be listed in the
            # error message after the loop below has consumed it.
            lib_names = list(lib_names)
        for name in lib_names:
            try:
                self._tidy = load_library(name)
                break
            except OSError:
                continue
        if self._tidy is None:
            raise OSError(
                "Could not load libtidy using any of these names: "
                + ",".join(lib_names))
        self._tidy.tidyCreate.restype = ctypes.POINTER(ctypes.c_void_p)  # Fix for 64-bit systems

    @contextmanager
    def _doc_and_sink(self):
        " Create and cleanup a Tidy document and error sink "
        doc = self._tidy.tidyCreate()
        try:
            sink = create_sink()
            try:
                self._tidy.tidySetErrorSink(doc, sink)
                yield (doc, sink)
            finally:
                # Runs even when the with-body raises (e.g. the ValueError
                # for a rejected option), so the sink is never leaked.
                destroy_sink(sink)
        finally:
            self._tidy.tidyRelease(doc)

    def tidy_document(self, text, options=None):
        """ Run a string with markup through HTML Tidy; return the corrected one
        and any error output.

        text: The markup, which may be anything from an empty string to a complete
        (X)HTML document. If you pass in a unicode type (py3 str, py2 unicode) you
        get one back out, and tidy will have some options set that may affect
        behavior (e.g. named entities converted to plain unicode characters). If
        you pass in a bytes type (py3 bytes, py2 str) you will get one of those
        back.

        options (dict): Options passed directly to HTML Tidy; see the HTML Tidy docs
        (http://tidy.sourceforge.net/docs/quickref.html) or run tidy -help-config
        from the command line. They are layered on top of BASE_OPTIONS;
        underscores in option names are converted to dashes and a value of None
        is sent as an empty string.

        returns (str, str): The tidied markup and unparsed warning/error messages.
        Warnings and errors are returned just as tidylib returns them.

        raises ValueError: if the error sink is non-empty right after an option
        has been set; the sink text is included in the message.
        """

        # Unicode approach is to encode as string, then decode libtidy output
        use_unicode = False
        if is_unicode(text):
            use_unicode = True
            text = text.encode('utf-8')

        with self._doc_and_sink() as (doc, sink):
            tidy_options = dict(BASE_OPTIONS)
            if options:
                tidy_options.update(options)
            if use_unicode:
                # The input was encoded as UTF-8 above, so force tidy to read
                # and write UTF-8 regardless of what the caller asked for.
                tidy_options['input-encoding'] = 'utf8'
                tidy_options['output-encoding'] = 'utf8'
            for key, value in tidy_options.items():
                key = key.replace('_', '-')
                if value is None:
                    value = ''
                key, value = encode_key_value(key, value)
                self._tidy.tidyOptParseValue(doc, key, value)
                # Anything in the sink at this point was produced by the
                # option that was just set.
                error = str(sink)
                if error:
                    raise ValueError("(tidylib) " + error)

            self._tidy.tidyParseString(doc, text)
            self._tidy.tidyCleanAndRepair(doc)

            # Guess at buffer size; tidy returns ENOMEM if the buffer is too
            # small and puts the required size into out_length
            out_length = ctypes.c_int(8192)
            out = ctypes.c_buffer(out_length.value)
            while ENOMEM == self._tidy.tidySaveString(doc, out, ctypes.byref(out_length)):
                out = ctypes.c_buffer(out_length.value)

            document = out.value
            if use_unicode:
                document = document.decode('utf-8')
            # Read the sink before leaving the with-block, which destroys it.
            errors = str(sink)

        return (document, errors)

    def tidy_fragment(self, text, options=None):
        """ Tidy a string with markup and return only the <body> contents.

        HTML Tidy normally returns a full (X)HTML document; this function returns only
        the contents of the <body> element and is meant to be used for snippets.
        Calling tidy_fragment on elements that don't go in the <body>, like <title>,
        will produce incorrect behavior.

        Arguments and return value are the same as tidy_document, except that
        the returned markup has leading/trailing whitespace stripped. The
        caller's options dict is copied, not modified. Note that HTML
        Tidy will always complain about the lack of a doctype and <title> element
        in fragments, and these errors are not stripped out for you. """
        options = dict(options) if options else dict()
        options["show-body-only"] = 1
        document, errors = self.tidy_document(text, options)
        document = document.strip()
        return document, errors


class PersistentTidy(Tidy):

    """ Functions the same as the Tidy class but keeps a persistent reference
    to one Tidy document object. This increases performance slightly when
    tidying many documents in a row. It also persists all options (not just
    the base options) between runs, which could lead to unexpected behavior.
    If you plan to use different options on each run with PersistentTidy, set
    all options that could change on every call. Note that passing in unicode
    text will result in the input-encoding and output-encoding options being
    automatically set. Thread-local storage is used for the document object
    (one document per thread); a thread's document is created the first time
    that thread needs it and all documents are released when the
    PersistentTidy object is deleted. """

    def __init__(self, lib_names=None):
        """ Load libtidy (see Tidy.__init__) and create the document for the
        constructing thread. """
        Tidy.__init__(self, lib_names)
        self._local = threading.local()
        # Every document created for any thread, so __del__ can release
        # them all regardless of which thread it runs in.
        self._docs = []
        self._docs_lock = threading.Lock()
        self._get_doc()

    def _get_doc(self):
        " Return the calling thread's document, creating it on first use "
        doc = getattr(self._local, 'doc', None)
        if doc is None:
            doc = self._tidy.tidyCreate()
            self._local.doc = doc
            with self._docs_lock:
                self._docs.append(doc)
        return doc

    def __del__(self):
        # __init__ may have failed before these attributes were set (e.g.
        # libtidy could not be loaded), so look them up defensively.
        tidy = getattr(self, '_tidy', None)
        docs = getattr(self, '_docs', None)
        if tidy is None or not docs:
            return
        self._docs = []
        for doc in docs:
            tidy.tidyRelease(doc)

    @contextmanager
    def _doc_and_sink(self):
        " Create and cleanup an error sink but use the persistent doc object "
        doc = self._get_doc()
        sink = create_sink()
        try:
            self._tidy.tidySetErrorSink(doc, sink)
            yield (doc, sink)
        finally:
            # Destroy the sink even when the with-body raises.
            destroy_sink(sink)


def tidy_document(text, options=None, keep_doc=False):
    """ Tidy a document using the shared module-level Tidy object.

    text and options are forwarded to Tidy.tidy_document and its result is
    returned unchanged. keep_doc is obsolete: a true value only triggers a
    DeprecationWarning. """
    if keep_doc:
        warnings.warn(
            KEEP_DOC_WARNING,
            category=DeprecationWarning,
            stacklevel=2,
        )
    shared_tidy = get_module_tidy()
    return shared_tidy.tidy_document(text, options)


def tidy_fragment(text, options=None, keep_doc=False):
    """ Tidy a markup fragment using the shared module-level Tidy object.

    text and options are forwarded to Tidy.tidy_fragment and its result is
    returned unchanged. keep_doc is obsolete: a true value only triggers a
    DeprecationWarning. """
    if keep_doc:
        warnings.warn(
            KEEP_DOC_WARNING,
            category=DeprecationWarning,
            stacklevel=2,
        )
    shared_tidy = get_module_tidy()
    return shared_tidy.tidy_fragment(text, options)


def get_module_tidy():
    """ Return the module-wide Tidy instance, creating it on first use.

    The instance is stored in the module global `_tidy`, which does not
    exist until this function has been called once. """
    global _tidy
    try:
        return _tidy
    except NameError:
        # First call: the global has not been bound yet.
        _tidy = Tidy()
        return _tidy


def release_tidy_doc():
    """ Obsolete no-op kept for backward compatibility; it only emits a
    DeprecationWarning attributed to the caller. """
    warnings.warn(
        KEEP_DOC_WARNING,
        category=DeprecationWarning,
        stacklevel=2,
    )