This file is indexed.

/usr/lib/python2.7/dist-packages/linkcheck/plugins/syntaxchecks.py is in linkchecker 9.3-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import threading
import time
import requests
from xml.dom.minidom import parseString
from . import _ContentPlugin
from .. import log, LOG_PLUGIN
from ..decorators import synchronized


# Module-level lock handed to the synchronized decorator on
# W3Timer.check_w3_time (presumably so that concurrent checker threads
# run the throttle check one at a time -- confirm in ..decorators).
_w3_time_lock = threading.Lock()


class W3Timer(object):
    """Ensure W3C apis are not hammered.

    An instance remembers when it last let a call through and makes the
    next caller wait until SleepSeconds have passed since that moment.
    """

    # Minimum number of seconds between two calls let through by a timer.
    SleepSeconds = 2

    def __init__(self):
        """Start with no previous API call recorded."""
        self.last_w3_call = 0

    @synchronized(_w3_time_lock)
    def check_w3_time(self):
        """Wait until SleepSeconds have passed since the last recorded call.

        Only the remaining part of the interval is slept, so a caller that
        arrives late in the interval is not delayed by a full extra interval.
        The time of this call is then recorded for the next check.
        """
        interval = W3Timer.SleepSeconds
        elapsed = time.time() - self.last_w3_call
        if elapsed < interval:
            # A negative elapsed value means the wall clock was set back;
            # never wait longer than one full interval in that case.
            time.sleep(min(interval, interval - elapsed))
        self.last_w3_call = time.time()


class HtmlSyntaxCheck(_ContentPlugin):
    """Validate HTML pages with the online W3C HTML validator.
    The web service is described at http://validator.w3.org/docs/api.html.
    """
    def __init__(self, config):
        """Set up the plugin together with its own request throttle."""
        super(HtmlSyntaxCheck, self).__init__(config)
        self.timer = W3Timer()

    def applies_to(self, url_data):
        """Accept URLs that are HTML and whose extern[0] flag is not set."""
        is_html = url_data.is_html()
        return is_html and not url_data.extern[0]

    def check(self, url_data):
        """Send the URL to the validator and record the outcome on url_data.

        A valid page gets an info message; otherwise every error found in
        the SOAP reply is added as a warning by check_w3_errors().
        """
        self.timer.check_w3_time()
        http = url_data.session
        try:
            form = dict(uri=url_data.url, output='soap12')
            reply = http.post('http://validator.w3.org/check', data=form)
            reply.raise_for_status()
            status = reply.headers.get('x-w3c-validator-status', 'Invalid')
            if status != 'Valid':
                check_w3_errors(url_data, reply.text, "W3C HTML")
            else:
                url_data.add_info(u"W3C Validator: %s" % _("valid HTML syntax"))
        except requests.exceptions.RequestException:
            # the validator service failed; that says nothing about the page
            pass
        except Exception as err:
            log.warn(LOG_PLUGIN, _("HTML syntax check plugin error: %(msg)s ") % {"msg": err})


class CssSyntaxCheck(_ContentPlugin):
    """Check the syntax of CSS stylesheets with the online W3C CSS validator.
    See http://jigsaw.w3.org/css-validator/manual.html#expert.
    """

    def __init__(self, config):
        """Initialize plugin and its own request throttle."""
        super(CssSyntaxCheck, self).__init__(config)
        self.timer = W3Timer()

    def applies_to(self, url_data):
        """Accept URLs that are CSS and whose extern[0] flag is not set."""
        return url_data.is_css() and not url_data.extern[0]

    def check(self, url_data):
        """Check CSS syntax of given URL.

        A valid stylesheet gets an info message on url_data; otherwise every
        error found in the SOAP reply is added as a warning by
        check_w3_errors().
        """
        self.timer.check_w3_time()
        session = url_data.session
        try:
            url = 'http://jigsaw.w3.org/css-validator/validator'
            params = {
                'uri': url_data.url,
                'warning': '2',
                'output': 'soap12',
            }
            response = session.get(url, params=params)
            response.raise_for_status()
            if response.headers.get('X-W3C-Validator-Status', 'Invalid') == 'Valid':
                url_data.add_info(u"W3C Validator: %s" % _("valid CSS syntax"))
                return
            # Label the warnings as CSS errors; they were previously
            # reported under the HTML validator's label.
            check_w3_errors(url_data, response.text, "W3C CSS")
        except requests.exceptions.RequestException:
            pass # ignore service failures
        except Exception as msg:
            log.warn(LOG_PLUGIN, _("CSS syntax check plugin error: %(msg)s ") % {"msg": msg})

def check_w3_errors (url_data, xml, w3type):
    """Turn each m:error element of a validator reply into a URL warning.

    xml is the reply text of the validator and w3type the label used in
    the warning message, either "W3C HTML" or "W3C CSS"."""
    document = parseString(xml)
    for err_node in document.getElementsByTagName('m:error'):
        template = _("%(w3type)s validation error at line %(line)s col %(column)s: %(msg)s")
        line = getXmlText(err_node, "m:line")
        column = getXmlText(err_node, "m:col")
        message = getXmlText(err_node, "m:message")
        fields = dict(w3type=w3type, line=line, column=column, msg=message)
        url_data.add_warning(template % fields)


def getXmlText(parent, tag):
    """Return the text of the first element named tag below parent.

    Only text nodes that are direct children of that element are joined;
    text inside nested elements is not included.

    Returns an empty string when parent contains no element named tag.
    Indexing the empty result used to raise IndexError, which made one
    absent field abort the whole error report of the caller.
    """
    elems = parent.getElementsByTagName(tag)
    if not elems:
        return ''
    # Yes, the DOM standard is awful.
    return ''.join(
        node.data
        for node in elems[0].childNodes
        if node.nodeType == node.TEXT_NODE
    )