This file is indexed.

/usr/lib/python2.7/dist-packages/linkcheck/checker/fileurl.py is in linkchecker 9.3-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Handle local file: links.
"""

import re
import os
import urlparse
import urllib
import urllib2
from datetime import datetime

from . import urlbase, get_index_html
from .. import log, LOG_CHECK, fileutil, mimeutil, LinkCheckerError, url as urlutil
from ..bookmarks import firefox
from .const import WARN_FILE_MISSING_SLASH, WARN_FILE_SYSTEM_PATH


def get_files (dirname):
    """Get iterator of entries in directory. Only allows regular files
    and directories, no symlinks."""
    for entry in os.listdir(dirname):
        fullentry = os.path.join(dirname, entry)
        if os.path.islink(fullentry):
            continue
        if os.path.isfile(fullentry):
            yield entry
        elif os.path.isdir(fullentry):
            yield entry+"/"


def prepare_urlpath_for_nt (path):
    """
    URLs like 'file://server/path/' result in a path named '/server/path'.
    However urllib.url2pathname expects '////server/path'.
    """
    if '|' not in path:
        return "////"+path.lstrip("/")
    return path


def get_nt_filename (path):
    """Return case sensitive filename for NT path."""
    unc, rest = os.path.splitunc(path)
    head, tail = os.path.split(rest)
    if not tail:
        return path
    for fname in os.listdir(unc+head):
        if fname.lower() == tail.lower():
            return os.path.join(get_nt_filename(unc+head), fname)
    log.error(LOG_CHECK, "could not find %r in %r", tail, head)
    return path


def get_os_filename (path):
    """Return filesystem path for given URL path."""
    if os.name == 'nt':
        path = prepare_urlpath_for_nt(path)
    res = urllib.url2pathname(fileutil.pathencode(path))
    if os.name == 'nt' and res.endswith(':') and len(res) == 2:
        # Work around http://bugs.python.org/issue11474
        res += os.sep
    return res


def is_absolute_path (path):
    """Check if given path is absolute. On Windows absolute paths start
    with a drive letter. On all other systems absolute paths start with
    a slash."""
    if os.name == 'nt':
        if re.search(r"^[a-zA-Z]:", path):
            return True
        path = path.replace("\\", "/")
    return path.startswith("/")


class FileUrl (urlbase.UrlBase):
    """
    Url link with file scheme.
    """

    def init (self, base_ref, base_url, parent_url, recursion_level,
              aggregate, line, column, page, name, url_encoding, extern):
        """Initialize the scheme."""
        super(FileUrl, self).init(base_ref, base_url, parent_url,
         recursion_level, aggregate, line, column, page, name, url_encoding, extern)
        self.scheme = u'file'

    def build_base_url(self):
        """The URL is normed according to the platform:
         - the base URL is made an absolute file:// URL
         - under Windows platform the drive specifier is normed
        """
        if self.base_url is None:
            return
        base_url = self.base_url
        if not (self.parent_url or self.base_ref or base_url.startswith("file:")):
            base_url = os.path.expanduser(base_url)
            if not is_absolute_path(base_url):
                try:
                    base_url = os.getcwd()+"/"+base_url
                except OSError as msg:
                    # occurs on stale remote filesystems (eg. NFS)
                    errmsg = _("Could not get current working directory: %(msg)s") % dict(msg=msg)
                    raise LinkCheckerError(errmsg)
                if os.path.isdir(base_url):
                    base_url += "/"
            base_url = "file://"+base_url
        if os.name == "nt":
            base_url = base_url.replace("\\", "/")
            # transform c:/windows into /c|/windows
            base_url = re.sub("^file://(/?)([a-zA-Z]):", r"file:///\2|", base_url)
            # transform file://path into file:///path
            base_url = re.sub("^file://([^/])", r"file:///\1", base_url)
        self.base_url = unicode(base_url)

    def build_url (self):
        """
        Calls super.build_url() and adds a trailing slash to directories.
        """
        self.build_base_url()
        if self.parent_url is not None:
            # URL joining with the parent URL only works if the query
            # of the base URL are removed first.
            # Otherwise the join function thinks the query is part of
            # the file name.
            from .urlbase import url_norm
            # norm base url - can raise UnicodeError from url.idna_encode()
            base_url, is_idn = url_norm(self.base_url, self.encoding)
            urlparts = list(urlparse.urlsplit(base_url))
            # ignore query part for filesystem urls
            urlparts[3] = ''
            self.base_url = urlutil.urlunsplit(urlparts)
        super(FileUrl, self).build_url()
        # ignore query and fragment url parts for filesystem urls
        self.urlparts[3] = self.urlparts[4] = ''
        if self.is_directory() and not self.urlparts[2].endswith('/'):
            self.add_warning(_("Added trailing slash to directory."),
                           tag=WARN_FILE_MISSING_SLASH)
            self.urlparts[2] += '/'
        self.url = urlutil.urlunsplit(self.urlparts)

    def add_size_info (self):
        """Get size of file content and modification time from filename path."""
        if self.is_directory():
            # Directory size always differs from the customer index.html
            # that is generated. So return without calculating any size.
            return
        filename = self.get_os_filename()
        self.size = fileutil.get_size(filename)
        self.modified = datetime.utcfromtimestamp(fileutil.get_mtime(filename))

    def check_connection (self):
        """
        Try to open the local file. Under NT systems the case sensitivity
        is checked.
        """
        if (self.parent_url is not None and
           not self.parent_url.startswith(u"file:")):
            msg = _("local files are only checked without parent URL or when the parent URL is also a file")
            raise LinkCheckerError(msg)
        if self.is_directory():
            self.set_result(_("directory"))
        else:
            url = fileutil.pathencode(self.url)
            self.url_connection = urllib2.urlopen(url)
            self.check_case_sensitivity()

    def check_case_sensitivity (self):
        """
        Check if url and windows path name match cases
        else there might be problems when copying such
        files on web servers that are case sensitive.
        """
        if os.name != 'nt':
            return
        path = self.get_os_filename()
        realpath = get_nt_filename(path)
        if path != realpath:
            self.add_warning(_("The URL path %(path)r is not the same as the "
                            "system path %(realpath)r. You should always use "
                            "the system path in URLs.") % \
                            {"path": path, "realpath": realpath},
                               tag=WARN_FILE_SYSTEM_PATH)

    def read_content (self):
        """Return file content, or in case of directories a dummy HTML file
        with links to the files."""
        if self.is_directory():
            data = get_index_html(get_files(self.get_os_filename()))
            if isinstance(data, unicode):
                data = data.encode("iso8859-1", "ignore")
        else:
            data = super(FileUrl, self).read_content()
        return data

    def get_os_filename (self):
        """
        Construct os specific file path out of the file:// URL.

        @return: file name
        @rtype: string
        """
        return get_os_filename(self.urlparts[2])

    def get_temp_filename (self):
        """Get filename for content to parse."""
        return self.get_os_filename()

    def is_directory (self):
        """
        Check if file is a directory.

        @return: True iff file is a directory
        @rtype: bool
        """
        filename = self.get_os_filename()
        return os.path.isdir(filename) and not os.path.islink(filename)

    def is_parseable (self):
        """Check if content is parseable for recursion.

        @return: True if content is parseable
        @rtype: bool
        """
        if self.is_directory():
            return True
        if firefox.has_sqlite and firefox.extension.search(self.url):
            return True
        if self.content_type in self.ContentMimetypes:
            return True
        log.debug(LOG_CHECK, "File with content type %r is not parseable.", self.content_type)
        return False

    def set_content_type (self):
        """Return URL content type, or an empty string if content
        type could not be found."""
        if self.url:
            self.content_type = mimeutil.guess_mimetype(self.url, read=self.get_content)
        else:
            self.content_type = u""

    def get_intern_pattern (self, url=None):
        """Get pattern for intern URL matching.

        @return non-empty regex pattern or None
        @rtype String or None
        """
        if url is None:
            url = self.url
        if not url:
            return None
        if url.startswith('file://'):
            i = url.rindex('/')
            if i > 6:
                # remove last filename to make directory internal
                url = url[:i+1]
        return re.escape(url)

    def add_url (self, url, line=0, column=0, page=0, name=u"", base=None):
        """If a local webroot directory is configured, replace absolute URLs
        with it. After that queue the URL data for checking."""
        webroot = self.aggregate.config["localwebroot"]
        if webroot and url and url.startswith(u"/"):
            url = webroot + url[1:]
            log.debug(LOG_CHECK, "Applied local webroot `%s' to `%s'.", webroot, url)
        super(FileUrl, self).add_url(url, line=line, column=column, page=page, name=name, base=base)