This file is indexed.

/usr/lib/python2.7/dist-packages/linkcheck/plugins/parseword.py is in linkchecker 9.3-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2010-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Parse hyperlinks in Word files.
"""
from . import _ParserPlugin
try:
    import win32com
    import pythoncom
    has_win32com = True
    Error = pythoncom.com_error
except ImportError:
    has_win32com = False
    Error = StandardError
from .. import fileutil, log, LOG_PLUGIN


_initialized = False
def init_win32com ():
    """Prepare the win32com.client gencache; only the first call does work.

    The module-level ``_initialized`` flag records that setup already ran,
    so later calls return without touching win32com.
    """
    global _initialized
    if not _initialized:
        import win32com.client
        gencache = win32com.client.gencache
        if gencache.is_readonly:
            # Let gencache create the cached wrapper objects.
            gencache.is_readonly = False
            # Under py2exe gencache's own __init__() is not run, so
            # Rebuild() is used to force creation of the gen_py folder.
            # The python...\win32com.client.gen_py directory must not
            # exist so that the cache can be created in %temp% for py2exe;
            # setup.py ensures this by excluding win32com.gen_py.
            gencache.Rebuild()
        _initialized = True


def has_word ():
    """Tell whether Word can be used on this system.

    Returns False when win32com could not be imported. Otherwise returns
    True only if the "Word.Application" key can be opened and closed under
    HKEY_CLASSES_ROOT in the registry.
    """
    found = False
    if has_win32com:
        # The registry module is named _winreg on Python 2.
        try:
            import _winreg as winreg
        except ImportError:
            import winreg
        try:
            handle = winreg.OpenKey(winreg.HKEY_CLASSES_ROOT,
                                    "Word.Application")
            winreg.CloseKey(handle)
            found = True
        except (EnvironmentError, ImportError):
            # Registry lookup failed: treat Word as unavailable.
            pass
    return found


def constants (name):
    """Look up the attribute ``name`` on win32com.client.constants.

    Exists so that other modules do not have to import win32com.client
    themselves.
    """
    com_constants = win32com.client.constants
    return getattr(com_constants, name)


def get_word_app ():
    """Create a hidden Word.Application COM object.

    Returns the application handle with ``Visible`` set to False, or None
    when has_word() reports that Word is not available.
    """
    if has_word():
        # This function is called from different threads, so the COM layer
        # is initialized on every call.
        pythoncom.CoInitialize()
        import win32com.client
        word = win32com.client.gencache.EnsureDispatch("Word.Application")
        word.Visible = False
        return word
    return None


def close_word_app (app):
    """Shut down a Word application by calling its ``Quit()`` method.

    Nothing is returned; whatever ``Quit()`` returns is discarded.
    """
    quit_application = app.Quit
    quit_application()


def open_wordfile (app, filename):
    """Open ``filename`` through ``app.Documents.Open`` and return the result.

    The document is requested read-only, invisible, kept out of the
    recent-files list and without the encoding dialog.
    """
    open_options = {
        "ReadOnly": True,
        "AddToRecentFiles": False,
        "Visible": False,
        "NoEncodingDialog": True,
    }
    documents = app.Documents
    return documents.Open(filename, **open_options)


def close_wordfile (doc):
    """Close a Word document by calling its ``Close()`` method.

    Nothing is returned; whatever ``Close()`` returns is discarded.
    """
    close_document = doc.Close
    close_document()


class WordParser(_ParserPlugin):
    """Word parsing plugin.

    Writes the downloaded content to a temporary .doc file, opens it with
    the Word COM application and registers every hyperlink of the document
    on the checked URL.
    """

    def __init__(self, config):
        """Initialize the win32com cache and warn if Word is missing."""
        # init_win32com() imports win32com.client, so it may only be called
        # when the module-level win32com import succeeded; otherwise the
        # plugin could not even be constructed on systems without win32com.
        if has_win32com:
            init_win32com()
        if not has_word():
            log.warn(LOG_PLUGIN, "Microsoft Word not found for WordParser plugin")
        super(WordParser, self).__init__(config)

    def applies_to(self, url_data, pagetype=None):
        """Check for Word pagetype."""
        return has_word() and pagetype == 'word'

    def check(self, url_data):
        """Parse Word data and add all hyperlinks found to url_data.

        COM errors (``Error``) raised while talking to Word are logged as
        plugin warnings instead of being propagated. The temporary file
        holding the document content is removed afterwards.
        """
        import os
        content = url_data.get_content()
        filename = get_temp_filename(content)
        try:
            app = get_word_app()
            if app is None:
                # get_word_app() returns None when Word is unavailable;
                # there is nothing to open or to quit in that case.
                log.warn(LOG_PLUGIN,
                    "Microsoft Word not found for WordParser plugin")
                return
            # open word file and parse hyperlinks
            try:
                doc = open_wordfile(app, filename)
                if doc is None:
                    raise Error("could not open word file %r" % filename)
                try:
                    for link in doc.Hyperlinks:
                        # get_line_number() takes two arguments and reads
                        # ``.Selection`` from the first one. In the Word
                        # object model Selection is a property of the
                        # Application object, so the app handle is passed.
                        line = get_line_number(app, link.Range)
                        name = link.TextToDisplay
                        url_data.add_url(link.Address, name=name, line=line)
                finally:
                    close_wordfile(doc)
            finally:
                close_word_app(app)
        except Error as msg:
            log.warn(LOG_PLUGIN, "Error parsing word file: %s", msg)
        finally:
            # Best-effort cleanup of the temporary document; removal can
            # fail if the file is still locked, which must not abort the
            # check.
            try:
                os.remove(filename)
            except OSError:
                pass


def get_line_number(doc, wrange):
    """Get the 1-based line number for the given range object.

    The range is selected, then the selection is moved up one line at a
    time until the reported line number stops changing; the number of
    successful moves plus one is the line number of the range.

    @param doc: object whose ``Selection`` attribute is the current Word
      selection
    @param wrange: Word range object to locate; it gets selected as a side
      effect
    @return: line number, starting at 1
    """
    lineno = 1
    wrange.Select()
    wdFirstCharacterLineNumber = constants("wdFirstCharacterLineNumber")
    wdGoToLine = constants("wdGoToLine")
    wdGoToPrevious = constants("wdGoToPrevious")
    while True:
        curline = doc.Selection.Information(wdFirstCharacterLineNumber)
        doc.Selection.GoTo(wdGoToLine, wdGoToPrevious, Count=1, Name="")
        prevline = doc.Selection.Information(wdFirstCharacterLineNumber)
        if prevline == curline:
            # The selection did not move: the top has been reached.
            break
        # Count only moves that actually happened. Incrementing before this
        # check made every result one too high (first line reported as 2).
        lineno += 1
    return lineno


def get_temp_filename (content):
    """Write ``content`` to a new temporary file and return its filename.

    The file is obtained from fileutil.get_temp_file() with prefix 'lc_'
    and suffix '.doc', opened in binary write mode, and is closed again
    even if writing fails.
    """
    tmpfile, tmpname = fileutil.get_temp_file(
        mode='wb', suffix='.doc', prefix='lc_')
    try:
        tmpfile.write(content)
    finally:
        tmpfile.close()
    return tmpname