This file is indexed.

/usr/lib/python2.7/dist-packages/gtkspellcheck/oxt_extract.py is in python-gtkspellcheck 4.0.5-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# -*- coding:utf-8 -*-
#
# Copyright (C) 2012, Carlos Jenkins <carlos@jenkins.co.cr>
# Copyright (C) 2012-2016, Maximilian Köhl <mail@koehlma.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
This module extracts the .dic and .aff (Hunspell) dictionaries from any given 
.oxt extension.

Extensions can be found at:

    http://extensions.services.openoffice.org/dictionary
"""

import functools
import gettext
import logging
import os
import shutil
import sys
import warnings
import xml.dom.minidom
import xml.parsers.expat
import zipfile

# enable deprecation warnings (process-wide: DeprecationWarning is always
# shown once this module has been imported)
warnings.simplefilter('always', DeprecationWarning)

# public objects
# NOTE: every name listed here must be defined in this module, otherwise
# ``from gtkspellcheck.oxt_extract import *`` fails with AttributeError; the
# extraction function is called ``extract`` (not ``extract_oxt``).
__all__ = ['extract', 'batch_extract', 'BadXml', 'BadExtensionFile',
           'ExtractPathIsNoDirectory', 'BATCH_SUCCESS', 'BATCH_ERROR',
           'BATCH_WARNING']

# logger
logger = logging.getLogger(__name__)

# translation: the gettext domain depends on the major Python version; with
# ``fallback=True`` a missing catalog yields untranslated messages instead
# of raising an error
locale_name = 'py{}gtkspellcheck'.format(sys.version_info.major)
_ = gettext.translation(locale_name, fallback=True).gettext

class BadXml(Exception):
    """Raised when the XML dictionary registry cannot be parsed as XML."""
    
class BadExtensionFile(Exception):
    """Raised when the extension is not a valid ZIP archive."""

class ExtractPathIsNoDirectory(Exception):
    """Raised when the given `extract_path` exists but is not a directory."""


def find_dictionaries(registry):
    """
    Locate the Hunspell spelling dictionaries listed in a dictionary registry.

    :param registry: parsed ``dictionaries.xcu`` document (an
        ``xml.dom.minidom`` document or element)
    :rtype: list with one entry per ``DICT_SPELL`` dictionary; every entry is
        the list of that dictionary's file locations with the ``%origin%/``
        prefix removed

    Dictionary nodes without a format or without any location are skipped
    instead of aborting the whole scan.
    """
    def oor_name(name, element):
        # getAttribute returns '' for a missing attribute, so elements
        # without "oor:name" simply do not match instead of raising KeyError
        return element.getAttribute('oor:name').lower() == name

    def get_property(name, properties):
        # "value" element of the first property called `name`, else None
        for candidate in properties:
            if oor_name(name, candidate):
                values = candidate.getElementsByTagName('value')
                return values[0] if values else None
        return None

    def get_text(node):
        # stripped concatenation of the direct text children of `node`
        text_types = (xml.dom.Node.TEXT_NODE,
                      xml.dom.Node.CDATA_SECTION_NODE)
        return ''.join(child.data for child in node.childNodes
                       if child.nodeType in text_types).strip()

    result = []

    # find all "node" elements which have "dictionaries" as "oor:name" attribute
    for dictionaries in filter(functools.partial(oor_name, 'dictionaries'),
                               registry.getElementsByTagName('node')):
        # for all "node" elements below this dictionaries node
        for dictionary in dictionaries.getElementsByTagName('node'):
            # get all "prop" elements
            properties = dictionary.getElementsByTagName('prop')

            # only spelling dictionaries are of interest
            format_value = get_property('format', properties)
            if format_value is None or get_text(format_value) != 'DICT_SPELL':
                continue

            # find the locations property
            locations = get_property('locations', properties)
            if locations is None:
                continue

            # Item list form:
            # <it>%origin%/dictionary.aff</it> <it>%origin%/dictionary.dic</it>
            # Looking for the items first (rather than inspecting the first
            # child) keeps whitespace between the elements from being
            # mistaken for a plain text value.
            items = locations.getElementsByTagName('it')
            if items:
                files = [get_text(item).replace('%origin%/', '').strip()
                         for item in items]
                files = [location for location in files if location]
            # Plain text form:
            # %origin%/dictionary.aff %origin%/dictionary.dic
            else:
                files = get_text(locations).replace('%origin%/', '').split()

            if files:
                result.append(files)

    return result

def extract(filename, target, override=False):
    """
    Extract Hunspell dictionaries out of LibreOffice ``.oxt`` extensions.

    :param filename: path to the ``.oxt`` extension
    :param target: path of the directory to extract Hunspell dictionaries to
    :param override: override existing files in the target directory
    :rtype: list of the base names of the extracted dictionary files; empty
        if the extension contains no dictionary registry
    :raises BadExtensionFile: if the extension is not a valid ZIP file
    :raises BadXml: if the dictionary registry is not valid XML

    This function extracts the Hunspell dictionaries (``.dic`` and ``.aff``
    files) from the given ``.oxt`` extension to ``target``. Files are written
    directly into ``target`` under their base name; directories inside the
    archive are not recreated.

    Extensions can be found at:

        http://extensions.services.openoffice.org/dictionary
    """
    # TODO 5.0: remove this function
    warnings.warn(('call to deprecated function "{}", '
                   'moved to separate package "oxt_extract", '
                   'will be removed in pygtkspellcheck 5.0').format(extract.__name__),
                  category=DeprecationWarning)
    extracted = []
    try:
        with zipfile.ZipFile(filename, 'r') as extension:
            members = extension.namelist()

            # prefer a registry in the archive root, otherwise take the first
            # member whose lower-cased name ends with the registry file name
            registry_suffix = 'dictionaries.xcu'
            registry_name = registry_suffix
            if registry_name not in members:
                for member in members:
                    if member.lower().endswith(registry_suffix):
                        registry_name = member
                        break
                else:
                    # no registry at all: nothing to extract
                    return extracted

            with extension.open(registry_name) as registry_file:
                registry = xml.dom.minidom.parse(registry_file)

            for dictionary in find_dictionaries(registry):
                for member in dictionary:
                    # only the base name is used, so archive members can
                    # never be written outside of `target`
                    dict_file = os.path.join(target, os.path.basename(member))

                    if (os.path.exists(dict_file)
                            and not (override and os.path.isfile(dict_file))):
                        logger.warning(('dictionary file "{}" already exists '
                                        'and not overriding it'
                                        ).format(dict_file))
                        continue

                    if member not in members:
                        logger.warning('dictionary exists in registry '
                                       'but not in the extension zip')
                        continue

                    # read the member before opening the target, so that an
                    # unreadable member does not truncate an existing file
                    with extension.open(member, 'r') as _source:
                        data = _source.read()
                    with open(dict_file, 'wb') as _target:
                        _target.write(data)
                    extracted.append(os.path.basename(member))
            return extracted
    except zipfile.BadZipfile:
        raise BadExtensionFile('extension is not a valid ZIP file')
    except xml.parsers.expat.ExpatError:
        raise BadXml('dictionary registry is not valid XML')

# result tags yielded by ``batch_extract`` as the first element of each tuple
BATCH_SUCCESS, BATCH_ERROR, BATCH_WARNING = 'success', 'error', 'warning'

def batch_extract(oxt_path, extract_path, override=False, move_path=None):
    """
    Uncompress, read and install LibreOffice ``.oxt`` dictionaries extensions.

    :param oxt_path: path to a directory containing the ``.oxt`` extensions
    :param extract_path: path to extract Hunspell dictionaries files to
    :param override: override already existing files
    :param move_path: optional path to move the ``.oxt`` files after processing
    :rtype: generator over all extensions, yielding result, extension name,
        error, extracted dictionaries and translated error message - result
        would be :const:`BATCH_SUCCESS` for success, :const:`BATCH_ERROR` if
        some error happened or :const:`BATCH_WARNING` which contain some warning
        messages instead of errors
    :raises ExtractPathIsNoDirectory: if ``extract_path`` exists but is not a
        directory

    This function extracts the Hunspell dictionaries (``.dic`` and ``.aff``
    files) from all the ``.oxt`` extensions found on ``oxt_path`` directory to
    the ``extract_path`` directory. Nothing is yielded if ``oxt_path`` is not
    a directory. As this is a generator, no work is done (and no exception is
    raised) before the first iteration.

    Extensions can be found at:

        http://extensions.services.openoffice.org/dictionary

    In detail, this functions does the following:

    1. find all the ``.oxt`` extension files within ``oxt_path``
    2. open (unzip) each extension
    3. find the dictionary definition file within (*dictionaries.xcu*)
    4. parse the dictionary definition file and locate the dictionaries files
    5. uncompress those files to ``extract_path``


    By default file overriding is disabled, set ``override`` parameter to True
    if you want to enable it. As additional option, each processed extension can
    be moved to ``move_path``; this happens for extensions that failed to
    extract as well.

    Example::

        for result, name, error, dictionaries, message in oxt_extract.batch_extract(...):
            if result == oxt_extract.BATCH_SUCCESS:
                print('successfully extracted extension "{}"'.format(name))
            elif result == oxt_extract.BATCH_ERROR:
                print('could not extract extension "{}"'.format(name))
                print(message)
                print('error {}'.format(error))
            elif result == oxt_extract.BATCH_WARNING:
                print('warning during processing extension "{}"'.format(name))
                print(message)
                print(error)

    """

    # TODO 5.0: remove this function
    warnings.warn(('call to deprecated function "{}", '
                   'moved to separate package "oxt_extract", '
                   'will be removed in pygtkspellcheck 5.0'
                   ).format(batch_extract.__name__),
                  category=DeprecationWarning)

    # get the real, absolute and normalized path
    oxt_path = os.path.normpath(os.path.abspath(os.path.realpath(oxt_path)))

    # check that the input directory exists
    if not os.path.isdir(oxt_path):
        return

    # create extract directory if not exists
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    # check that the extract path is a directory
    if not os.path.isdir(extract_path):
        raise ExtractPathIsNoDirectory('extract path is not a valid directory')

    # get all .oxt extension at given path
    oxt_files = [extension for extension in os.listdir(oxt_path)
                 if extension.lower().endswith('.oxt')]

    for extension_name in oxt_files:
        extension_path = os.path.join(oxt_path, extension_name)

        try:
            dictionaries = extract(extension_path, extract_path, override)
        except BadExtensionFile as error:
            logger.error(('extension "{}" is not a valid ZIP file'
                          ).format(extension_name))
            yield (BATCH_ERROR, extension_name, error, [],
                   _('extension "{}" is not a valid ZIP file'
                     ).format(extension_name))
        except BadXml as error:
            logger.error(('extension "{}" has no valid XML dictionary registry'
                          ).format(extension_name))
            yield (BATCH_ERROR, extension_name, error, [],
                   _('extension "{}" has no valid XML dictionary registry'
                     ).format(extension_name))
        else:
            # always hand out a list, even if nothing could be extracted
            yield BATCH_SUCCESS, extension_name, None, dictionaries or [], ''

        # move the extension after processing if user requires it
        if move_path is None:
            continue

        # create move path if it doesn't exists
        if not os.path.exists(move_path):
            os.makedirs(move_path)

        if not os.path.isdir(move_path):
            logger.warning(('unable to move extension, move_path is not a '
                            'directory'))
            yield (BATCH_WARNING, extension_name,
                   ('unable to move extension, move_path is not a '
                    'directory'), [],
                   _('unable to move extension, move_path is not a '
                     'directory'))
            continue

        # move only if the target doesn't exist or overriding is requested
        destination = os.path.join(move_path, extension_name)
        if os.path.exists(destination) and not override:
            logger.warning(('unable to move extension, file with same '
                            'name exists within move_path'))
            yield (BATCH_WARNING, extension_name,
                   ('unable to move extension, file with same name '
                    'exists within move_path'), [],
                   _('unable to move extension, file with same name '
                     'exists within move_path'))
        else:
            # Move to the explicit file path: moving into the directory
            # raises shutil.Error if an entry with the same name already
            # exists there, which would defeat `override`.
            shutil.move(extension_path, destination)