This file is indexed.

/usr/share/espeak-gui/src/language.py is in espeak-gui 0.4-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# -*- coding: utf-8 -*-
#
# Graphical interface for the eSpeak speech synthesizer
#
# Copyright © 2010-2012 Siegfried-Angel Gevatter Pujals <siegfried@gevatter.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

import os
import locale
import ctypes
from xdg import BaseDirectory

class LanguageIdentifier:
    """Guess the language of a text through the libtextcat C library.

    On construction a libtextcat configuration file is generated from the
    language models found below ``LIBTEXTCAT_DIR`` and the library is
    initialised with it.  The resulting handle is released again when the
    object is destroyed.
    """

    LIBTEXTCAT_DIR = '/usr/share/libtextcat/'

    _libtextcat = None
    _handle = None

    def __init__(self):
        """Load libtextcat and initialise it with a generated config file.

        Raises:
            OSError: if the shared library cannot be loaded, or the language
                model directory / cache file cannot be accessed.
            RuntimeError: if ``textcat_Init`` returns a NULL handle.
        """
        self._conf_file = self._get_list_filename()
        lib = ctypes.CDLL('libtextcat.so.0', ctypes.RTLD_GLOBAL)
        # Declare complete prototypes.  Without argtypes ctypes passes a
        # Python integer as a C int, which would truncate the pointer-sized
        # handle on 64-bit platforms.
        lib.textcat_Init.argtypes = [ctypes.c_char_p]
        lib.textcat_Init.restype = ctypes.c_void_p
        lib.textcat_Classify.argtypes = [ctypes.c_void_p, ctypes.c_char_p,
            ctypes.c_size_t]
        lib.textcat_Classify.restype = ctypes.c_char_p
        lib.textcat_Version.argtypes = []
        lib.textcat_Version.restype = ctypes.c_char_p
        lib.textcat_Done.argtypes = [ctypes.c_void_p]
        lib.textcat_Done.restype = None
        self._libtextcat = lib

        handle = lib.textcat_Init(self._to_bytes(self._conf_file))
        if not handle:
            # A NULL handle must never reach textcat_Classify/textcat_Done.
            raise RuntimeError("libtextcat could not be initialised with %s"
                % self._conf_file)
        self._handle = handle
        self._languages = self._get_language_list(self._conf_file)

    def __del__(self):
        """Release the libtextcat handle, if one was ever obtained."""
        # __del__ also runs when __init__ raised half-way through, in which
        # case the library and/or the handle are still None.
        lib, handle = self._libtextcat, self._handle
        if lib is not None and handle is not None:
            self._handle = None
            lib.textcat_Done(handle)

    def identify(self, text):
        """Return the language names libtextcat proposes for *text*.

        Args:
            text: the text to classify; non-byte strings are encoded as
                UTF-8 before being handed to the library.

        Returns:
            A list of language names, or an empty list when the library
            answers ``SHORT`` or ``UNKNOWN`` (or returns nothing).
        """
        data = self._to_bytes(text)
        # The length passed to the library is the byte length of the buffer.
        result = self._libtextcat.textcat_Classify(self._handle, data,
            len(data))
        if not result:
            return []
        if not isinstance(result, str):
            result = result.decode('utf-8', 'replace')
        if result in ('SHORT', 'UNKNOWN'):
            return []
        # The answer looks like "[lang1][lang2]"; the final split element is
        # the empty string following the last ']'.
        return result.replace('[', '').split(']')[:-1]

    @staticmethod
    def _to_bytes(value):
        """Return *value* as a byte string, encoding text as UTF-8."""
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    @classmethod
    def _get_list_filename(cls):
        """Return the path of the libtextcat configuration file to use."""
        # Disabled alternative: use the conf.txt shipped in LIBTEXTCAT_DIR.
        #conffile = os.path.join(cls.LIBTEXTCAT_DIR, 'conf.txt')
        #if os.path.isfile(conffile):
        #    return conffile
        return cls._generate_list(os.path.join(cls.LIBTEXTCAT_DIR, 'LM'))

    @staticmethod
    def _generate_list(path):
        """Write a libtextcat config listing every file in *path*.

        Each line has the form ``<model file>\\t<language name>``, where the
        language name is the file name up to its first dot.

        Args:
            path: directory containing the language model files.

        Returns:
            The path of the generated file inside the XDG cache directory.

        Raises:
            OSError: if *path* cannot be listed or the cache directory
                cannot be created.
        """
        tmpfilename = os.path.join(BaseDirectory.xdg_cache_home,
            'espeak-gui/libtextcat_conf.txt')
        cache_dir = os.path.dirname(tmpfilename)
        try:
            os.makedirs(cache_dir)
        except OSError:
            # An already existing directory is fine; anything else is not.
            if not os.path.isdir(cache_dir):
                raise
        # List the models before opening the file so that a failure here
        # does not truncate a previously generated configuration.
        names = sorted(os.listdir(path))
        with open(tmpfilename, 'w') as conf:
            for name in names:
                conf.write("%s\t%s\n" % (os.path.join(path, name),
                    name.split('.', 1)[0]))
        return tmpfilename

    @staticmethod
    def _get_language_list(filename):
        """Return the language names listed in a libtextcat config file.

        Anything after a ``#`` is ignored, as are lines that have nothing
        after their first tab-separated field.

        Args:
            filename: path of the configuration file to parse.

        Returns:
            A list with the language names, in file order.
        """
        languages = []
        with open(filename, 'r') as conffile:
            for line in conffile:
                line = line.split('#')[0].strip()
                name = ''.join(line.split('\t')[1:])
                if name:
                    languages.append(name)
        return languages

    @property
    def languages(self):
        """Language names read from the configuration file."""
        return self._languages

class LanguageManager:
    """Keep track of the eSpeak voices and pick a language for a text.

    Voices whose (lower-cased) name is known to the language identifier can
    be auto-detected; all remaining voices are kept in a separate list.
    """

    _identifier = None

    _languages_autodetect = None
    _languages_other = None

    _lang2identifier = None         # for espeak-only stuff
    _lang2iso = None                # for libtextcat -> espeak conversion
    _supported_languages = None
    _last_language = None

    def __init__(self, espeak_voices):
        """Sort *espeak_voices* into auto-detectable and other voices.

        Args:
            espeak_voices: iterable of voice objects providing ``name`` and
                ``identifier`` attributes.
        """
        try:
            self._identifier = LanguageIdentifier()
        except Exception as e:
            # Best effort: without an identifier auto-detection is simply
            # unavailable and every voice ends up in the "other" list.
            print("Couldn't initialize language identifier: %s" % e)

        self._languages_autodetect = []
        self._languages_other = []
        self._lang2identifier = {}
        self._lang2iso = {}
        self._supported_languages = []

        # Build the lookup set once instead of scanning per voice.
        detectable = set()
        if self._identifier:
            detectable = set(self._identifier.languages)

        for voice in espeak_voices:
            voice_name = voice.name.lower()
            self._register_language(voice_name, voice.identifier)
            if voice_name in detectable:
                self._languages_autodetect.append(voice)
                self._supported_languages.append(voice_name)
            else:
                self._languages_other.append(voice)

    def _register_language(self, name, identifier):
        """Record the name -> identifier and name -> ISO code mappings.

        A trailing ``-test`` is dropped from the name used for the ISO
        mapping, and the ISO code is the part of *identifier* after its
        first ``/`` (the whole identifier if there is none).
        """
        self._lang2identifier[name] = identifier
        iso_name = name
        if name.endswith('-test'):
            iso_name = name[:-5]
        self._lang2iso[iso_name] = identifier.split('/', 1)[-1]

    def get_languages(self, autodetect=None):
        """Return the known voices.

        Args:
            autodetect: ``None`` returns all voices (auto-detectable ones
                first), a true value only the auto-detectable ones and any
                other false value only the remaining ones.
        """
        result = []
        if autodetect or autodetect is None:
            result.extend(self._languages_autodetect)
        if not autodetect:
            result.extend(self._languages_other)
        return result

    def get_default_language(self):
        """Return the language of the current locale, or ``'en'``.

        The locale language is only used when it equals one of the
        registered eSpeak identifiers.
        """
        try:
            locale_code = locale.getlocale()[0]
        except ValueError:
            locale_code = None
        # getlocale() reports None when no locale is set; previously this
        # left `language` unbound and raised UnboundLocalError below.
        language = locale_code.split('_')[0] if locale_code else None

        # NOTE(review): this compares against the full eSpeak identifiers
        # (which may carry a directory prefix) -- confirm whether the ISO
        # codes in _lang2iso were intended instead.
        if language in self._lang2identifier.values():
            return language
        return 'en'

    def autodetect(self, text):
        """Return a LanguageGuess for *text*.

        Without a language identifier the guess has no candidates and uses
        the default language.
        """
        if not self._identifier:
            # not supported
            return LanguageGuess([], self.get_default_language())
        candidates = self._identifier.identify(text)
        candidates = [x for x in candidates if x in self._supported_languages]
        return LanguageGuess(candidates, self._choose_language(candidates))

    @property
    def autodetect_supported(self):
        """The language identifier, or ``None`` if it failed to initialise."""
        return self._identifier

    def _get_iso_from_lang(self, name):
        """Return the ISO code registered for *name*, or ``None``."""
        try:
            return self._lang2iso[name.lower()]
        except KeyError:
            print('Can\'t translate "%s" to ISO code.' % name)
            return None

    def _choose_language(self, candidates):
        """Pick the language to use for a list of candidate names.

        The first candidate that translates to an ISO code wins.  If there
        is none, the previously chosen language is reused, and if there is
        no previous choice either, the default language is returned.  The
        result is remembered for the next call.
        """
        language = None
        # We use the first guessed candidate for which we have an identifier
        for name in candidates or ():
            language = self._get_iso_from_lang(name)
            if language:
                break
        if not language:
            # If that fails, we use the previously guessed language, and if
            # that fails too, the default language.  This also covers
            # candidates that could not be translated, which used to yield
            # None and overwrite the remembered language.
            language = self._last_language or self.get_default_language()

        self._last_language = language
        return language

class LanguageGuess:
    """Result of a language detection.

    Holds the candidate language names together with the language that was
    finally chosen for them.
    """

    _candidates = None
    _language = None

    def __init__(self, candidates, language):
        """Store the candidate names and the chosen language."""
        self._language = language
        self._candidates = candidates

    @property
    def good(self):
        """The candidate list; it is truthy when there are candidates."""
        return self._candidates

    @staticmethod
    def _cut_string(string, length):
        """Shorten *string* to *length* characters, ending it in ``...``."""
        if len(string) > length:
            return "%s..." % string[:length-3]
        return string

    def get_display_name(self):
        """Return the capitalised candidates, comma separated, cut to 25 chars."""
        names = [candidate.capitalize() for candidate in self._candidates]
        return self._cut_string(', '.join(names), 25)

    def get_language(self):
        """Return the language chosen for this guess."""
        return self._language

if __name__ == '__main__':
    # Manual smoke test: classify one English and one Catalan sample and
    # print the languages proposed for each of them.
    identifier = LanguageIdentifier()
    # English
    # (The sample used to mix " and ' delimiters, which left stray quotes
    # and indentation inside the text handed to the classifier.)
    print(identifier.identify('Over the coming weeks, we will be making two '
        'important updates that will impact how you interact...'))
    # Catalan
    print(identifier.identify('Hola, aquí tens la nova versió del programa. Caldria '
        'afegir un nou paràgraf al final de la descripció. '
        'Qualsevol cosa em dius. Moltes gràcies!'))