/usr/share/khmerconverter/modules/unicodeProcess.py is in khmerconverter 1.4-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/python
# -*- coding: utf8 -*-
# Khmer Legacy fonts to Khmer Unicode Conversion
# (c) 2006 The WordForge Foundation, all rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public License
# as published by the Free Software Foundation; either version 2.1
# of the License, or (at your option) any later version.
#
# See the LICENSE file for more details.
#
# Developed by:
# Hok Kakada (hokkakada@khmeros.info)
# Keo Sophon (keosophon@khmeros.info)
# San Titvirak (titvirak@khmeros.info)
# Seth Chanratha (sethchanratha@khmeros.info)
import unittest
import sys
from types import *
def process(sin, data):
    """Convert a legacy-encoded byte string to a unicode string.

    sin : byte string in the legacy encoding (must not be a unicode string)
    data: two-element list [condenseData, replaceData] where
          condenseData is a dict mapping byte sequences to the unicode
          text that replaces them, and replaceData is a list, indexed by
          byte value, holding the unicode replacement of a single byte
    return value: unicode string

    At each input position the longest condenseData key that matches is
    replaced first.  If no key matches, the single byte is looked up in
    replaceData; a byte value beyond the end of that list is passed
    through as the unicode character with the same code point.

    Raises TypeError if data does not have the layout described above or
    if sin is a unicode string.
    """
    if (not isinstance(data, list) or len(data) != 2
            or not isinstance(data[0], dict)
            or not isinstance(data[1], list)):
        raise TypeError("Wrong data for conversion.")
    # type(u'') is the interpreter's unicode text type, so this check does
    # not depend on the Python 2-only name `unicode`.
    if isinstance(sin, type(u'')):
        raise TypeError("Input must not be Unicode string.")

    condenseData = data[0]  # dictionary with character combinations and replacements
    replaceData = data[1]   # list with character replacement values
    listLength = len(replaceData)
    end = len(sin)

    # Try longer combinations first so the result does not depend on the
    # dictionary's iteration order when one key is a prefix of another.
    # An empty key is skipped: it would match everywhere without consuming
    # any input, and the loop below would never terminate.
    combinations = sorted(
        (key for key in condenseData if len(key) > 0),
        key=len, reverse=True)

    pieces = []  # converted fragments, joined once at the end
    i = 0
    while i < end:
        for key in combinations:
            if sin[i:i + len(key)] == key:
                pieces.append(condenseData[key])
                i += len(key)
                break
        else:
            # No combination starts here: convert a single byte.  Slicing
            # (rather than indexing) yields a length-1 byte string that
            # ord() accepts.
            char = sin[i:i + 1]
            n = ord(char)
            if n < listLength:
                pieces.append(replaceData[n])
            else:
                # latin-1 maps every byte to the code point of the same
                # value, i.e. the unicode character numbered n.
                pieces.append(char.decode('latin-1'))
            i += 1
    return u''.join(pieces)
class TestProcessing(unittest.TestCase):
    """Exercise process() against a small hand-built conversion table."""

    def setUp(self):
        # Multi-character sequences and the unicode text replacing them.
        condense = {
            "12": u"_",
            u"b¤".encode("cp1252"): u"ឬ",
            u"B£".encode("cp1252"): u"ឭ",
            u"B¤".encode("cp1252"): u"ឮ",
            "abcd": u"",
        }
        # Replacement text for the character codes 0 through 4.
        replace = [u"*", u"cbc", u"ក", u"កគ", u""]
        self.data = [condense, replace]

    def testConversion(self):
        # Character codes covered by the replacement list map to its entries.
        expectations = [
            (chr(0), u"*"),
            (chr(1), u"cbc"),
            (chr(2), u"ក"),
            (chr(3), u"កគ"),
            (chr(4), u""),
            (chr(3) + chr(0), u"កគ*"),
        ]
        for legacy, expected in expectations:
            self.assertEqual(process(legacy, self.data), expected)

    def testInvalid(self):
        # Codes outside the replacement list come back as the same character.
        for code in (255, len(self.data[1])):
            char = unichr(code)
            converted = process(char.encode('cp1252'), self.data)
            self.assertEqual(converted, char)

    def testTypeError(self):
        # Conversion data of the wrong type must be rejected with TypeError.
        for bad_data in (None, 1):
            self.assertRaises(TypeError, process, 'sala', bad_data)

    def testCondense(self):
        # Multi-character keys are replaced as a unit, wherever they occur.
        expectations = [
            ('12'.encode('cp1252'), u"_"),
            ('1212'.encode('cp1252'), u"__"),
            ('12x12'.encode('cp1252'), u"_x_"),
            (u'b¤'.encode('cp1252'), u"ឬ"),
            (u'b¤B£B¤'.encode('cp1252'), u"ឬឭឮ"),
            ('abcd', u""),
        ]
        for legacy, expected in expectations:
            self.assertEqual(process(legacy, self.data), expected)
# Run the unit tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|