This file is indexed.

/usr/share/khmerconverter/modules/legacyReorder.py is in khmerconverter 1.4-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
#!/usr/bin/python
# -*- coding: utf8 -*-

# Khmer Unicode fonts to Khmer Legacy Conversion
# (c) 2006 The WordForge Foundation, all rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public License
# as published by the Free Software Foundation; either version 2.1
# of the License, or (at your option) any later version.
#
# See the LICENSE file for more details.
#
# Developed by:
#       Hok Kakada (hokkakada@khmeros.info)
#       Keo Sophon (keosophon@khmeros.info)
#       San Titvirak (titvirak@khmeros.info)
#       Seth Chanratha (sethchanratha@khmeros.info)
#
# This module reorders a unicode string, given in Unicode order, into the
# visual (legacy style) order.
import unittest


# Important characters that are tested for while forming a cluster.
# They are written as \u escapes instead of unichr() calls: the values are the
# same, but unichr() only exists on Python 2 while the escapes work on both
# Python 2 and Python 3.
SRAAA = u'\u17B6'
SRAE = u'\u17C1'
SRAOE = u'\u17BE'
SRAOO = u'\u17C4'
SRAYA = u'\u17BF'
SRAIE = u'\u17C0'
SRAAU = u'\u17C5'
SRAII = u'\u17B8'
SRAU = u'\u17BB'
TRIISAP = u'\u17CA'
MUUSIKATOAN = u'\u17C9'
SAMYOKSANNYA = u'\u17D0'

LA = u'\u17A1'
NYO = u'\u1789'
BA = u'\u1794'
YO = u'\u1799'
SA = u'\u179F'
COENG = u'\u17D2'
# COENG followed by U+179A / by NYO: the two-character subscript sequences
CORO = COENG + u'\u179A'
CONYO = COENG + NYO
SRAOM = u'\u17C6'

# marker character that the reordering code puts in front of another character
MARK = u'\u17EA'
# TODO: think about another replacement for the dotted circle;
DOTCIRCLE = u''

# Possible combinations for sra E: maps each split vowel to the part that is
# kept as the above/after vowel once SRAE has been placed before the base.
sraEcombining = {
    SRAOE:SRAII,
    SRAYA:SRAYA,
    SRAIE:SRAIE,
    SRAOO:SRAAA,
    SRAAU:SRAAU
    }

# Character classes. The low 16 bits of every entry in the character class
# table hold one of these values, and the same value is used as the column
# index into the state table.
CC_RESERVED = 0
CC_CONSONANT = 1             # Consonant of type 1 or independent vowel
CC_CONSONANT2 = 2            # Consonant of type 2
CC_CONSONANT3 = 3            # Consonant of type 3
CC_ZERO_WIDTH_NJ_MARK = 4    # Zero Width non joiner character (0x200C)
CC_CONSONANT_SHIFTER = 5
CC_ROBAT = 6                 # Khmer special diacritic accent -treated differently in state table
CC_COENG = 7                 # Subscript consonant combining character
CC_DEPENDENT_VOWEL = 8
CC_SIGN_ABOVE = 9
CC_SIGN_AFTER = 10
CC_ZERO_WIDTH_J_MARK = 11    # Zero width joiner character
CC_COUNT = 12                # This is the number of character classes

# mask that extracts the character class from a table entry (bits 0-15)
CF_CLASS_MASK = (1 << 16) - 1

# position flags (bits 16-19)
CF_POS_AFTER = 1 << 16
CF_POS_ABOVE = 1 << 17
CF_POS_BELOW = 1 << 18
CF_POS_BEFORE = 1 << 19
CF_POS_MASK = CF_POS_BEFORE | CF_POS_BELOW | CF_POS_ABOVE | CF_POS_AFTER

# feature flags (bits 24-29)
CF_CONSONANT = 1 << 24       # flag to speed up comparing
CF_SPLIT_VOWEL = 1 << 25     # flag for a split vowel -> the first part is added in front of the syllable
CF_DOTTED_CIRCLE = 1 << 26   # add a dotted circle if a character with this flag is the first in a
                             # syllable
CF_COENG = 1 << 27           # flag to speed up comparing
CF_SHIFTER = 1 << 28         # flag to speed up comparing
CF_ABOVE_VOWEL = 1 << 29     # flag to speed up comparing

# Simple classes: a character class combined with its flags. They are used in
# the state table (in this file) to control the length of a syllable, to know
# where a character should be placed relative to the base character, and to
# know whether a character that is displayed on its own should get a
# dotted circle to indicate an error in syllable construction.
_xx = CC_RESERVED

# consonants
_c1 = CF_CONSONANT | CC_CONSONANT
_c2 = CF_CONSONANT | CC_CONSONANT2
_c3 = CF_CONSONANT | CC_CONSONANT3

# signs, robat, shifter and coeng
_sa = CF_DOTTED_CIRCLE | CF_POS_ABOVE | CC_SIGN_ABOVE
_sp = CF_DOTTED_CIRCLE | CF_POS_AFTER | CC_SIGN_AFTER
_rb = CF_DOTTED_CIRCLE | CF_POS_ABOVE | CC_ROBAT
_cs = CF_DOTTED_CIRCLE | CF_SHIFTER | CC_CONSONANT_SHIFTER
_co = CF_DOTTED_CIRCLE | CF_COENG | CC_COENG

# dependent vowels, one class per position
_dl = CF_DOTTED_CIRCLE | CF_POS_BEFORE | CC_DEPENDENT_VOWEL
_db = CF_DOTTED_CIRCLE | CF_POS_BELOW | CC_DEPENDENT_VOWEL
_da = CF_DOTTED_CIRCLE | CF_POS_ABOVE | CF_ABOVE_VOWEL | CC_DEPENDENT_VOWEL
_dr = CF_DOTTED_CIRCLE | CF_POS_AFTER | CC_DEPENDENT_VOWEL

# split vowels: the above/after vowel classes with the split flag added
_va = CF_SPLIT_VOWEL | _da
_vr = CF_SPLIT_VOWEL | _dr


# Character class table
# One entry per code point, starting at U+1780; the entry at index i describes
# the character U+1780 + i. The abbreviations used in the table are:
# _xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
# _sa Sign placed above the base
# _sp Sign placed after the base
# _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
# _c2 Consonant of type 2 (only RO)
# _c3 Consonant of type 3
# _rb Khmer sign robat u17CC. combining mark for subscript consonants
# _cs Consonant-shifter
# _dl Dependent vowel placed before the base (left of the base)
# _db Dependent vowel placed below the base
# _da Dependent vowel placed above the base
# _dr Dependent vowel placed behind the base (right of the base)
# _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
#     it to create a subscript consonant or independent vowel
# _va Khmer split vowel in which the first part is before the base and the second one above the base
# _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base

khmerCharClasses = [
    _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, # 1780 - 178F
    _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, # 1790 - 179F
    _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, # 17A0 - 17AF
    _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, # 17B0 - 17BF
    _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, # 17C0 - 17CF
    _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx, # 17D0 - 17DF
    ]




# khmerStateTable[state][charClass] gives the next state of the cluster scanner.
# Every row has CC_COUNT columns, one per character class (the CC_* value is the
# column index). A negative entry means the character cannot continue the
# current cluster, so the cluster ends before it.
# NOTE(review): the zwnj and zwj columns are present, but getCharClass() in this
# file only looks characters up from U+1780 onwards and returns 0 otherwise, so
# those two columns do not appear to be reachable from here - confirm.
khmerStateTable = [
    # xx  c1  c2  c3 zwnj cs  rb  co  dv  sa  sp zwj
    [ 1,  2,  2,  2,  1,  1,  1,  6,  1,  1,  1,  2], #  0 - ground state
    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], #  1 - exit state (or sign to the right of the
                                                      #      syllable)
    [-1, -1, -1, -1,  3,  4,  5,  6, 16, 17,  1, -1], #  2 - Base consonant
    [-1, -1, -1, -1, -1,  4, -1, -1, 16, -1, -1, -1], #  3 - First ZWNJ before a register shifter
                                                      #      It can only be followed by a shifter or a vowel
    [-1, -1, -1, -1, 15, -1, -1,  6, 16, 17,  1, 14], #  4 - First register shifter
    [-1, -1, -1, -1, -1, -1, -1, -1, 20, -1,  1, -1], #  5 - Robat
    [-1,  7,  8,  9, -1, -1, -1, -1, -1, -1, -1, -1], #  6 - First Coeng
    [-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14], #  7 - First consonant of type 1 after coeng
    [-1, -1, -1, -1, 12, 13, -1, -1, 16, 17,  1, 14], #  8 - First consonant of type 2 after coeng
    [-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14], #  9 - First consonant of type 3 after coeng
    [-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1], # 10 - Second Coeng (no register shifter before)
    [-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14], # 11 - Second coeng consonant (or ind. vowel) no
                                                      #      register shifter before
    [-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1], # 12 - Second ZWNJ before a register shifter
    [-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14], # 13 - Second register shifter
    [-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1], # 14 - ZWJ before vowel
    [-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1], # 15 - ZWNJ before vowel
    [-1, -1, -1, -1, -1, -1, -1, -1, -1, 17,  1, 18], # 16 - dependent vowel
    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, 18], # 17 - sign above
    [-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1], # 18 - ZWJ after vowel
    [-1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1], # 19 - Third coeng
    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1]  # 20 - dependent vowel after a Robat
    ]



def getCharClass(uniChar):
    """
    Return the Khmer character class (including its flags) of one character.

    uniChar -- a unicode string that is exactly one character long.

    The result is the khmerCharClasses entry for characters from U+1780 up to
    the end of that table, and 0 for every other character.

    Raises TypeError if uniChar is not a unicode string, or if it does not
    contain exactly one character.
    """
    # type(u'') is `unicode` on Python 2 and `str` on Python 3, so this check
    # works on both; isinstance() also accepts subclasses of the text type.
    if not isinstance(uniChar, type(u'')):
        raise TypeError('only accept unicode character')

    if len(uniChar) != 1:
        raise TypeError('only accept one character, but ' + str(len(uniChar)) + ' chars found.')

    # position of the character relative to the first table entry (U+1780)
    offset = ord(uniChar) - 0x1780
    if 0 <= offset < len(khmerCharClasses):
        return khmerCharClasses[offset]
    return 0

def reorder(sin):
    """
    Reorder a unicode string from Unicode order into the visual (legacy
    style) order.

    The string is scanned cluster by cluster with khmerStateTable. The
    characters of each cluster are sorted into slots (base, coengs, shifter,
    vowels, signs), some of them are prefixed with MARK depending on what else
    is in the cluster, and the slots are then written out in visual order.
    A character of the reserved class (class 0, e.g. non-Khmer text) is
    copied unchanged after the cluster it was found in.

    sin -- the unicode string to reorder.

    Returns the reordered unicode string.

    Raises TypeError if sin is not a unicode string.
    """
    # type(u'') is `unicode` on Python 2 and `str` on Python 3, so this check
    # works on both; isinstance() also accepts subclasses of the text type.
    if not isinstance(sin, type(u'')):
        raise TypeError('only accept unicode string')

    cursor = 0
    charCount = len(sin)
    # Finished clusters are collected in a list and joined once at the end,
    # instead of growing the result string with += for every cluster.
    pieces = []

    while (cursor < charCount):
        # every cluster starts in the ground state of the state table
        state = 0

        # slots of the current cluster
        reserved    = u''
        signAbove   = u''
        signAfter   = u''
        base        = u''
        robat       = u''
        shifter     = u''
        vowelBefore = u''
        vowelBelow  = u''
        vowelAbove  = u''
        vowelAfter  = u''
        coeng1      = u''
        coeng2      = u''

        # True while a COENG has been read but not yet its consonant
        coeng = False
        # True when the shifter was read after the first coeng consonant
        shifterAfterCoeng = False

        while (cursor < charCount):

            curChar = sin[cursor]
            kChar = getCharClass(curChar)
            charClass = kChar & CF_CLASS_MASK
            state = khmerStateTable[state][charClass]
            if (state < 0):
                # curChar cannot continue this cluster: leave it unconsumed,
                # it becomes the first character of the next cluster
                break

            ## collect variable for cluster here

            if (kChar == _xx):
                reserved = curChar
            elif (kChar == _sa):        # Sign placed above the base
                signAbove = curChar
            elif (kChar == _sp):        # Sign placed after the base
                signAfter = curChar
            elif (kChar == _c1) or (kChar == _c2) or (kChar == _c3):    # Consonant
                if (coeng):
                    # consonant that follows a COENG: a subscript
                    if (not coeng1):
                        coeng1 = COENG + curChar
                    else:
                        coeng2 = COENG + curChar
                    coeng = False
                else:
                    base = curChar
            elif (kChar == _rb):            # Khmer sign robat u17CC
                robat = curChar
            elif (kChar == _cs):            # Consonant-shifter
                if (coeng1):
                    shifterAfterCoeng = True
                shifter = curChar
            elif (kChar == _dl):            # Dependent vowel placed before the base
                vowelBefore = curChar
            elif (kChar == _db):            # Dependent vowel placed below the base
                vowelBelow = curChar
            elif (kChar == _da):            # Dependent vowel placed above the base
                vowelAbove = curChar
            elif (kChar == _dr):            # Dependent vowel placed behind the base
                vowelAfter = curChar
            elif (kChar == _co):            # Khmer combining mark COENG
                coeng = True
            elif (kChar == _va):            # Khmer split vowel, see _da
                # split into SRAE before the base and the remaining part above
                vowelBefore = SRAE
                vowelAbove = sraEcombining[curChar]
            elif (kChar == _vr):            # Khmer split vowel, see _dr
                # split into SRAE before the base and the remaining part after
                vowelBefore = SRAE
                vowelAfter = sraEcombining[curChar]

            cursor += 1
        # end of while (a cluster has been found)

        # logic of vowel
        # determine whether the below vowel or the right side vowel should be marked
        if (coeng1 and vowelBelow):
            vowelBelow = MARK + vowelBelow
        elif ((base == LA or base == NYO) and vowelBelow):
            vowelBelow = MARK + vowelBelow
        elif (coeng1 and vowelBefore and vowelAfter):
            vowelAfter = MARK + vowelAfter

        # logic when cluster has coeng
        # should coeng be located on left side
        coengBefore = u''
        if (coeng1 == CORO):
            coengBefore = coeng1
            coeng1 = u''
        elif (coeng2 == CORO):
            coengBefore = MARK + coeng2
            coeng2 = u''
        if (coeng1 or coeng2):
            # NYO must change to other form when there is coeng
            if (base == NYO):
                base = MARK + base
                # coeng NYO must be marked
                if (coeng1 == CONYO):
                    coeng1 = MARK + coeng1

            if (coeng1 and coeng2):
                coeng2 = MARK + coeng2

        # logic of shifter with base character
        if (base and shifter):
            # special case apply to BA only
            if (vowelAbove) and (base == BA) and (shifter == TRIISAP):
                vowelAbove = MARK + vowelAbove
            elif (vowelAbove):
                shifter = MARK + shifter
            elif (signAbove == SAMYOKSANNYA) and (shifter == MUUSIKATOAN):
                shifter = MARK + shifter
            elif (signAbove and vowelAfter):
                shifter = MARK + shifter
            elif (signAbove):
                signAbove = MARK + signAbove
            # add another mark to shifter
            if (coeng1) and (vowelAbove or signAbove):
                shifter = MARK + shifter
            if (base == LA or base == NYO):
                shifter = MARK + shifter

        # incomplete coeng: a COENG that was not followed by a consonant is kept
        if (coeng and not coeng1):
            coeng1 = COENG
        elif (coeng and not coeng2):
            coeng2 = MARK + COENG

        # render DOTCIRCLE for standalone sign or vowel
        if (not base) and (vowelBefore or coengBefore or robat or shifter or coeng1 or coeng2 or vowelAfter or vowelBelow or vowelAbove or signAbove or signAfter):
            base = DOTCIRCLE

        # place of shifter: before the coengs, or after them when it was
        # read after the first coeng
        shifter1 = u''
        shifter2 = u''
        if (shifterAfterCoeng):
            shifter2 = shifter
        else:
            shifter1 = shifter

        specialCaseBA = False
        if (base == BA) and ((vowelAfter == SRAAA) or (vowelAfter == SRAAU) or (vowelAfter == MARK + SRAAA) or (vowelAfter == MARK + SRAAU)):
            # SRAAA or SRAAU will get a MARK if there is coeng, redefine to last char
            vowelAfter = vowelAfter[-1]
            specialCaseBA = True
            # ... except when the first coeng is BA, YO or SA
            if (coeng1) and (coeng1[-1] in [BA, YO, SA]):
                specialCaseBA = False

        # cluster formation
        if (specialCaseBA):
            # the after vowel is written directly behind the base
            cluster = vowelBefore + coengBefore + base + vowelAfter + robat + shifter1 + coeng1 + coeng2 + shifter2 + vowelBelow + vowelAbove + signAbove + signAfter
        else:
            cluster = vowelBefore + coengBefore + base + robat + shifter1 + coeng1 + coeng2 + shifter2 + vowelBelow + vowelAbove + vowelAfter + signAbove + signAfter

        pieces.append(cluster + reserved)
    # end of while
    return u''.join(pieces)


class TestReordering(unittest.TestCase):
    """Unit tests for getCharClass() and reorder()."""

    # getCharClass(): characters below U+1780 and beyond the class table give 0,
    # characters inside the table give their table entry.
    def testKhmerType(self):
        # make sure the types are correct
        self.assertEqual(getCharClass(unichr(0x177F)), 0)
        self.assertEqual(getCharClass(unichr(0x1780)), _c1)
        self.assertEqual(getCharClass(unichr(0x1790)), _c1)
        self.assertEqual(getCharClass(unichr(0x17A0)), _c1)
        self.assertEqual(getCharClass(unichr(0x17B0)), _c1)
        self.assertEqual(getCharClass(unichr(0x17C0)), _vr)
        self.assertEqual(getCharClass(unichr(0x17D0)), _sa)
        self.assertEqual(getCharClass(unichr(0x17D4)), 0)
        self.assertEqual(getCharClass(unichr(0x17ff)), 0)

    # reorder(): each group below pins one rule of the reordering / marking logic.
    def testReordering(self):
        # low vowel under coeng goes deeper
        self.assertEqual(reorder(u'ខ្នុ'), u'ខ្ន' + MARK + u'ុ')
        self.assertEqual(reorder(u'ត្រូ'), u'្រត' + MARK + u'ូ')
        self.assertEqual(reorder(u'ព្យួ'), u'ព្យ' + MARK + u'ួ')
        # vowel under LA or NYO goes deeper
        self.assertEqual(reorder(u'ឡូ'), u'ឡ' + MARK + u'ូ')
        self.assertEqual(reorder(u'ញួ'), u'ញ' + MARK + u'ួ')
        # mark vowel after when there is coeng
        self.assertEqual(reorder(u'ក្បៀ'), u'េក្ប' + MARK + u'ៀ')

        # coeng RO must be on the left side
        self.assertEqual(reorder(u'ក្រ'), u'្រក')
        self.assertEqual(reorder(u'ស្ត្រ'), MARK + u'្រស្ត')
        # mark NYO when there is coeng
        self.assertEqual(reorder(u'ញ្ជ'), MARK + u'ញ្ជ')
        # coeng NYO under NYO is marked
        self.assertEqual(reorder(u'ញ្ញ'), MARK + u'ញ' + MARK + u'្ញ')
        # coeng NYO under other is normal
        self.assertEqual(reorder(u'ជ្ញ'), u'ជ្ញ')
        # coeng1 and coeng2, mark coeng2
        self.assertEqual(reorder(u'ក្ស្ម'), u'ក្ស' + MARK + u'្ម')

        # PA has no modification
        self.assertEqual(reorder(u'ប៉'), u'ប៉')
        # special case BA TRIISAP, mark vowel above
        self.assertEqual(reorder(u'ប៊ី'), u'ប៊' + MARK + u'ី')
        # base and shifter and vowel above, mark shifter
        self.assertEqual(reorder(u'ប៉ី'), u'ប' + MARK + u'៉ី')
        self.assertEqual(reorder(u'ស៊ី'), u'ស' + MARK + u'៊ី')
        # base and muusikatoan and samyok-sannya, mark shifter
        self.assertEqual(reorder(u'នំប៉័ង'), u'នំប' + MARK + u'៉'  + u'័ង')
        # shifter and sign above and vowel after, mark shifter
        self.assertEqual(reorder(u'ស៊ាំ'), u'ស' + MARK + u'៊' + u'ាំ')
        # shifter and sign above, mark sign
        self.assertEqual(reorder(u'អ៊ំ'), u'អ៊' + MARK + u'ំ')
        # double mark shifter when there is coeng and sign or vowel above
        self.assertEqual(reorder(u'ប្ប៉័ង'), u'ប្ប' + MARK + MARK + u'៉'  + u'័ង')

        # incomplete coeng is still kept
        self.assertEqual(reorder(u'ក្'), u'ក្')
        self.assertEqual(reorder(u'ក្ក្'), u'ក្ក' + MARK + u'្')

        # render standalone vowel or sign with DOTCIRCLE
        self.assertEqual(reorder(u'ា'), DOTCIRCLE + u'ា')
        self.assertEqual(reorder(u'េ'), u'េ' + DOTCIRCLE)
        self.assertEqual(reorder(u'ើ'), u'េ'  + DOTCIRCLE + u'ី')
        self.assertEqual(reorder(u'ំ'), DOTCIRCLE + u'ំ')
        self.assertEqual(reorder(u'ោះ'), u'េ'  + DOTCIRCLE + u'ា' + DOTCIRCLE + u'ះ')

        # shifter is after coeng
        self.assertEqual(reorder(u'ន្ស៊ី'), u'ន្ស' + MARK + MARK + u'៊ី')

        # special case BA and sra A, they always stay next to each other
        self.assertEqual(reorder(u'ប្រា'), u'្របា')
        self.assertEqual(reorder(u'ប្ដា'), u'បា្ដ')
        self.assertEqual(reorder(u'ប៉ា'), u'បា៉')
        self.assertEqual(reorder(u'ប្រៅ'), u'េ្របៅ')
        self.assertEqual(reorder(u'ប្ដៅ'), u'េបៅ្ដ')
        self.assertEqual(reorder(u'ប៉ៅ'), u'េបៅ៉')
        # except when this coeng is between them
        self.assertEqual(reorder(u'ប្បា'), u'ប្បា')
        
        # other tests, to guard against regressions
        # simple rendering
        self.assertEqual(reorder(u'គេ'), u'េគ')
        self.assertEqual(reorder(u'គោ'), u'េគា')
        self.assertEqual(reorder(u'កៅ'), u'េកៅ')
        self.assertEqual(reorder(u'លើ'), u'េលី')
        self.assertEqual(reorder(u'បៀ'), u'េបៀ')
        self.assertEqual(reorder(u'តឿ'), u'េតឿ')
        self.assertEqual(reorder(u'កាំ'), u'កាំ')
        # reorder of more than one cluster
        self.assertEqual(reorder(u'កាប់គោ'), u'កាប់េគា')
        self.assertEqual(reorder(u'ខាងលើ'), u'ខាងេលី')
        self.assertEqual(reorder(u'ចំពោះ'), u'ចំេពាះ')
        # mixed with English text
        self.assertEqual(reorder(u'កកុះwelcomeកុម្ភៈ'), u'កកុះwelcomeកុម្ភៈ')
        # two shifters, three vowels or three signs in a row: each one is its own cluster
        self.assertEqual(reorder(u'៊៊'), DOTCIRCLE + u'៊' + DOTCIRCLE + u'៊')
        self.assertEqual(reorder(u'ាិី'), DOTCIRCLE + u'ា' + DOTCIRCLE + u'ិ' + DOTCIRCLE + u'ី')
        self.assertEqual(reorder(u'ំះ័'), DOTCIRCLE + u'ំ' + DOTCIRCLE + u'ះ' + DOTCIRCLE + u'័')
        # muusikatoan is not marked when the vowel is not high
        self.assertEqual(reorder(u'ម៉្ងៃ'), u'ៃម៉្ង')
        # two coengs with vowel that place on left and right (some bigger than normal)
        self.assertEqual(reorder(u'កញ្ច្រៀវ'), u'កេ' + MARK + u'្រ'  + MARK + u'ញ្ច' + MARK + u'ៀវ')
        self.assertEqual(reorder(u'កញ្ច្រោង'), u'កេ' + MARK + u'្រ' + MARK + u'ញ្ច' + MARK + u'ាង')
        # vowel which is under coeng goes one step deeper
        self.assertEqual(reorder(u'ប្ដូ'), u'ប្ដ' + MARK + u'ូ')
        # don't break the sign
        self.assertEqual(reorder(u'ចុះ'), u'ចុះ')
        self.assertEqual(reorder(u'នុ៎ះ'), u'នុ៎ះ')
        # change sign OM, not shifter
        self.assertEqual(reorder(u'អ៊ុំ'), u'អ៊ុ' + MARK + u'ំ' )
        # these are two clusters
        self.assertEqual(reorder(u'ាក'), DOTCIRCLE + u'ាក')



# Run the unit tests of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()