/usr/share/doc/diveintopython-zh/examples/unicode2koi8r.py is in diveintopython-zh 5.4b-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""Convert Cyrillic from iso-8859-1 Unicode-encoded to KOI8-R-encoded
This script is used during the build process of the Russian translation
of "Dive Into Python" (http://diveintopython.org/).
It takes one argument, which can be either an HTML file or a directory.
If a file, it converts the file in place; if a directory, it converts
every HTML file in the immediate directory (but not recursively).
Safe but pointless to run more than once on the same file or directory.
"""
__author__ = "Mark Pilgrim (mark@diveintopython.org)"
__version__ = "$Revision: 1.2 $"
__date__ = "$Date: 2004/05/05 21:57:19 $"
__copyright__ = "Copyright (c) 2001 Mark Pilgrim"
__license__ = "Python"
import os
import sys
import re
# Lookup table from HTML numeric character references to KOI8-R characters.
#
# Each key is the exact text that unicodePattern matches ("&#NNNN;", where
# NNNN is the decimal Unicode code point of a Cyrillic letter); each value is
# the one-character string holding that letter's KOI8-R byte value.
#
# The keys must stay in "&#NNNN;" form: translateMatch looks up the whole
# matched reference, so keys written as literal Cyrillic characters would
# never be found.  Keeping them ASCII also means the source file needs no
# encoding declaration.
unicodeToKOI8R = {
    # U+0401 CYRILLIC CAPITAL LETTER IO
    '&#1025;': '\xb3',
    # U+0410..U+042F: capital letters A through YA
    '&#1040;': '\xe1',
    '&#1041;': '\xe2',
    '&#1042;': '\xf7',
    '&#1043;': '\xe7',
    '&#1044;': '\xe4',
    '&#1045;': '\xe5',
    '&#1046;': '\xf6',
    '&#1047;': '\xfa',
    '&#1048;': '\xe9',
    '&#1049;': '\xea',
    '&#1050;': '\xeb',
    '&#1051;': '\xec',
    '&#1052;': '\xed',
    '&#1053;': '\xee',
    '&#1054;': '\xef',
    '&#1055;': '\xf0',
    '&#1056;': '\xf2',
    '&#1057;': '\xf3',
    '&#1058;': '\xf4',
    '&#1059;': '\xf5',
    '&#1060;': '\xe6',
    '&#1061;': '\xe8',
    '&#1062;': '\xe3',
    '&#1063;': '\xfe',
    '&#1064;': '\xfb',
    '&#1065;': '\xfd',
    '&#1066;': '\xff',
    '&#1067;': '\xf9',
    '&#1068;': '\xf8',
    '&#1069;': '\xfc',
    '&#1070;': '\xe0',
    '&#1071;': '\xf1',
    # U+0430..U+044F: small letters a through ya
    '&#1072;': '\xc1',
    '&#1073;': '\xc2',
    '&#1074;': '\xd7',
    '&#1075;': '\xc7',
    '&#1076;': '\xc4',
    '&#1077;': '\xc5',
    '&#1078;': '\xd6',
    '&#1079;': '\xda',
    '&#1080;': '\xc9',
    '&#1081;': '\xca',
    '&#1082;': '\xcb',
    '&#1083;': '\xcc',
    '&#1084;': '\xcd',
    '&#1085;': '\xce',
    '&#1086;': '\xcf',
    '&#1087;': '\xd0',
    '&#1088;': '\xd2',
    '&#1089;': '\xd3',
    '&#1090;': '\xd4',
    '&#1091;': '\xd5',
    '&#1092;': '\xc6',
    '&#1093;': '\xc8',
    '&#1094;': '\xc3',
    '&#1095;': '\xde',
    '&#1096;': '\xdb',
    '&#1097;': '\xdd',
    '&#1098;': '\xdf',
    '&#1099;': '\xd9',
    '&#1100;': '\xd8',
    '&#1101;': '\xdc',
    '&#1102;': '\xc0',
    '&#1103;': '\xd1',
    # U+0451 CYRILLIC SMALL LETTER IO
    '&#1105;': '\xa3',
}
# Matches one HTML numeric character reference with exactly four decimal
# digits, e.g. "&#1040;".
unicodePattern = re.compile(
    r'&#[0-9]{4,4};'
)
# Matches the charset name "ISO-8859-1" in any letter case.
charsetPattern = re.compile(
    r'ISO-8859-1',
    flags=re.I,
)
def translateMatch(match):
    """Return the replacement text for one matched character reference.

    Written as a replacement callback for a regex ``sub`` call.

    match -- regex match object; its full matched text (group 0) is the
        key looked up in unicodeToKOI8R

    Returns the table's value for that text, or the matched text itself,
    unchanged, when the table has no entry for it.
    """
    entity = match.group(0)
    # dict.get() does the membership test and the lookup in one step and,
    # unlike has_key(), exists on both Python 2 and Python 3.
    return unicodeToKOI8R.get(entity, entity)
def translateBuffer(buffer):
    """Return a converted copy of buffer.

    Two substitutions are applied in order: every match of unicodePattern
    is replaced by whatever translateMatch returns for it, then every match
    of charsetPattern is replaced by the literal 'KOI8-R'.

    buffer -- the text to convert
    """
    converted = unicodePattern.sub(translateMatch, buffer)
    return charsetPattern.sub('KOI8-R', converted)
def translateFile(filename, outfilename=None):
    """Convert one file with translateBuffer and write the result.

    filename -- path of the file to read
    outfilename -- path of the file to write; when omitted or empty, the
        input file is overwritten in place

    The file is read and written in binary mode so that bytes the
    conversion does not touch (including line endings) come out exactly
    as they went in.
    """
    if not outfilename:
        outfilename = filename
    # "with" guarantees the handle is closed even if read() raises.
    with open(filename, 'rb') as fsock:
        data = fsock.read()
    # On Python 3 the bytes just read are not str, and the str-based
    # patterns cannot search them.  latin-1 maps each byte to the code
    # point with the same value, so this round trip is lossless.  On
    # Python 2 the data is already str and both branches are skipped.
    if not isinstance(data, str):
        data = data.decode('latin-1')
    data = translateBuffer(data)
    if not isinstance(data, bytes):
        data = data.encode('latin-1')
    # The input is fully read and closed before the output is opened, so
    # converting in place (outfilename == filename) is safe.
    with open(outfilename, 'wb') as fsock:
        fsock.write(data)
def htmlFilter(filename):
    """Return True when filename's extension is exactly '.html'.

    The comparison is case-sensitive, so 'page.HTML' and 'page.htm' are
    both rejected.
    """
    _root, extension = os.path.splitext(filename)
    return extension == '.html'
def translateDirectory(directoryname, filterFunc=htmlFilter):
    """Convert, in place, every selected entry directly inside a directory.

    directoryname -- directory to scan; subdirectories are not descended
        into
    filterFunc -- predicate called with each entry's path (directoryname
        joined with the entry name); only entries for which it returns a
        true value are passed to translateFile.  Defaults to htmlFilter.
    """
    fileList = [os.path.join(directoryname, f)
                for f in os.listdir(directoryname)]
    # An explicit loop is required here: map() is lazy on Python 3, so
    # calling it only for its side effects would convert nothing.
    for path in filter(filterFunc, fileList):
        translateFile(path)
if __name__ == "__main__":
    # Script entry point: the first command-line argument names either a
    # single file or a directory to convert in place.
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback;
        # sys.exit() with a string prints it to stderr and exits non-zero.
        sys.exit("usage: %s <html-file-or-directory>" % sys.argv[0])
    name = sys.argv[1]
    if os.path.isdir(name):
        translateDirectory(name)
    else:
        translateFile(name)
|