/usr/share/lua/5.1/tongue/transliteration.lua is in lua-tongue 0.8-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
-- lib/tongue/transliteration.lua
--
-- Lua I18N library 'Tongue' - Transliteration of strings
--
-- Copyright 2016 Daniel Silverstone <dsilvers@digital-scurf.org>
--
-- For licence terms, see COPYING
--
--- Tongue language packs are internally always in UTF-8, but users may need
-- different encodings.
--
-- Since users might have all sorts of ways of specifying the desired character
-- encoding for their messages, Tongue provides a mechanism for deriving the
-- target character encoding and then transliterating to and from that
-- encoding.
--
-- @module tongue.transliteration
local iconv = require "iconv"
local util = require "tongue.util"
local converter = {}
--- Tongue character-set converter
--
-- Tongue deals internally in UTF-8 but may have to handle input and output
-- in any character set a user may choose. The converter object wraps
-- a pair of iconv descriptors which manage that conversion.
--
--
-- @type converter
--- Transliterate a UTF-8 string into the user character set.
--
-- The input is handed to the converter's `_touser` iconv descriptor.  If the
-- first value that call yields is truthy it is returned unchanged and the
-- second value is ignored; otherwise the second value is stringified and
-- returned behind an "Error " prefix.
--
-- @tparam string input The input (UTF-8) string
-- @treturn string The output (user charset) string, or an "Error ..." string
-- @function touser
function converter:touser(input)
   local converted, status = self._touser:iconv(input)
   if converted then
      return converted
   end
   return "Error " .. tostring(status)
end
--- Transliterate a string in the user character set into UTF-8.
--
-- The input is handed to the converter's `_fromuser` iconv descriptor.  If
-- the first value that call yields is truthy it is returned unchanged and the
-- second value is ignored; otherwise the second value is stringified and
-- returned behind an "Error " prefix.
--
-- @tparam string input The input (user charset) string
-- @treturn string The output (UTF-8) string, or an "Error ..." string
-- @function fromuser
function converter:fromuser(input)
   local converted, status = self._fromuser:iconv(input)
   if converted then
      return converted
   end
   return "Error " .. tostring(status)
end
-- Metatable attached to every converter instance: lookups that miss on the
-- instance fall through to the methods stored in the `converter` table.
local converter_mt = {}
converter_mt.__index = converter
---
-- @section tongue.transliteration
--- Retrieve a tongue encoding converter.
--
-- Construct and return a converter which can convert between the provided
-- encoding and UTF-8 in either direction. Both iconv descriptors are opened
-- with the `//TRANSLIT//IGNORE` suffixes, so the converter is configured to
-- transliterate where possible and to replace bad or unknown codepoints so as
-- to ensure that the outputs are always valid.
--
-- If the desired encoding is UTF-8 then the converter returned shall
-- effectively be a passthrough, excepting that invalid or malformed codepoints
-- shall be "cleaned up" by the converter object.
--
-- `iconv.open` yields nil when the requested conversion is not available (for
-- example when the encoding name is unknown).  Rather than hand back a
-- converter whose methods would fail with an obscure "attempt to index a nil
-- value" on first use, that situation is reported as nil plus a message.
--
-- @tparam string encoding The desired encoding to be used
-- @treturn[1] converter The bidirectional character converter
-- @treturn[2] nil If either iconv descriptor could not be opened
-- @treturn[2] string A message naming the unsupported encoding
-- @function get
local function get_converter(encoding)
   local touser = iconv.open(encoding .. "//TRANSLIT//IGNORE", "UTF-8")
   local fromuser = iconv.open("UTF-8//TRANSLIT//IGNORE", encoding)
   -- Both directions are required; a half-open converter is of no use.
   if not (touser and fromuser) then
      return nil, "Unsupported encoding: " .. tostring(encoding)
   end
   return setmetatable({ _touser = touser, _fromuser = fromuser }, converter_mt)
end
--- Retrieve a tongue encoding converter based on the environment.
--
-- The locale variables `LC_ALL`, `LC_MESSAGES` and `LANG` are consulted in
-- that order, reading from the supplied table or, when no table was given,
-- from the process environment via `os.getenv`.  The first variable holding a
-- non-empty value is split with `util.split_category` and the encoding
-- component it reports is handed to the @{get} function; a missing or empty
-- encoding component is treated as UTF-8.
--
-- If none of the three variables yields a value, tongue assumes that UTF-8 is
-- appropriate and returns a UTF-8 converter on its own.
--
-- @tparam ?table env The environment to use (or nil to use the process env)
-- @treturn converter The bidirectional character converter
-- @treturn ?string The value of the variable which decided the encoding
-- (absent when falling back to UTF-8)
-- @function guess
local function guess_converter(env)
   -- glibc looks at LC_ALL first, then LC_MESSAGES, and treats LANG as the
   -- fallback.  Since only an encoding is being hunted for here, checking
   -- LANG last is good enough.
   local candidates = { "LC_ALL", "LC_MESSAGES", "LANG" }

   for i = 1, #candidates do
      local name = candidates[i]

      -- An explicit table takes the place of the process environment.
      local value
      if env then
         value = env[name]
      else
         value = os.getenv(name)
      end

      if value and value ~= "" then
         local _, _, charset = util.split_category(value)
         if not charset or charset == "" then
            charset = "UTF-8"
         end
         return get_converter(charset), value
      end
   end

   return get_converter("UTF-8")
end
-- Public interface of the module: `get` builds a converter for a named
-- encoding, `guess` derives the encoding from locale variables.
local exports = {}
exports.get = get_converter
exports.guess = guess_converter
return exports
|