/usr/share/pyshared/translate/convert/csv2po.py is in translate-toolkit 1.10.0-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2003-2006 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
"""Convert Comma-Separated Value (.csv) files to Gettext PO localization files.
See: http://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/csv2po.html
for examples and usage instructions.
"""
import sys
from translate.storage import po
from translate.storage import csvl10n
def replacestrings(source, *pairs):
    """Apply a sequence of ``(original, replacement)`` substitutions.

    The pairs are applied one after another, in the order given, so a later
    pair sees the text produced by the earlier ones.

    :param source: String on which the replacements are performed
    :type source: String
    :param pairs: Strings to be matched and replaced
    :type pairs: Zero or more tuples of (original, replacement)
    :return: ``source`` with every pair applied in order
    """
    result = source
    for needle, substitute in pairs:
        result = result.replace(needle, substitute)
    return result
def quotecsvstr(source):
    """Return ``source`` wrapped in double quotes with its quotes escaped.

    :param source: String to quote
    :return: The rewritten string enclosed in a pair of ``"`` characters
    """
    # Order matters: already-escaped quotes are first reduced to bare quotes
    # so that the second pair escapes every quote exactly once. The last two
    # pairs collapse a doubled backslash in front of ' or n to a single one.
    substitutions = (
        ('\\"', '"'),
        ('"', '\\"'),
        ("\\\\'", "\\'"),
        ('\\\\n', '\\n'),
    )
    escaped = replacestrings(source, *substitutions)
    return '"' + escaped + '"'
def simplify(string):
    """Return ``string`` reduced to its alphanumeric characters.

    The result is used as a forgiving lookup key, so it must be a real
    string that can be hashed and compared by value. The built-in
    ``filter`` only returns a string on Python 2; on Python 3 it returns a
    lazy iterator, which never compares equal to another one and cannot
    serve as a meaningful dictionary key. Joining the kept characters gives
    the same result on both versions.

    :param string: Text to simplify
    :return: A string made of the alphanumeric characters of ``string``,
        in their original order
    """
    # ``string[:0]`` is an empty string of the same kind as the input, so
    # the joined result keeps that kind (e.g. unicode stays unicode).
    return string[:0].join([char for char in string if char.isalnum()])
class csv2po:
    """Take translations from a .csv file and put them in a .po file.

    Without a template PO store, every CSV unit is converted into a new PO
    unit. With a template, each CSV unit is matched against the template's
    units and only its translation is copied onto the matching unit; CSV
    units that cannot be matched are reported on stderr and counted in
    ``unmatched``.
    """

    def __init__(self, templatepo=None, charset=None, duplicatestyle="keep"):
        """Construct the converter.

        :param templatepo: PO store to merge translations into, or ``None``
            to build a new store from the CSV units
        :param charset: Stored on the instance; the code that would apply it
            in ``convertstore`` is commented out
        :param duplicatestyle: Passed to ``removeduplicates`` on the
            resulting PO store
        """
        self.pofile = templatepo
        self.charset = charset
        self.duplicatestyle = duplicatestyle
        # joined locations -> template unit
        self.commentindex = {}
        # exact source text -> template unit
        self.sourceindex = {}
        # simplified source text -> list of template units
        self.simpleindex = {}
        self.csvfile = None
        self.duplicatecomments = []
        # Always defined, so the counter can be read (and incremented) even
        # when the converter was built without a template.
        self.unmatched = 0
        if self.pofile is not None:
            self.makeindex()

    def _warn(self, message):
        """Write ``message`` followed by a newline to stderr.

        ``sys.stderr.write`` is used instead of the ``print >>`` statement
        because the latter only exists on Python 2.
        """
        sys.stderr.write(message + "\n")

    def makeindex(self):
        """Build the indexes used to find template units for CSV units."""
        for pounit in self.pofile.units:
            joinedcomment = " ".join(pounit.getlocations())
            source = pounit.source
            # the definitive way to match is by source comment (joinedcomment)
            if joinedcomment in self.commentindex:
                # unless more than one thing matches...
                self.duplicatecomments.append(joinedcomment)
            else:
                self.commentindex[joinedcomment] = pounit
            # do simpler matching in case things have been mangled...
            simpleid = simplify(source)
            # A unit is appended only when its simplified key is already
            # present AND its exact source has not been indexed before;
            # a new key or a repeated source starts a fresh list.
            if (simpleid in self.simpleindex and
                    source not in self.sourceindex):
                self.simpleindex[simpleid].append(pounit)
            else:
                self.simpleindex[simpleid] = [pounit]
            # also match by standard msgid (the last unit seen wins)
            self.sourceindex[source] = pounit
        # Locations shared by several units cannot identify a unit, so they
        # are removed from the location index entirely.
        for comment in self.duplicatecomments:
            if comment in self.commentindex:
                del self.commentindex[comment]

    def convertunit(self, csvunit):
        """Convert a CSV unit into a new PO unit.

        :param csvunit: Unit providing ``location``, ``source`` and ``target``
        :return: The newly created PO unit
        """
        pounit = po.pounit(encoding="UTF-8")
        if csvunit.location:
            pounit.addlocation(csvunit.location)
        pounit.source = csvunit.source
        pounit.target = csvunit.target
        return pounit

    def handlecsvunit(self, csvunit):
        """Reintegrate a CSV unit into the template PO store.

        The template unit is looked up by location, then by exact source
        text, then by simplified source text. When nothing matches, or the
        simplified text matches several units, a message is written to
        stderr, ``unmatched`` is incremented and nothing is changed.

        :param csvunit: Unit providing ``location``, ``source`` and ``target``
        """
        if (len(csvunit.location.strip()) > 0 and
                csvunit.location in self.commentindex):
            pounit = self.commentindex[csvunit.location]
        elif csvunit.source in self.sourceindex:
            pounit = self.sourceindex[csvunit.source]
        else:
            # Computed once; it is needed for both the test and the lookup.
            simpleid = simplify(csvunit.source)
            if simpleid not in self.simpleindex:
                csvfilename = getattr(self.csvfile, "filename", "(unknown)")
                self._warn(
                    "%s - csv entry not found in pofile:\n location\t%s\n original\t%s\n translation\t%s" %
                    (csvfilename, csvunit.location,
                     csvunit.source, csvunit.target))
                self.unmatched += 1
                return
            thepolist = self.simpleindex[simpleid]
            if len(thepolist) > 1:
                csvfilename = getattr(self.csvfile, "filename", "(unknown)")
                matches = "\n ".join(["possible match: " + candidate.source
                                      for candidate in thepolist])
                self._warn(
                    "%s - csv entry not found in pofile, multiple matches found:\n location\t%s\n original\t%s\n translation\t%s\n %s" %
                    (csvfilename, csvunit.location,
                     csvunit.source, csvunit.target, matches))
                self.unmatched += 1
                return
            pounit = thepolist[0]
        if pounit.hasplural():
            # we need to work out whether we matched the singular or the plural
            singularid = pounit.source.strings[0]
            pluralid = pounit.source.strings[1]
            if csvunit.source == singularid:
                pounit.msgstr[0] = csvunit.target
            elif csvunit.source == pluralid:
                pounit.msgstr[1] = csvunit.target
            elif simplify(csvunit.source) == simplify(singularid):
                pounit.msgstr[0] = csvunit.target
            elif simplify(csvunit.source) == simplify(pluralid):
                pounit.msgstr[1] = csvunit.target
            else:
                self._warn(
                    "couldn't work out singular or plural: %r, %r, %r" %
                    (csvunit.source, singularid, pluralid))
                self.unmatched += 1
                return
        else:
            pounit.target = csvunit.target

    def convertstore(self, thecsvfile):
        """Convert a CSV store to a PO store and return it.

        Uses the template PO store if one was given at construction;
        otherwise a new PO store is created and filled.

        :param thecsvfile: CSV store whose ``units`` are converted
        :return: The resulting PO store (``self.pofile``)
        """
        self.csvfile = thecsvfile
        if self.pofile is None:
            self.pofile = po.pofile()
            mergemode = False
        else:
            mergemode = True
        if self.pofile.units and self.pofile.units[0].isheader():
            targetheader = self.pofile.units[0]
            self.pofile.updateheader(content_type="text/plain; charset=UTF-8",
                                     content_transfer_encoding="8bit")
        else:
            targetheader = self.pofile.makeheader(charset="UTF-8",
                                                  encoding="8bit")
        targetheader.addnote("extracted from %s" % self.csvfile.filename,
                             "developer")
        # Only the very first CSV unit is examined as a possible header row.
        mightbeheader = True
        for csvunit in self.csvfile.units:
            # NOTE: self.charset is currently not applied; the decoding
            # below is disabled.
            #if self.charset is not None:
            #    csvunit.source = csvunit.source.decode(self.charset)
            #    csvunit.target = csvunit.target.decode(self.charset)
            if mightbeheader:
                # ignore typical header strings...
                mightbeheader = False
                if csvunit.match_header():
                    continue
                # A first unit with no location whose source contains
                # "Content-Type:" is skipped as well.
                if (len(csvunit.location.strip()) == 0 and
                        csvunit.source.find("Content-Type:") != -1):
                    continue
            if mergemode:
                self.handlecsvunit(csvunit)
            else:
                pounit = self.convertunit(csvunit)
                self.pofile.addunit(pounit)
        self.pofile.removeduplicates(self.duplicatestyle)
        return self.pofile
def convertcsv(inputfile, outputfile, templatefile, charset=None,
               columnorder=None, duplicatestyle="msgctxt"):
    """Read ``inputfile`` as CSV, convert it with csv2po and write the PO.

    :param inputfile: Input handed to ``csvl10n.csvfile``
    :param outputfile: Object whose ``write`` receives the serialised store
    :param templatefile: Input handed to ``po.pofile`` to build the template
        store, or ``None`` to convert without a template
    :param charset: Forwarded to the ``csv2po`` constructor
    :param columnorder: Forwarded to ``csvl10n.csvfile`` as ``fieldnames``
    :param duplicatestyle: Forwarded to the ``csv2po`` constructor
    :return: 0 when the resulting store is empty (nothing is written),
        otherwise 1
    """
    csvstore = csvl10n.csvfile(inputfile, fieldnames=columnorder)
    # A missing template is passed on as None, which is also the
    # constructor's default for its first parameter.
    template = None
    if templatefile is not None:
        template = po.pofile(templatefile)
    convertor = csv2po(template, charset=charset,
                       duplicatestyle=duplicatestyle)
    result = convertor.convertstore(csvstore)
    if not result.isempty():
        outputfile.write(str(result))
        return 1
    return 0
def main(argv=None):
    """Set up the csv2po option parser and run it on ``argv``.

    :param argv: Argument list handed to the parser's ``run`` method
    """
    from translate.convert import convert

    # Every supported key maps to the same ("po", convertcsv) entry.
    handler = ("po", convertcsv)
    formats = {}
    for variant in ("po", "pot", None):
        formats[("csv", variant)] = handler

    parser = convert.ConvertOptionParser(formats, usetemplates=True,
                                         usepots=True, description=__doc__)
    parser.add_option(
        "", "--charset", dest="charset", default=None,
        help="set charset to decode from csv files", metavar="CHARSET")
    parser.add_option(
        "", "--columnorder", dest="columnorder", default=None,
        help="specify the order and position of columns (location,source,target)")
    parser.add_duplicates_option()
    # Both option values are added to the parser's passthrough list.
    for optionname in ("charset", "columnorder"):
        parser.passthrough.append(optionname)
    parser.run(argv)
# Run the command-line converter only when this file is executed as a script.
if __name__ == '__main__':
    main()
|