/usr/share/pyshared/translate/convert/html2po.py is in translate-toolkit 1.10.0-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2004-2006 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
"""Convert HTML files to Gettext PO localization files.
See: http://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/html2po.html
for examples and usage instructions.
"""
from translate.storage import po
from translate.storage import html
class html2po:
    """Converter that builds a PO store from the units of an HTML file."""

    def convertfile(self, inputfile, filename, includeuntagged=False,
                    duplicatestyle="msgctxt", keepcomments=False):
        """Convert an HTML file into a PO store.

        :param inputfile: passed to ``html.htmlfile`` as its ``inputfile``
        :param filename: accepted for interface compatibility; not used here
        :param includeuntagged: forwarded to ``html.htmlfile`` as
            ``includeuntaggeddata``
        :param duplicatestyle: passed to ``removeduplicates`` on the
            resulting store once all units have been added
        :param keepcomments: when true, each HTML unit's notes are attached
            to the matching PO unit as "developer" notes
        :return: the populated ``po.pofile`` store
        """
        postore = po.pofile()
        htmldoc = html.htmlfile(includeuntaggeddata=includeuntagged,
                                inputfile=inputfile)

        for unit in htmldoc.units:
            pounit = postore.addsourceunit(unit.source)
            pounit.addlocations(unit.getlocations())
            if not keepcomments:
                continue
            pounit.addnote(unit.getnotes(), "developer")

        # Duplicates are resolved only after every unit has been collected.
        postore.removeduplicates(duplicatestyle)
        return postore
def converthtml(inputfile, outputfile, templates, includeuntagged=False,
                pot=False, duplicatestyle="msgctxt", keepcomments=False):
    """Convert the HTML in ``inputfile`` to PO and write it to ``outputfile``.

    :param inputfile: HTML input handed to ``html2po.convertfile``; its
        ``name`` attribute, when present, is forwarded as the file name
        ("unknown" otherwise)
    :param outputfile: object whose ``write`` method receives the
        string form of the resulting store
    :param templates: accepted for interface compatibility; not used here
    :param includeuntagged: forwarded to ``html2po.convertfile``
    :param pot: accepted for interface compatibility; not used here
    :param duplicatestyle: forwarded to ``html2po.convertfile``
    :param keepcomments: forwarded to ``html2po.convertfile``
    :return: always 1 (presumably a success flag for the calling
        convert framework -- TODO confirm)
    """
    convertor = html2po()
    inputname = getattr(inputfile, "name", "unknown")
    outputstore = convertor.convertfile(inputfile, inputname,
                                        includeuntagged,
                                        duplicatestyle=duplicatestyle,
                                        keepcomments=keepcomments)
    outputfile.write(str(outputstore))
    return 1
def main(argv=None):
    """Command-line entry point: set up the option parser and run it.

    :param argv: argument list handed to the parser's ``run`` method
        (``None`` by default)
    """
    import sys

    from translate.convert import convert
    from translate.misc import stdiotell

    # Replace stdout with the wrapper before the parser is created
    # (presumably so that writing to stdout supports tell() -- TODO confirm).
    sys.stdout = stdiotell.StdIOWrapper(sys.stdout)

    # Every recognised input extension, and the no-extension case (None),
    # maps to PO output produced by converthtml.
    formats = dict.fromkeys(("html", "htm", "xhtml", None),
                            ("po", converthtml))

    optparser = convert.ConvertOptionParser(formats, usepots=True,
                                            description=__doc__)

    optparser.add_option(
        "-u", "--untagged",
        dest="includeuntagged",
        default=False,
        action="store_true",
        help="include untagged sections")
    optparser.passthrough.append("includeuntagged")

    optparser.add_option(
        "--keepcomments",
        dest="keepcomments",
        default=False,
        action="store_true",
        help="preserve html comments as translation notes in the output")
    optparser.passthrough.append("keepcomments")

    optparser.add_duplicates_option()
    optparser.passthrough.append("pot")

    optparser.run(argv)
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|