/usr/bin/greekocr4gamera is in python-gamera.toolkits.greekocr 1.0.1-10.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | #!/usr/bin/python
# -*- mode: python; indent-tabs-mode: nil; tab-width: 3 -*-
# vim: set tabstop=3 shiftwidth=3 expandtab:
# Copyright (C) 2010-2011 Christian Brandt
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St., Fifth floor, Boston, MA 02110-1301, USA.
# Command-line front end for the greekocr toolkit: parses the options and runs the OCR on one image.
import sys
def usage():
   """Write the command-line help text of this script to standard error."""
   help_lines = (
      "Usage:\n",
      " greekocr4gamera.py -x <traindata> [options] <imagefile>\n",
      "\n",
      " Options:\n",
      " --wholistic wholistic segmentation mode (default)\n",
      " -w short for --wholistic\n",
      " --separatistic separatistic segmentation mode\n",
      " -s short for --separatistic\n",
      "\n",
      " --unicode <file> specify filename for unicode output\n",
      " -u <file> short for --unicode\n",
      " --teubner <file.tex> specify filename for teubner TeX output\n",
      " -t <file> short for --teubner\n",
      "\n",
      " --deskew do a skew correction (recommended)\n",
      " --filter filter out very large (images) and very\n",
      " small components (noise)\n",
      "\n",
      " --debug save debug-images\n",
      " debug_lines.png debug_words.png debug_chars.png\n",
      " -d short for --debug\n",
   )
   # Emit the whole text with a single write call.
   sys.stderr.write("".join(help_lines))
def parse_args(argv):
   """Parse the command-line arguments into an options dictionary.

   argv is the argument list without the program name.  The result may
   contain these keys:

   * "trainingdata", "unicodeoutfile", "teubneroutfile": the value that
     followed -x/--trainingdata, -u/--unicode or -t/--teubner
   * "mode": "wholistic" or "separatistic"
   * "debug", "deskew", "filter": True when the switch was given
   * "imagefile": the last argument that is not a known option
   * "help": True when --help/-h was seen; parsing stops at that point

   Raises ValueError when an option that takes a value is the last
   argument.
   """
   value_options = {
      "-x": "trainingdata", "--trainingdata": "trainingdata",
      "-u": "unicodeoutfile", "--unicode": "unicodeoutfile",
      "-t": "teubneroutfile", "--teubner": "teubneroutfile",
   }
   mode_options = {
      "--wholistic": "wholistic", "-w": "wholistic",
      "--separatistic": "separatistic", "-s": "separatistic",
   }
   # Exact matches only: a test like `arg in ("--deskew")` is a substring
   # test on a plain string and would also accept "-", "des" or "".
   switch_options = {
      "-d": "debug", "--debug": "debug",
      "--deskew": "deskew",
      "--filter": "filter",
   }

   parsed = {}
   i = 0
   while i < len(argv):
      arg = argv[i]
      if arg in value_options:
         i += 1
         if i >= len(argv):
            raise ValueError("Option %s requires an argument" % arg)
         parsed[value_options[arg]] = argv[i]
      elif arg in ("--help", "-h"):
         # Help wins immediately; the remaining arguments are not examined.
         parsed["help"] = True
         return parsed
      elif arg in mode_options:
         parsed["mode"] = mode_options[arg]
      elif arg in switch_options:
         parsed[switch_options[arg]] = True
      else:
         # Anything unrecognized is taken as the image file (last one wins).
         parsed["imagefile"] = arg
      i += 1
   return parsed


args = sys.argv[1:]
try:
   options = parse_args(args)
except ValueError as err:
   # A value option without its value is a usage error, not a crash.
   # Status 3 keeps it distinct from the "missing training data" (1) and
   # "missing image file" (2) exits further down.
   sys.stderr.write("%s\n" % err)
   usage()
   sys.exit(3)
if options.get("help"):
   usage()
   sys.exit(0)
# Training data and an input image are mandatory; without either one the
# help text is shown and the script exits with a distinct status (1 or 2).
# sys.exit is used rather than the site-provided exit() helper, which is
# meant for interactive sessions and is missing when site is not loaded.
if "trainingdata" not in options:
   print("No Trainingdata given")
   usage()
   sys.exit(1)

# The segmentation mode is optional and defaults to "wholistic".
options.setdefault("mode", "wholistic")

if "imagefile" not in options:
   print("No filename given")
   usage()
   sys.exit(2)
from gamera.core import *
from gamera.plugins.listutilities import median
from gamera.toolkits.greekocr import GreekOCR
# Create the recognizer, select the segmentation mode and load the
# training data named on the command line.
g = GreekOCR()
g.mode = options["mode"]
g.load_trainingdata(options["trainingdata"])

# Load the page; any image that is not already ONEBIT is converted.
image = load_image(options["imagefile"])
already_onebit = image.data.pixel_type == ONEBIT
if not already_onebit:
   image = image.to_onebit()
if options.get("filter"):
   # Blank out connected components whose black area is more than ten
   # times larger than the median (typically pictures) or more than ten
   # times smaller (typically noise).
   count = 0
   ccs = image.cc_analysis()
   if options.get("debug"):
      print("filter started on %s elements..." % (len(ccs),))
   if ccs:
      # Measure every component exactly once, before anything is blanked.
      # Re-measuring after fill_white() would look at an already removed
      # component again and could count it a second time as "too small".
      areas = [cc.black_area()[0] for cc in ccs]
      # median() gets its own copy so that areas stays aligned with ccs.
      median_black_area = median(list(areas))
      too_large = median_black_area * 10
      too_small = median_black_area / 10
      for cc, area in zip(ccs, areas):
         if area > too_large or area < too_small:
            cc.fill_white()
            count += 1
   if options.get("debug"):
      print("filter done. %s elements left." % (len(ccs) - count,))
if options.get("deskew"):
   # Estimate the skew angle in the range -10..10 and rotate the page by it.
   if options.get("debug"):
      print("\ntry to skew correct...")
   rotation = image.rotation_angle_projections(-10, 10)[0]
   # Rebind image itself: the recognition step that follows reads image, so
   # storing the rotated page under another name would discard the
   # correction.
   image = image.rotate(rotation, 0)
   if options.get("debug"):
      print("rotated with %s angle" % (rotation,))
# Run the recognition on the prepared page.
output = g.process_image(image)
if options.get("debug"):
   g.save_debug_images()

# Deliver the result: without any output file the text goes to stdout;
# otherwise the unicode file takes precedence over the teubner file.
if "unicodeoutfile" not in options and "teubneroutfile" not in options:
   print(output)
elif "unicodeoutfile" in options:
   g.save_text_unicode(options["unicodeoutfile"])
else:
   g.save_text_teubner(options["teubneroutfile"])
|