/usr/include/tesseract/cube_object.h is in libtesseract-dev 3.02.01-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/**********************************************************************
* File: cube_object.h
* Description: Declaration of the Cube Object Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CubeObject class is the main class used to perform recognition of
// a specific char_samp as a single word.
// To recognize a word, a CubeObject is constructed for this word.
// A call to RecognizeWord is then issued, specifying the language model that
// will be used during recognition. If none is specified, the default language
// model in the CubeRecoContext is used. The CubeRecoContext is passed at
// construction time.
//
// The typical usage pattern for Cube is shown below:
//
// // Create and initialize Tesseract object and get its
// // CubeRecoContext object (note that Tesseract object owns it,
// // so it will be freed when the Tesseract object is freed).
// tesseract::Tesseract *tess_obj = new tesseract::Tesseract();
// tess_obj->init_tesseract(data_path, lang, tesseract::OEM_CUBE_ONLY);
// CubeRecoContext *cntxt = tess_obj->GetCubeRecoContext();
// CHECK(cntxt != NULL) << "Unable to create a Cube reco context";
// .
// .
// .
// // Do this to recognize a word in pix whose co-ordinates are
// // (left,top,width,height)
// tesseract::CubeObject *cube_obj;
// cube_obj = new tesseract::CubeObject(cntxt, pix,
// left, top, width, height);
//
// // Get back Cube's list of answers
// tesseract::WordAltList *alt_list = cube_obj->RecognizeWord();
// CHECK(alt_list != NULL && alt_list->AltCount() > 0);
//
// // Get the string and cost of every alternate
// for (int alt = 0; alt < alt_list->AltCount(); alt++) {
// // Return the result as a UTF-32 string
// string_32 res_str32 = alt_list->Alt(alt);
// // Convert to UTF8 if need-be
// string res_str;
// CubeUtils::UTF32ToUTF8(res_str32.c_str(), &res_str);
// // Get the string cost. This should get bigger as you go deeper
// // in the list
// int cost = alt_list->AltCost(alt);
// }
//
// // Call this once you are done recognizing this word
// delete cube_obj;
//
// // Call this once you are done recognizing all words
// // for the current language
// delete tess_obj;
//
// Note that if the language supports "Italics" (see the CubeRecoContext), the
// RecognizeWord function attempts to de-slant the word.
#ifndef CUBE_OBJECT_H
#define CUBE_OBJECT_H
#include "img.h"
#include "char_samp.h"
#include "word_altlist.h"
#include "beam_search.h"
#include "cube_search_object.h"
#include "tess_lang_model.h"
#include "cube_reco_context.h"
namespace tesseract {
// Aspect-ratio threshold for normalization: a char_samp is normalized before
// recognition only when its aspect ratio reaches this minimum.
static const float kMinNormalizationAspectRatio = 3.5f;

// Probability threshold for the top alternate choice; it is the minimum the
// top alt must meet before deslanted processing is applied to it.
static const float kMinProbSkipDeslanted = 0.25f;
// CubeObject wraps a single word image (held as a CharSamp) together with the
// search state and alternate lists produced when that image is recognized.
//
// NOTE(review): the class holds raw pointers and declares a destructor, yet
// copy construction and assignment are not disabled here -- confirm that
// instances are never copied.
class CubeObject {
 public:
  // Constructors. All take the recognition context; they differ only in how
  // the word image is supplied: either as a CharSamp, or as the rectangle
  // (left, top, wid, hgt) of an IMAGE or of a Pix.
  CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp);
  CubeObject(CubeRecoContext *cntxt, IMAGE *img,
             int left, int top, int wid, int hgt);
  CubeObject(CubeRecoContext *cntxt, Pix *pix,
             int left, int top, int wid, int hgt);
  ~CubeObject();

  // Recognizes the image as a word using the given language model. When
  // lang_mod is NULL, the default language model of the CubeRecoContext is
  // used. Returns the sorted list of alternate word answers.
  WordAltList *RecognizeWord(LangModel *lang_mod = NULL);

  // Like RecognizeWord, except that the image is recognized as a phrase.
  WordAltList *RecognizePhrase(LangModel *lang_mod = NULL);

  // Returns the cost of the string str. The cost is obtained by running
  // recognition with a language model that permits only that word.
  // Side effect: the alternate list(s) are permanently modified.
  int WordCost(const char *str);

  // Recognizes the image as a single character and returns the result list.
  CharAltList *RecognizeChar();

  // BeamSearch object left behind by the most recent RecognizeWord call.
  // Yields the deslanted variant when deslanted_ is set, otherwise the
  // regular one.
  BeamSearch *BeamObj() const {
    if (deslanted_) {
      return deslanted_beam_obj_;
    }
    return beam_obj_;
  }

  // WordAltList left behind by the most recent RecognizeWord call.
  // Yields the deslanted variant when deslanted_ is set, otherwise the
  // regular one.
  WordAltList *AlternateList() const {
    if (deslanted_) {
      return deslanted_alt_list_;
    }
    return alt_list_;
  }

  // CubeSearchObject left behind by the most recent RecognizeWord call.
  // Yields the deslanted variant when deslanted_ is set, otherwise the
  // regular one.
  CubeSearchObject *SrchObj() const {
    if (deslanted_) {
      return deslanted_srch_obj_;
    }
    return srch_obj_;
  }

  // CharSamp left behind by the most recent RecognizeWord call. Because
  // normalization may have taken place, this is not guaranteed to be the
  // same object that was handed to the constructor. Yields the deslanted
  // variant when deslanted_ is set, otherwise the regular one.
  CharSamp *CharSample() const {
    if (deslanted_) {
      return deslanted_char_samp_;
    }
    return char_samp_;
  }

  // Records whether this object owns its CharSamp.
  void SetCharSampOwnership(bool own_char_samp) {
    own_char_samp_ = own_char_samp;
  }

 protected:
  // Normalizes the CharSamp when its aspect ratio exceeds
  // kMinNormalizationAspectRatio.
  bool Normalize();

 private:
  // Minimum number of segments required before a char_samp is normalized
  // prior to recognition.
  static const int kMinNormalizationSegmentCnt = 4;

  // Initializes the data members.
  void Init();

  // Frees the alternate lists.
  void Cleanup();

  // Worker behind both public recognition entry points: RecognizeWord calls
  // it with word_mode == true, RecognizePhrase with word_mode == false.
  // Uses the given language model, or the default one of the CubeRecoContext
  // when none is specified. Returns the sorted list of alternate answers.
  WordAltList *Recognize(LangModel *lang_mod, bool word_mode);

  // Recognition context supplied at construction time.
  CubeRecoContext *cntxt_;

  // Beam search state: regular and deslanted variants.
  BeamSearch *beam_obj_;
  BeamSearch *deslanted_beam_obj_;

  bool offline_mode_;
  // Set through SetCharSampOwnership().
  bool own_char_samp_;
  // Selects which variant (regular or deslanted) the accessors return.
  bool deslanted_;

  // Word image: regular and deslanted variants.
  CharSamp *char_samp_;
  CharSamp *deslanted_char_samp_;

  // Search objects: regular and deslanted variants.
  CubeSearchObject *srch_obj_;
  CubeSearchObject *deslanted_srch_obj_;

  // Alternate lists: regular and deslanted variants.
  WordAltList *alt_list_;
  WordAltList *deslanted_alt_list_;
};
}
#endif // CUBE_OBJECT_H
|