// /usr/include/tesseract/devanagari_processing.h, as shipped in libtesseract-dev 3.02.01-2.
// The file is owned by root:root, with mode 0o644.
// The contents of the file follow.
// Copyright 2008 Google Inc. All Rights Reserved.
// Author: shobhitsaxena@google.com (Shobhit Saxena)
#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
#include "ocrblock.h"
#include "params.h"
struct Pix;
struct Box;
struct Boxa;
// Debug parameters for the shiro-rekha splitting process, declared here via
// the *_VAR_H macros (defined outside this header, presumably in params.h --
// confirm there what the macros expand to). The second macro argument is 0
// for both parameters and the third is the parameter's help text.

// Integer parameter: debug level for the split shiro-rekha process.
extern
INT_VAR_H(devanagari_split_debuglevel, 0,
"Debug level for split shiro-rekha process.");
// Boolean parameter: whether a debug image is created for the split
// shiro-rekha process.
extern
BOOL_VAR_H(devanagari_split_debugimage, 0,
"Whether to create a debug image for split shiro-rekha process.");
class TBOX;
class IMAGE;
namespace tesseract {
// Integer histogram constructed from an image (see the Construct*CountHist
// methods below). The object owns the heap-allocated array hist_ and releases
// it in Clear(), which the destructor also calls.
//
// NOTE: no copy constructor or assignment operator is declared, so the
// compiler-generated ones copy the raw hist_ pointer; copying an instance
// would make two objects delete[] the same array. Do not copy instances.
class PixelHistogram {
 public:
  // Creates an empty histogram: no array, zero length.
  PixelHistogram() : hist_(NULL), length_(0) {}

  ~PixelHistogram() {
    Clear();
  }

  // Releases the array and returns the object to the empty state.
  // hist_ is reset to NULL so that a later Clear() -- including the implicit
  // one made by the destructor -- never delete[]s the same array twice, and
  // so that hist() does not hand out a dangling pointer after clearing.
  void Clear() {
    delete[] hist_;  // delete[] on a null pointer is a no-op; no guard needed.
    hist_ = NULL;
    length_ = 0;
  }

  // Returns the histogram array, or NULL when the histogram is empty.
  // The array stays owned by this object.
  int* const hist() const {
    return hist_;
  }

  // Returns the recorded length of the histogram (0 after construction or
  // Clear()); presumably the number of entries in hist() -- confirm against
  // the Construct*CountHist implementations.
  int length() const {
    return length_;
  }

  // Methods to construct histograms from images. These clear any existing data.
  void ConstructVerticalCountHist(Pix* pix);
  void ConstructHorizontalCountHist(Pix* pix);

  // This method returns the global-maxima for the histogram. The frequency of
  // the global maxima is returned in count, if specified.
  int GetHistogramMaximum(int* count) const;

 private:
  int* hist_;   // Histogram array allocated with new[]; NULL when empty.
  int length_;  // Length associated with hist_; 0 when empty.
};
// Splits the shiro-rekha in an input image, using separately configurable
// strategies for the page-segmentation phase and the OCR phase.
class ShiroRekhaSplitter {
 public:
  // How much splitting a phase performs.
  enum SplitStrategy {
    NO_SPLIT = 0,       // The phase performs no splitting.
    MINIMAL_SPLIT = 1,  // Blobs are split minimally.
    MAXIMAL_SPLIT = 2   // Blobs are split maximally.
  };

  ShiroRekhaSplitter();
  virtual ~ShiroRekhaSplitter();

  // Top-level entry point: performs splitting with the current settings and
  // returns true if a split was actually performed. When split_for_pageseg is
  // true the pageseg_split_strategy_ is applied; otherwise the
  // ocr_split_strategy_ is applied.
  bool Split(bool split_for_pageseg);

  // Clears the memory held by this object.
  void Clear();

  // Refreshes the words in the segmentation block list using the blobs in
  // new_blobs. The segmentation block list must have been set beforehand.
  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);

  // True when the pageseg and ocr phases are configured with different
  // split strategies.
  bool HasDifferentSplitStrategies() const {
    return pageseg_split_strategy_ != ocr_split_strategy_;
  }

  // Stores only the pointer to the block list, not a copy of the list, so the
  // list object must still be alive when Split is called. The list serves as
  // the golden segmentation while splitting.
  void set_segmentation_block_list(BLOCK_LIST* block_list) {
    segmentation_block_list_ = block_list;
  }

  static const int kUnspecifiedXheight = -1;

  void set_global_xheight(int xheight) { global_xheight_ = xheight; }

  void set_perform_close(bool perform) { perform_close_ = perform; }

  // Image obtained from shiro-rekha splitting. It remains owned by this
  // class; callers that need it beyond the lifetime of the
  // ShiroRekhaSplitter object may want to clone it.
  Pix* splitted_image() { return splitted_image_; }

  // Sets the input image; a clone of it is owned by this class.
  void set_orig_pix(Pix* pix);

  // Input image provided to the object. It is owned by this class; callers
  // may want to clone it before working with it.
  Pix* orig_pix() { return orig_pix_; }

  SplitStrategy ocr_split_strategy() const { return ocr_split_strategy_; }

  void set_ocr_split_strategy(SplitStrategy strategy) {
    ocr_split_strategy_ = strategy;
  }

  SplitStrategy pageseg_split_strategy() const {
    return pageseg_split_strategy_;
  }

  void set_pageseg_split_strategy(SplitStrategy strategy) {
    pageseg_split_strategy_ = strategy;
  }

  BLOCK_LIST* segmentation_block_list() { return segmentation_block_list_; }

  // Dumps a debug image to the specified location.
  void DumpDebugImage(const char* filename) const;

  // Returns the computed mode-height of the blobs in pix, pruning very small
  // blobs from the calculation. Could be used as a global xheight estimate
  // for images whose text is all the same point size.
  static int GetModeHeight(Pix* pix);

 private:
  // Performs a close operation on the input image; xheight_estimate decides
  // the size of the sel used.
  static void PerformClose(Pix* pix, int xheight_estimate);

  // Resolves the cc bbox to a particular row and returns that row's xheight.
  // Uses the block list if one is available (the original comment calls it
  // block_list_; presumably segmentation_block_list_ -- confirm against the
  // implementation), else just returns the global_xheight_ estimate currently
  // set in the object.
  int GetXheightForCC(Box* cc_bbox);

  // Fills regions_to_clear with the regions (boxes) that should be cleared in
  // the original image in order to perform shiro-rekha splitting. pix is
  // assumed to hold at most one word. xheight may be the global estimate, the
  // row estimate, or unspecified; when unspecified, over-splitting may occur
  // because a conservative stroke-width estimate with an associated
  // multiplier is used in its place. A specified xheight is advisable when
  // splitting for classification/training.
  void SplitWordShiroRekha(SplitStrategy split_strategy,
                           Pix* pix,
                           int xheight,
                           int word_left,
                           int word_top,
                           Boxa* regions_to_clear);

  // Returns a new box object for the given TBOX, expressed in the original
  // image's coordinate system.
  Box* GetBoxForTBOX(const TBOX& tbox) const;

  // Reports the y-extents of the shiro-rekha computed from the input word
  // image through the three output pointers.
  static void GetShiroRekhaYExtents(Pix* word_pix,
                                    int* shirorekha_top,
                                    int* shirorekha_bottom,
                                    int* shirorekha_ylevel);

  // Just a clone of the input image passed.
  Pix* orig_pix_;
  // Image produced after the last splitting round; owned by this class.
  Pix* splitted_image_;
  SplitStrategy pageseg_split_strategy_;
  SplitStrategy ocr_split_strategy_;
  Pix* debug_image_;
  // Golden segmentation used when performing splitting.
  BLOCK_LIST* segmentation_block_list_;
  int global_xheight_;
  // Whether a morphological close operation should be performed before CCs
  // are run through splitting.
  bool perform_close_;
};
} // namespace tesseract.
#endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
|