This file is indexed.

/usr/include/tesseract/devanagari_processing.h is in libtesseract-dev 3.02.01-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
// Copyright 2008 Google Inc. All Rights Reserved.
// Author: shobhitsaxena@google.com (Shobhit Saxena)

#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_

#include "ocrblock.h"
#include "params.h"

struct Pix;
struct Box;
struct Boxa;

extern
INT_VAR_H(devanagari_split_debuglevel, 0,
          "Debug level for split shiro-rekha process.");

extern
BOOL_VAR_H(devanagari_split_debugimage, 0,
           "Whether to create a debug image for split shiro-rekha process.");

class TBOX;
class IMAGE;

namespace tesseract {

class PixelHistogram {
 public:
  PixelHistogram() {
    hist_ = NULL;
    length_ = 0;
  }

  ~PixelHistogram() {
    Clear();
  }

  void Clear() {
    if (hist_) {
      delete[] hist_;
    }
    length_ = 0;
  }

  int* const hist() const {
    return hist_;
  }

  int length() const {
    return length_;
  }

  // Methods to construct histograms from images. These clear any existing data.
  void ConstructVerticalCountHist(Pix* pix);
  void ConstructHorizontalCountHist(Pix* pix);

  // This method returns the global-maxima for the histogram. The frequency of
  // the global maxima is returned in count, if specified.
  int GetHistogramMaximum(int* count) const;

 private:
  int* hist_;
  int length_;
};

class ShiroRekhaSplitter {
 public:
  enum SplitStrategy {
    NO_SPLIT = 0,   // No splitting is performed for the phase.
    MINIMAL_SPLIT,  // Blobs are split minimally.
    MAXIMAL_SPLIT   // Blobs are split maximally.
  };

  ShiroRekhaSplitter();
  virtual ~ShiroRekhaSplitter();

  // Top-level method to perform splitting based on current settings.
  // Returns true if a split was actually performed.
  // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
  // splitting. If false, the ocr_split_strategy_ is used.
  bool Split(bool split_for_pageseg);

  // Clears the memory held by this object.
  void Clear();

  // Refreshes the words in the segmentation block list by using blobs in the
  // input blob list.
  // The segmentation block list must be set.
  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);

  // Returns true if the split strategies for pageseg and ocr are different.
  bool HasDifferentSplitStrategies() const {
    return pageseg_split_strategy_ != ocr_split_strategy_;
  }

  // This only keeps a copy of the block list pointer. At split call, the list
  // object should still be alive. This block list is used as a golden
  // segmentation when performing splitting.
  void set_segmentation_block_list(BLOCK_LIST* block_list) {
    segmentation_block_list_ = block_list;
  }

  static const int kUnspecifiedXheight = -1;

  void set_global_xheight(int xheight) {
    global_xheight_ = xheight;
  }

  void set_perform_close(bool perform) {
    perform_close_ = perform;
  }

  // Returns the image obtained from shiro-rekha splitting. The returned object
  // is owned by this class. Callers may want to clone the returned pix to keep
  // it alive beyond the life of ShiroRekhaSplitter object.
  Pix* splitted_image() {
    return splitted_image_;
  }

  // On setting the input image, a clone of it is owned by this class.
  void set_orig_pix(Pix* pix);

  // Returns the input image provided to the object. This object is owned by
  // this class. Callers may want to clone the returned pix to work with it.
  Pix* orig_pix() {
    return orig_pix_;
  }

  SplitStrategy ocr_split_strategy() const {
    return ocr_split_strategy_;
  }

  void set_ocr_split_strategy(SplitStrategy strategy) {
    ocr_split_strategy_ = strategy;
  }

  SplitStrategy pageseg_split_strategy() const {
    return pageseg_split_strategy_;
  }

  void set_pageseg_split_strategy(SplitStrategy strategy) {
    pageseg_split_strategy_ = strategy;
  }

  BLOCK_LIST* segmentation_block_list() {
    return segmentation_block_list_;
  }

  // This method dumps a debug image to the specified location.
  void DumpDebugImage(const char* filename) const;

  // This method returns the computed mode-height of blobs in the pix.
  // It also prunes very small blobs from calculation. Could be used to provide
  // a global xheight estimate for images which have the same point-size text.
  static int GetModeHeight(Pix* pix);

 private:
  // Method to perform a close operation on the input image. The xheight
  // estimate decides the size of sel used.
  static void PerformClose(Pix* pix, int xheight_estimate);

  // This method resolves the cc bbox to a particular row and returns the row's
  // xheight. This uses block_list_ if available, else just returns the
  // global_xheight_ estimate currently set in the object.
  int GetXheightForCC(Box* cc_bbox);

  // Returns a list of regions (boxes) which should be cleared in the original
  // image so as to perform shiro-rekha splitting. Pix is assumed to carry one
  // (or less) word only. Xheight measure could be the global estimate, the row
  // estimate, or unspecified. If unspecified, over splitting may occur, since a
  // conservative estimate of stroke width along with an associated multiplier
  // is used in its place. It is advisable to have a specified xheight when
  // splitting for classification/training.
  void SplitWordShiroRekha(SplitStrategy split_strategy,
                           Pix* pix,
                           int xheight,
                           int word_left,
                           int word_top,
                           Boxa* regions_to_clear);

  // Returns a new box object for the corresponding TBOX, based on the original
  // image's coordinate system.
  Box* GetBoxForTBOX(const TBOX& tbox) const;

  // This method returns y-extents of the shiro-rekha computed from the input
  // word image.
  static void GetShiroRekhaYExtents(Pix* word_pix,
                                    int* shirorekha_top,
                                    int* shirorekha_bottom,
                                    int* shirorekha_ylevel);

  Pix* orig_pix_;         // Just a clone of the input image passed.
  Pix* splitted_image_;   // Image produced after the last splitting round. The
                          // object is owned by this class.
  SplitStrategy pageseg_split_strategy_;
  SplitStrategy ocr_split_strategy_;
  Pix* debug_image_;
  // This block list is used as a golden segmentation when performing splitting.
  BLOCK_LIST* segmentation_block_list_;
  int global_xheight_;
  bool perform_close_;  // Whether a morphological close operation should be
                        // performed before CCs are run through splitting.
};

}  // namespace tesseract.

#endif  // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_