/usr/include/tesseract/cube_line_segmenter.h is in libtesseract-dev 3.02.01-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 | /**********************************************************************
* File: cube_line_segmenter.h
* Description: Declaration of the Cube Line Segmenter Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// TODO(ahmadab)
// This is really a makeshift line segmenter that works well for Arabic.
// It should eventually be replaced by Ray Smith's page segmenter.
// There are lots of magic numbers below that were determined empirically
// but not thoroughly tested.
#ifndef CUBE_LINE_SEGMENTER_H
#define CUBE_LINE_SEGMENTER_H
#include "cube_reco_context.h"
#include "allheaders.h"
namespace tesseract {
// Makeshift line segmenter: given a CubeRecoContext and a Pix image it
// exposes the post-processed image, the detected columns, the connected
// components and the individual text lines.
//
// Most accessors call Init() themselves when init_ is still false and
// report failure (NULL for pointers, 0 for counts) if Init() returns false.
class CubeLineSegmenter {
 public:
  CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img);
  ~CubeLineSegmenter();

  // Accessor functions

  // Returns img_ (the post-processed image), or NULL if Init() fails.
  // NOTE(review): ownership of the returned Pix is not visible from this
  // header -- confirm against the implementation before freeing it.
  Pix *PostProcessedImage() {
    if (!init_ && !Init()) {
      return NULL;
    }
    return img_;
  }

  // Returns columns_->n, or 0 if Init() fails.
  // The failure value is the integer 0, not NULL: the return type is int,
  // and NULL is not convertible to int when it is defined as nullptr.
  int ColumnCnt() {
    if (!init_ && !Init()) {
      return 0;
    }
    return columns_->n;
  }

  // Returns the entry at index col of columns_->boxa->box, or NULL if
  // Init() fails. col is used as-is with no bounds check, so the caller
  // must pass a valid index.
  Box *Column(int col) {
    if (!init_ && !Init()) {
      return NULL;
    }
    return columns_->boxa->box[col];
  }

  // Returns line_cnt_, or 0 if Init() fails (integer 0 rather than NULL,
  // for the same reason as ColumnCnt()).
  int LineCnt() {
    if (!init_ && !Init()) {
      return 0;
    }
    return line_cnt_;
  }

  // Returns con_comps_, or NULL if Init() fails.
  Pixa *ConComps() {
    if (!init_ && !Init()) {
      return NULL;
    }
    return con_comps_;
  }

  // Returns columns_, or NULL if Init() fails.
  Pixaa *Columns() {
    if (!init_ && !Init()) {
      return NULL;
    }
    return columns_;
  }

  // Return the stored ALEF / DOT height estimates. Unlike the accessors
  // above, these do not call Init(); they return whatever value the
  // members currently hold.
  double AlefHgtEst() { return est_alef_hgt_; }
  double DotHgtEst() { return est_dot_hgt_; }

  // Defined out of line; takes a line index and an output Box pointer.
  // NOTE(review): presumably returns the image of the requested line and
  // stores its bounding box in *line_box -- confirm in the implementation.
  Pix *Line(int line, Box **line_box);

 private:
  // Tuning constants; the values are defined outside this header.
  static const float kMinValidLineHgtRatio;
  static const int kLineSepMorphMinHgt;
  static const int kHgtBins;
  static const int kMaxConnCompHgt;
  static const int kMaxConnCompWid;
  static const int kMaxHorzAspectRatio;
  static const int kMaxVertAspectRatio;
  static const int kMinWid;
  static const int kMinHgt;
  static const double kMaxValidLineRatio;

  // Cube Reco context
  CubeRecoContext *cntxt_;
  // Original image
  Pix *orig_img_;
  // Post processed image
  Pix *img_;
  // Init flag
  bool init_;
  // Output Line and column info
  int line_cnt_;
  Pixaa *columns_;
  Pixa *con_comps_;
  Pixa *lines_pixa_;
  // Estimates for sizes of ALEF and DOT needed for Arabic analysis
  double est_alef_hgt_;
  double est_dot_hgt_;

  // Init the page analysis
  bool Init();
  // Performs line segmentation
  bool LineSegment();
  // Cleanup function
  Pix *CleanUp(Pix *pix);
  // Compute validity ratio for a line
  double ValidityRatio(Pix *line_mask_pix, Box *line_box);
  // Validate line
  bool ValidLine(Pix *line_mask_pix, Box *line_box);
  // Split a line continuously until valid or fail
  Pixa *SplitLine(Pix *line_mask_pix, Box *line_box);
  // Do a desperate attempt at cracking lines
  Pixa *CrackLine(Pix *line_mask_pix, Box *line_box);
  Pixa *CrackLine(Pix *line_mask_pix, Box *line_box, int line_cnt);
  // Checks if a line is too small
  bool SmallLine(Box *line_box);
  // Compute the connected components in a line
  Boxa *ComputeLineConComps(Pix *line_mask_pix, Box *line_box,
                            Pixa **con_comps_pixa);
  // Create a union of two arbitrary pix
  Pix *PixUnion(Pix *dest_pix, Box *dest_box, Pix *src_pix, Box *src_box);
  // Create a union of a pixa subset
  Pix *Pixa2Pix(Pixa *pixa, Box **dest_box, int start_pix, int pix_cnt);
  // Create a union of a pixa
  Pix *Pixa2Pix(Pixa *pixa, Box **dest_box);
  // Merges a number of lines into one line given a bounding box and a mask
  bool MergeLine(Pix *line_mask_pix, Box *line_box,
                 Pixa *lines, Boxaa *lines_con_comps);
  // Creates new set of lines from the computed columns
  bool AddLines(Pixa *lines);
  // Estimate the parameters of the font(s) used in the page
  bool EstimateFontParams();
  // Perform a vertical Closing with the specified threshold,
  // returning the resulting conn comps as a pixa
  Pixa *VerticalClosing(Pix *pix, int thresold, Boxa **boxa);
  // Index the specific pixa using RTL reading order
  int *IndexRTL(Pixa *pixa);
  // Implements a rudimentary page & line segmenter
  bool FindLines();
};
}
#endif // CUBE_LINE_SEGMENTER_H
|