This file is indexed.

/usr/include/tesseract/tuning_params.h is in libtesseract-dev 3.02.01-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/**********************************************************************
 * File:        tuning_params.h
 * Description: Declaration of the Tuning Parameters Base Class
 * Author:    Ahmad Abdulkader
 * Created:   2008
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// The TuningParams class abstracts all the parameters that can be learned or
// tuned during the training process. It is an abstract base class: every
// concrete tuning-parameter class derives from it and implements the
// pure-virtual Save() and Load() members.

#ifndef TUNING_PARAMS_H
#define TUNING_PARAMS_H

#include <string>
#ifdef USE_STD_NAMESPACE
using std::string;
#endif

namespace tesseract {
// Abstract base class that groups the parameters which can be learned or
// tuned during training. It stores the values, exposes read accessors for
// all of them and setters for a subset; the remaining fields are protected
// and can be assigned directly by derived classes. Derived classes must
// implement Save() and Load().
class TuningParams {
 public:
  // Classifier types. The spelling "type_classifer" is part of the public
  // interface and is kept unchanged for source compatibility.
  enum type_classifer {
    NN,
    HYBRID_NN
  };
  // Feature types.
  enum type_feature {
    BMP,
    CHEBYSHEV,
    HYBRID
  };

  // Gives every parameter a deterministic starting value (zero for numeric
  // fields, the first enumerator for the enum fields) so that an accessor
  // never reads an indeterminate value before a derived class or a setter
  // has assigned the field. The initializer order matches the declaration
  // order of the data members.
  TuningParams()
      : reco_wgt_(0.0),
        size_wgt_(0.0),
        char_bigrams_wgt_(0.0),
        word_unigrams_wgt_(0.0),
        max_seg_per_char_(0),
        beam_width_(0),
        tp_classifier_(NN),
        tp_feat_(BMP),
        conv_grid_size_(0),
        hist_wind_wid_(0),
        min_con_comp_size_(0),
        max_word_aspect_ratio_(0.0),
        min_space_height_ratio_(0.0),
        max_space_height_ratio_(0.0),
        combiner_run_thresh_(0.0),
        combiner_classifier_thresh_(0.0) {}
  virtual ~TuningParams() {}

  // Accessor functions. Each returns the current value of the corresponding
  // data member; see the member declarations below for their meaning.
  inline double RecoWgt() const { return reco_wgt_; }
  inline double SizeWgt() const { return size_wgt_; }
  inline double CharBigramWgt() const { return char_bigrams_wgt_; }
  inline double WordUnigramWgt() const { return word_unigrams_wgt_; }
  inline int MaxSegPerChar() const { return max_seg_per_char_; }
  inline int BeamWidth() const { return beam_width_; }
  // The two enum-valued parameters are returned as plain int.
  inline int TypeClassifier() const { return tp_classifier_; }
  inline int TypeFeature() const { return tp_feat_; }
  inline int ConvGridSize() const { return conv_grid_size_; }
  inline int HistWindWid() const { return hist_wind_wid_; }
  inline int MinConCompSize() const { return min_con_comp_size_; }
  inline double MaxWordAspectRatio() const { return max_word_aspect_ratio_; }
  inline double MinSpaceHeightRatio() const { return min_space_height_ratio_; }
  inline double MaxSpaceHeightRatio() const { return max_space_height_ratio_; }
  inline double CombinerRunThresh() const { return combiner_run_thresh_; }
  inline double CombinerClassifierThresh() const {
    return combiner_classifier_thresh_; }

  // Setter functions. Each stores its argument unchanged; no validation is
  // performed. Parameters without a setter here can only be assigned by
  // derived classes through the protected members.
  inline void SetRecoWgt(double wgt) { reco_wgt_ = wgt; }
  inline void SetSizeWgt(double wgt) { size_wgt_ = wgt; }
  inline void SetCharBigramWgt(double wgt) { char_bigrams_wgt_ = wgt; }
  inline void SetWordUnigramWgt(double wgt) { word_unigrams_wgt_ = wgt; }
  inline void SetMaxSegPerChar(int max_seg_per_char) {
    max_seg_per_char_ = max_seg_per_char;
  }
  inline void SetBeamWidth(int beam_width) { beam_width_ = beam_width; }
  inline void SetTypeClassifier(type_classifer tp_classifier) {
    tp_classifier_ = tp_classifier;
  }
  inline void SetTypeFeature(type_feature tp_feat) {tp_feat_ = tp_feat;}
  inline void SetHistWindWid(int hist_wind_wid) {
    hist_wind_wid_ = hist_wind_wid;
  }

  // Pure virtual persistence hooks that every derived class must implement.
  // NOTE(review): presumably these write the parameters to / read them from
  // the file named by file_name and return true on success -- confirm
  // against the concrete implementations.
  // The parameter type is spelled std::string so that this header does not
  // depend on USE_STD_NAMESPACE (or another header) making an unqualified
  // "string" visible; it is the same type, so existing overrides still match.
  virtual bool Save(std::string file_name) = 0;
  virtual bool Load(std::string file_name) = 0;

 protected:
  // weight of recognition cost. This includes the language model cost
  double reco_wgt_;
  // weight of size cost
  double size_wgt_;
  // weight of character bigrams cost
  double char_bigrams_wgt_;
  // weight of word unigrams cost
  double word_unigrams_wgt_;
  // Maximum number of segments per character
  int max_seg_per_char_;
  // Beam width equal to the maximum number of nodes kept in the beam search
  // trellis column after pruning
  int beam_width_;
  // Classifier type: See enum type_classifer for classifier types
  type_classifer tp_classifier_;
  // Feature types: See enum type_feature for feature types
  type_feature   tp_feat_;
  // Grid size to scale a grapheme bitmap used by the BMP feature type
  int conv_grid_size_;
  // Histogram window size as a ratio of the word height used in computing
  // the vertical pixel density histogram in the segmentation algorithm
  int hist_wind_wid_;
  // Minimum possible size of a connected component
  int min_con_comp_size_;
  // Maximum aspect ratio of a word (width / height)
  double max_word_aspect_ratio_;
  // Minimum ratio relative to the line height of a gap to be considered as
  // a word break
  double min_space_height_ratio_;
  // Maximum ratio relative to the line height of a gap to be considered as
  // a definite word break
  double max_space_height_ratio_;
  // When Cube and Tesseract are run in combined mode, only run
  // combiner classifier when tesseract confidence is below this
  // threshold. When Cube is run without Tesseract, this is ignored.
  double combiner_run_thresh_;
  // When Cube and tesseract are run in combined mode, threshold on
  // output of combiner binary classifier (chosen from ROC during
  // combiner training). When Cube is run without Tesseract, this is ignored.
  double combiner_classifier_thresh_;
};
}

#endif  // TUNING_PARAMS_H