This file is indexed.

/usr/include/tesseract/ratngs.h is in libtesseract-dev 3.02.01-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
/**********************************************************************
 * File:        ratngs.h  (Formerly ratings.h)
 * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
 * Author:      Ray Smith
 * Created:     Thu Apr 23 11:40:38 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifndef           RATNGS_H
#define           RATNGS_H

#include <assert.h>

#include "clst.h"
#include "genericvector.h"
#include "notdll.h"
#include "unichar.h"
#include "unicharset.h"
#include "werd.h"

class BLOB_CHOICE: public ELIST_LINK
{
  public:
    BLOB_CHOICE() {
      unichar_id_ = INVALID_UNICHAR_ID;
      fontinfo_id_ = -1;
      fontinfo_id2_ = -1;
      rating_ = MAX_FLOAT32;
      certainty_ = -MAX_FLOAT32;
      script_id_ = -1;
      language_model_state_ = NULL;
      min_xheight_ = 0;
      max_xheight_ = 0;
      adapted_ = false;
    }
    BLOB_CHOICE(UNICHAR_ID src_unichar_id,  // character id
                float src_rating,          // rating
                float src_cert,            // certainty
                inT16 src_fontinfo_id,      // font
                inT16 src_fontinfo_id2,     // 2nd choice font
                int script_id,             // script
                inT16 min_xheight,         // min xheight in image pixel units
                inT16 max_xheight,         // max xheight allowed by this char
                bool adapted);             // adapted match or not
    BLOB_CHOICE(const BLOB_CHOICE &other);
    ~BLOB_CHOICE() {}

    UNICHAR_ID unichar_id() const {
      return unichar_id_;
    }
    float rating() const {
      return rating_;
    }
    float certainty() const {
      return certainty_;
    }
    inT16 fontinfo_id() const {
      return fontinfo_id_;
    }
    inT16 fontinfo_id2() const {
      return fontinfo_id2_;
    }
    int script_id() const {
      return script_id_;
    }
    void *language_model_state() {
      return language_model_state_;
    }
    inT16 xgap_before() const {
      return xgap_before_;
    }
    inT16 xgap_after() const {
      return xgap_after_;
    }
    inT16 min_xheight() const {
      return min_xheight_;
    }
    inT16 max_xheight() const {
      return max_xheight_;
    }
    bool adapted() const {
      return adapted_;
    }

    void set_unichar_id(UNICHAR_ID newunichar_id) {
      unichar_id_ = newunichar_id;
    }
    void set_rating(float newrat) {
      rating_ = newrat;
    }
    void set_certainty(float newrat) {
      certainty_ = newrat;
    }
    void set_fontinfo_id(inT16 newfont) {
      fontinfo_id_ = newfont;
    }
    void set_fontinfo_id2(inT16 newfont) {
      fontinfo_id2_ = newfont;
    }
    void set_script(int newscript_id) {
      script_id_ = newscript_id;
    }
    void set_language_model_state(void *language_model_state) {
      language_model_state_ = language_model_state;
    }
    void set_xgap_before(inT16 gap) {
      xgap_before_ = gap;
    }
    void set_xgap_after(inT16 gap) {
      xgap_after_ = gap;
    }
    void set_adapted(bool adapted) {
      adapted_ = adapted;
    }
    static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) {
      BLOB_CHOICE* choice = new BLOB_CHOICE;
      *choice = *src;
      return choice;
    }
    void print(const UNICHARSET *unicharset) {
      tprintf("r%.2f c%.2f : %d %s", rating_, certainty_, unichar_id_,
              (unicharset == NULL) ? "" :
              unicharset->debug_str(unichar_id_).string());
    }

 private:
  UNICHAR_ID unichar_id_;          // unichar id
  inT16 fontinfo_id_;              // char font information
  inT16 fontinfo_id2_;             // 2nd choice font information
  float rating_;                  // size related
  float certainty_;               // absolute
  int script_id_;
  // Stores language model information about this BLOB_CHOICE. Used during
  // the segmentation search for BLOB_CHOICEs in BLOB_CHOICE_LISTs that are
  // recorded in the ratings matrix.
  // The pointer is owned/managed by the segmentation search.
  void *language_model_state_;
  inT16 xgap_before_;
  inT16 xgap_after_;
  // X-height range (in image pixels) that this classification supports.
  inT16 min_xheight_;
  inT16 max_xheight_;
  bool adapted_;  // true if this is a match from adapted templates
};

// Make BLOB_CHOICE listable.
ELISTIZEH (BLOB_CHOICE) CLISTIZEH (BLOB_CHOICE_LIST)

// Permuter codes used in WERD_CHOICEs.
enum PermuterType {
  NO_PERM,            // 0
  PUNC_PERM,          // 1
  TOP_CHOICE_PERM,    // 2
  LOWER_CASE_PERM,    // 3
  UPPER_CASE_PERM,    // 4
  NGRAM_PERM,         // 5
  NUMBER_PERM,        // 6
  USER_PATTERN_PERM,  // 7
  SYSTEM_DAWG_PERM,   // 8
  DOC_DAWG_PERM,      // 9
  USER_DAWG_PERM,     // 10
  FREQ_DAWG_PERM,     // 11
  COMPOUND_PERM,      // 12
};

class WERD_CHOICE {
 public:
  static const float kBadRating;

  WERD_CHOICE(const UNICHARSET *unicharset)
    : unicharset_(unicharset) { this->init(8); }
  WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
    : unicharset_(unicharset) { this->init(reserved); }
  WERD_CHOICE(const char *src_string,
              const char *src_lengths,
              float src_rating,
              float src_certainty,
              uinT8 src_permuter,
              const UNICHARSET &unicharset)
    : unicharset_(&unicharset) {
    this->init(src_string, src_lengths, src_rating,
               src_certainty, src_permuter);
  }
  WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
  WERD_CHOICE(const WERD_CHOICE &word) : unicharset_(word.unicharset_) {
    this->init(word.length());
    this->operator=(word);
  }
  ~WERD_CHOICE();

  const UNICHARSET *unicharset() const {
    return unicharset_;
  }
  inline int length() const {
    return length_;
  }
  inline const UNICHAR_ID *unichar_ids() const {
    return unichar_ids_;
  }
  inline const UNICHAR_ID unichar_id(int index) const {
    assert(index < length_);
    return unichar_ids_[index];
  }
  inline const char *fragment_lengths() const {
    return fragment_lengths_;
  }
  inline const char fragment_length(int index) const {
    assert(index < length_);
    return fragment_lengths_[index];
  }
  inline float rating() const {
    return rating_;
  }
  inline float certainty() const {
    return certainty_;
  }
  inline uinT8 permuter() const {
    return permuter_;
  }
  const char *permuter_name() const;
  inline bool fragment_mark() const {
    return fragment_mark_;
  }
  inline BLOB_CHOICE_LIST_CLIST* blob_choices() {
    return blob_choices_;
  }
  inline void set_unichar_id(UNICHAR_ID unichar_id, int index) {
    assert(index < length_);
    unichar_ids_[index] = unichar_id;
  }
  inline void set_fragment_length(char flen, int index) {
    assert(index < length_);
    fragment_lengths_[index] = flen;
  }
  inline void set_rating(float new_val) {
    rating_ = new_val;
  }
  inline void set_certainty(float new_val) {
    certainty_ = new_val;
  }
  inline void set_permuter(uinT8 perm) {
    permuter_ = perm;
  }
  inline void set_fragment_mark(bool new_fragment_mark) {
    fragment_mark_ = new_fragment_mark;
  }
  // Note: this function should only be used if all the fields
  // are populated manually with set_* functions (rather than
  // (copy)constructors and append_* functions).
  inline void set_length(int len) {
    ASSERT_HOST(reserved_ >= len);
    length_ = len;
  }
  void set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices);

  /// Make more space in unichar_id_ and fragment_lengths_ arrays.
  inline void double_the_size() {
    if (reserved_ > 0) {
      unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy(
          reserved_, unichar_ids_);
      fragment_lengths_ = GenericVector<char>::double_the_size_memcpy(
          reserved_, fragment_lengths_);
      reserved_ *= 2;
    } else {
      unichar_ids_ = new UNICHAR_ID[1];
      fragment_lengths_ = new char[1];
      reserved_ = 1;
    }
  }

  /// Initializes WERD_CHOICE - reserves length slots in unichar_ids_ and
  /// fragment_length_ arrays. Sets other values to default (blank) values.
  inline void init(int reserved) {
    reserved_ = reserved;
    if (reserved > 0) {
      unichar_ids_ = new UNICHAR_ID[reserved];
      fragment_lengths_ = new char[reserved];
    } else {
      unichar_ids_ = NULL;
      fragment_lengths_ = NULL;
    }
    length_ = 0;
    rating_ = 0.0;
    certainty_ = MAX_FLOAT32;
    permuter_ = NO_PERM;
    fragment_mark_ = false;
    blob_choices_ = NULL;
    unichars_in_script_order_ = false;  // Tesseract is strict left-to-right.
  }

  /// Helper function to build a WERD_CHOICE from the given string,
  /// fragment lengths, rating, certainty and permuter.
  /// The function assumes that src_string is not NULL.
  /// src_lengths argument could be NULL, in which case the unichars
  /// in src_string are assumed to all be of length 1.
  void init(const char *src_string, const char *src_lengths,
            float src_rating, float src_certainty,
            uinT8 src_permuter);

  /// Set the fields in this choice to be default (bad) values.
  inline void make_bad() {
    length_ = 0;
    rating_ = kBadRating;
    certainty_ = -MAX_FLOAT32;
    fragment_mark_ = false;
  }

  /// This function assumes that there is enough space reserved
  /// in the WERD_CHOICE for adding another unichar.
  /// This is an efficient alternative to append_unichar_id().
  inline void append_unichar_id_space_allocated(
      UNICHAR_ID unichar_id, char fragment_length,
      float rating, float certainty) {
    assert(reserved_ > length_);
    length_++;
    this->set_unichar_id(unichar_id, fragment_length,
                         rating, certainty, length_-1);
  }

  void append_unichar_id(UNICHAR_ID unichar_id, char fragment_length,
                         float rating, float certainty);

  inline void set_unichar_id(UNICHAR_ID unichar_id, char fragment_length,
                             float rating, float certainty, int index) {
    assert(index < length_);
    unichar_ids_[index] = unichar_id;
    fragment_lengths_[index] = fragment_length;
    rating_ += rating;
    if (certainty < certainty_) {
      certainty_ = certainty;
    }
  }

  bool contains_unichar_id(UNICHAR_ID unichar_id) const;
  void remove_unichar_ids(int index, int num);
  inline void remove_last_unichar_id() { --length_; }
  inline void remove_unichar_id(int index) {
    this->remove_unichar_ids(index, 1);
  }
  bool has_rtl_unichar_id() const;
  void reverse_and_mirror_unichar_ids();

  // Returns the half-open interval of unichar_id indices [start, end) which
  // enclose the core portion of this word -- the part after stripping
  // punctuation from the left and right.
  void punct_stripped(int *start_core, int *end_core) const;

  // Return a copy of this WERD_CHOICE with the choices [start, end).
  // The result is useful only for checking against a dictionary.
  WERD_CHOICE shallow_copy(int start, int end) const;

  void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const;
  const STRING debug_string() const {
    STRING word_str;
    for (int i = 0; i < length_; ++i) {
      word_str += unicharset_->debug_str(unichar_ids_[i]);
      word_str += " ";
    }
    return word_str;
  }

  // Call this to override the default (strict left to right graphemes)
  // with the fact that some engine produces a "reading order" set of
  // Graphemes for each word.
  bool set_unichars_in_script_order(bool in_script_order) {
    return unichars_in_script_order_ = in_script_order;
  }

  bool unichars_in_script_order() const {
    return unichars_in_script_order_;
  }

  // Returns a UTF-8 string equivalent to the current choice
  // of UNICHAR IDs.
  const STRING &unichar_string() const {
    this->string_and_lengths(&unichar_string_, &unichar_lengths_);
    return unichar_string_;
  }

  // Returns the lengths, one byte each, representing the number of bytes
  // required in the unichar_string for each UNICHAR_ID.
  const STRING &unichar_lengths() const {
    this->string_and_lengths(&unichar_string_, &unichar_lengths_);
    return unichar_lengths_;
  }
  const void print() const { this->print(""); }
  const void print(const char *msg) const;

  WERD_CHOICE& operator+= (     // concatanate
    const WERD_CHOICE & second);// second on first

  WERD_CHOICE& operator= (const WERD_CHOICE& source);

 private:
  const UNICHARSET *unicharset_;
  UNICHAR_ID *unichar_ids_;  // unichar ids that represent the text of the word
  char *fragment_lengths_;   // number of fragments in each unichar
  int reserved_;             // size of the above arrays
  int length_;               // word length
  float rating_;             // size related
  float certainty_;          // absolute
  uinT8 permuter_;           // permuter code
  bool fragment_mark_;       // if true, indicates that this choice
                             // was chosen over a better one that
                             // contained a fragment
  BLOB_CHOICE_LIST_CLIST *blob_choices_;  // best choices for each blob

  // Normally, the blob_choices_ represent the recognition results in order
  // from left-to-right.  However, some engines (say Cube) may return
  // recognition results in the order of the script's major reading direction
  // (for Arabic, that is right-to-left).
  bool unichars_in_script_order_;

  // The following variables are populated and passed by reference any
  // time unichar_string() or unichar_lengths() are called.
  mutable STRING unichar_string_;
  mutable STRING unichar_lengths_;

  bool unichar_info_present;

 private:
  void delete_blob_choices();
};

// Make WERD_CHOICE listable.
ELISTIZEH (WERD_CHOICE)
typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR;
typedef GenericVector<WERD_CHOICE_LIST *> WERD_CHOICE_LIST_VECTOR;

// Utilities for comparing WERD_CHOICEs

bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1,
                                       const WERD_CHOICE &word2);

// Utilities for debug printing.
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings);
void print_ratings_list(
    const char *msg,                      // intro message
    BLOB_CHOICE_LIST *ratings,            // list of results
    const UNICHARSET &current_unicharset  // unicharset that can be used
                                          // for id-to-unichar conversion
    );
void print_ratings_info(
    FILE *fp,                             // file to use
    BLOB_CHOICE_LIST *ratings,            // list of results
    const UNICHARSET &current_unicharset  // unicharset that can be used
                                          // for id-to-unichar conversion
    );
void print_char_choices_list(
    const char *msg,
    const BLOB_CHOICE_LIST_VECTOR &char_choices,
    const UNICHARSET &current_unicharset,
    BOOL8 detailed
    );
void print_word_alternates_list(
    WERD_CHOICE *word,
    GenericVector<WERD_CHOICE *> *alternates);

#endif