This file is indexed.

/usr/include/tesseract/commontraining.h is in libtesseract-dev 3.02.01-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
// Copyright 2008 Google Inc. All Rights Reserved.
// Author: scharron@google.com (Samuel Charron)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef TESSERACT_TRAINING_COMMONTRAINING_H__
#define TESSERACT_TRAINING_COMMONTRAINING_H__

#include "oldlist.h"
#include "cluster.h"
#include "intproto.h"
#include "featdefs.h"

// Macros to merge tesseract params with command-line flags.
#ifdef USE_STD_NAMESPACE
#include "params.h"
#  define INT_PARAM_FLAG(name, val, comment) \
    INT_VAR(FLAGS_##name, val, comment)
#  define DECLARE_INT_PARAM_FLAG(name) extern INT_VAR_H(FLAGS_##name, 0, "")
#  define STRING_PARAM_FLAG(name, val, comment) \
    STRING_VAR(FLAGS_##name, val, comment)
#  define DECLARE_STRING_PARAM_FLAG(name) \
    extern STRING_VAR_H(FLAGS_##name, "", "")
#  define c_str string
#else
#include "base/commandlineflags.h"
#  define INT_PARAM_FLAG(name, val, comment) \
    DEFINE_int32(name, val, comment)
#  define DECLARE_INT_PARAM_FLAG(name) DECLARE_int32(name)
#  define STRING_PARAM_FLAG(name, val, comment) \
    DEFINE_string(name, val, comment)
#  define DECLARE_STRING_PARAM_FLAG(name) DECLARE_string(name)
#endif

namespace tesseract {
class Classify;
class MasterTrainer;
class ShapeTable;
}

//////////////////////////////////////////////////////////////////////////////
// Globals ///////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////

extern FEATURE_DEFS_STRUCT feature_defs;

// Must be defined in the file that "implements" commonTraining facilities.
extern CLUSTERCONFIG Config;

//////////////////////////////////////////////////////////////////////////////
// Structs ///////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
typedef struct
{
  char  *Label;
  int   SampleCount;
  int   font_sample_count;
  LIST  List;
}
LABELEDLISTNODE, *LABELEDLIST;

typedef struct
{
  char* Label;
  int   NumMerged[MAX_NUM_PROTOS];
  CLASS_TYPE Class;
}MERGE_CLASS_NODE;
typedef MERGE_CLASS_NODE* MERGE_CLASS;


//////////////////////////////////////////////////////////////////////////////
// Functions /////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
void ParseArguments(int* argc, char*** argv);

namespace tesseract {
// Helper loads shape table from the given file.
ShapeTable* LoadShapeTable(const STRING& file_prefix);
// Helper to write the shape_table.
void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table);

// Creates a MasterTraininer and loads the training data into it:
// Initializes feature_defs and IntegerFX.
// Loads the shape_table if shape_table != NULL.
// Loads initial unicharset from -U command-line option.
// If FLAGS_input_trainer is set, loads the majority of data from there, else:
//   Loads font info from -F option.
//   Loads xheights from -X option.
//   Loads samples from .tr files in remaining command-line args.
//   Deletes outliers and computes canonical samples.
//   If FLAGS_output_trainer is set, saves the trainer for future use.
// Computes canonical and cloud features.
// If shape_table is not NULL, but failed to load, make a fake flat one,
// as shape clustering was not run.
MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
                                bool replication,
                                ShapeTable** shape_table,
                                STRING* file_prefix);
}  // namespace tesseract.

const char *GetNextFilename(int argc, const char* const * argv);

LABELEDLIST FindList(
    LIST        List,
    char        *Label);

LABELEDLIST NewLabeledList(
    const char  *Label);

void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
                         const char *feature_name, int max_samples,
                         UNICHARSET* unicharset,
                         FILE* file, LIST* training_samples);

void WriteTrainingSamples(
    const FEATURE_DEFS_STRUCT &FeatureDefs,
    char *Directory,
    LIST CharList,
    const char  *program_feature_type);

void FreeTrainingSamples(
    LIST        CharList);

void FreeLabeledList(
    LABELEDLIST LabeledList);

void FreeLabeledClassList(
    LIST        ClassListList);

CLUSTERER *SetUpForClustering(
    const FEATURE_DEFS_STRUCT &FeatureDefs,
    LABELEDLIST CharSample,
    const char  *program_feature_type);

LIST RemoveInsignificantProtos(
    LIST        ProtoList,
    BOOL8       KeepSigProtos,
    BOOL8       KeepInsigProtos,
    int         N);

void CleanUpUnusedData(
    LIST        ProtoList);

void MergeInsignificantProtos(
    LIST        ProtoList,
    const char  *label,
    CLUSTERER   *Clusterer,
    CLUSTERCONFIG *Config);

MERGE_CLASS FindClass(
    LIST        List,
    const char        *Label);

MERGE_CLASS NewLabeledClass(
    const char        *Label);

void FreeTrainingSamples(
    LIST        CharList);

CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
                                LIST LabeledClassList);

void Normalize(
    float       *Values);

void FreeNormProtoList(
    LIST        CharList);

void AddToNormProtosList(
    LIST*       NormProtoList,
    LIST        ProtoList,
    char        *CharName);

int NumberOfProtos(
    LIST        ProtoList,
    BOOL8       CountSigProtos,
    BOOL8       CountInsigProtos);


void allocNormProtos();
#endif  // TESSERACT_TRAINING_COMMONTRAINING_H__