This file is indexed.

/usr/include/tesseract/networkio.h is in libtesseract-dev 4.00~git2288-10f4998a-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
///////////////////////////////////////////////////////////////////////
// File:        networkio.h
// Description: Network input/output data, allowing float/int implementations.
// Author:      Ray Smith
// Created:     Tue Jun 17 08:43:11 PST 2014
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_LSTM_NETWORKIO_H_
#define TESSERACT_LSTM_NETWORKIO_H_

#include <math.h>
#include <stdio.h>
#include <vector>

#include "genericvector.h"
#include "helpers.h"
#include "static_shape.h"
#include "stridemap.h"
#include "weightmatrix.h"

struct Pix;

namespace tesseract {

// Class to contain all the input/output of a network, allowing for fixed or
// variable-strided 2d to 1d mapping, and float or int8_t values. Provides
// enough calculating functions to hide the detail of the implementation.
class NetworkIO {
 public:
  NetworkIO() : int_mode_(false) {}
  // Resizes the array (and stride), avoiding realloc if possible, to the given
  // size from various size specs:
  // Same stride size, but given number of features.
  void Resize(const NetworkIO& src, int num_features) {
    ResizeToMap(src.int_mode(), src.stride_map(), num_features);
  }
  // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
  void Resize2d(bool int_mode, int width, int num_features);
  // Resizes forcing a float representation with the stridemap of src and the
  // given number of features.
  void ResizeFloat(const NetworkIO& src, int num_features) {
    ResizeToMap(false, src.stride_map(), num_features);
  }
  // Resizes to a specific stride_map.
  void ResizeToMap(bool int_mode, const StrideMap& stride_map,
                   int num_features);
  // Shrinks image size by x_scale,y_scale, and use given number of features.
  void ResizeScaled(const NetworkIO& src, int x_scale, int y_scale,
                    int num_features);
  // Resizes to just 1 x-coord, whatever the input.
  void ResizeXTo1(const NetworkIO& src, int num_features);
  // Initialize all the array to zero.
  void Zero();
  // Initializes to zero all elements of the array that do not correspond to
  // valid image positions. (If a batch of different-sized images are packed
  // together, then there will be padding pixels.)
  void ZeroInvalidElements();
  // Sets up the array from the given image, using the currently set int_mode_.
  // If the image width doesn't match the shape, the image is truncated or
  // padded with noise to match.
  void FromPix(const StaticShape& shape, const Pix* pix, TRand* randomizer);
  // Sets up the array from the given set of images, using the currently set
  // int_mode_. If the image width doesn't match the shape, the images are
  // truncated or padded with noise to match.
  void FromPixes(const StaticShape& shape, const std::vector<const Pix*>& pixes,
                 TRand* randomizer);
  // Copies the given pix to *this at the given batch index, stretching and
  // clipping the pixel values so that [black, black + 2*contrast] maps to the
  // dynamic range of *this, ie [-1,1] for a float and (-127,127) for int.
  // This is a 2-d operation in the sense that the output depth is the number
  // of input channels, the height is the height of the image, and the width
  // is the width of the image, or truncated/padded with noise if the width
  // is a fixed size.
  void Copy2DImage(int batch, Pix* pix, float black, float contrast,
                   TRand* randomizer);
  // Copies the given pix to *this at the given batch index, as Copy2DImage
  // above, except that the output depth is the height of the input image, the
  // output height is 1, and the output width as for Copy2DImage.
  // The image is thus treated as a 1-d set of vertical pixel strips.
  void Copy1DGreyImage(int batch, Pix* pix, float black, float contrast,
                       TRand* randomizer);
  // Helper stores the pixel value in i_ or f_ according to int_mode_.
  // t: is the index from the StrideMap corresponding to the current
  //   [batch,y,x] position
  // f: is the index into the depth/channel
  // pixel: the value of the pixel from the image (in one channel)
  // black: the pixel value to map to the lowest of the range of *this
  // contrast: the range of pixel values to stretch to half the range of *this.
  void SetPixel(int t, int f, int pixel, float black, float contrast);
  // Converts the array to a Pix. Must be pixDestroyed after use.
  Pix* ToPix() const;
  // Prints the first and last num timesteps of the array for each feature.
  void Print(int num) const;

  // Returns the timestep width.
  int Width() const {
    return int_mode_ ? i_.dim1() : f_.dim1();
  }
  // Returns the number of features.
  int NumFeatures() const {
    return int_mode_ ? i_.dim2() : f_.dim2();
  }
  // Accessor to a timestep of the float matrix.
  float* f(int t) {
    ASSERT_HOST(!int_mode_);
    return f_[t];
  }
  const float* f(int t) const {
    ASSERT_HOST(!int_mode_);
    return f_[t];
  }
  const int8_t* i(int t) const {
    ASSERT_HOST(int_mode_);
    return i_[t];
  }
  bool int_mode() const {
    return int_mode_;
  }
  void set_int_mode(bool is_quantized) {
    int_mode_ = is_quantized;
  }
  const StrideMap& stride_map() const {
    return stride_map_;
  }
  void set_stride_map(const StrideMap& map) {
    stride_map_ = map;
  }
  const GENERIC_2D_ARRAY<float>& float_array() const { return f_; }
  GENERIC_2D_ARRAY<float>* mutable_float_array() { return &f_; }

  // Copies a single time step from src.
  void CopyTimeStepFrom(int dest_t, const NetworkIO& src, int src_t);
  // Copies a part of single time step from src.
  void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features,
                           const NetworkIO& src, int src_t, int src_offset);
  // Zeroes a single time step.
  void ZeroTimeStep(int t) { ZeroTimeStepGeneral(t, 0, NumFeatures()); }
  void ZeroTimeStepGeneral(int t, int offset, int num_features);
  // Sets the given range to random values.
  void Randomize(int t, int offset, int num_features, TRand* randomizer);

  // Helper returns the label and score of the best choice over a range.
  int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch,
                          float* rating, float* certainty) const;
  // Helper returns the rating and certainty of the choice over a range in t.
  void ScoresOverRange(int t_start, int t_end, int choice, int null_ch,
                       float* rating, float* certainty) const;
  // Returns the index (label) of the best value at the given timestep,
  // and if not null, sets the score to the log of the corresponding value.
  int BestLabel(int t, float* score) const {
    return BestLabel(t, -1, -1, score);
  }
  // Returns the index (label) of the best value at the given timestep,
  // excluding not_this and not_that, and if not null, sets the score to the
  // log of the corresponding value.
  int BestLabel(int t, int not_this, int not_that, float* score) const;
  // Returns the best start position out of range (into which both start and end
  // must fit) to obtain the highest cumulative score for the given labels.
  int PositionOfBestMatch(const GenericVector<int>& labels, int start,
                          int end) const;
  // Returns the cumulative score of the given labels starting at start, and
  // using one label per time-step.
  double ScoreOfLabels(const GenericVector<int>& labels, int start) const;
  // Helper function sets all the outputs for a single timestep, such that
  // label has value ok_score, and the other labels share 1 - ok_score.
  // Assumes float mode.
  void SetActivations(int t, int label, float ok_score);
  // Modifies the values, only if needed, so that the given label is
  // the winner at the given time step t.
  // Assumes float mode.
  void EnsureBestLabel(int t, int label);
  // Helper function converts prob to certainty taking the minimum into account.
  static float ProbToCertainty(float prob);
  // Returns true if there is any bad value that is suspiciously like a GT
  // error. Assuming that *this is the difference(gradient) between target
  // and forward output, returns true if there is a large negative value
  // (correcting a very confident output) for which there is no corresponding
  // positive value in an adjacent timestep for the same feature index. This
  // allows the box-truthed samples to make fine adjustments to position while
  // stopping other disagreements of confident output with ground truth.
  bool AnySuspiciousTruth(float confidence_thr) const;

  // Reads a single timestep to floats in the range [-1, 1].
  void ReadTimeStep(int t, double* output) const;
  // Adds a single timestep to floats.
  void AddTimeStep(int t, double* inout) const;
  // Adds part of a single timestep to floats.
  void AddTimeStepPart(int t, int offset, int num_features, float* inout) const;
  // Writes a single timestep from floats in the range [-1, 1].
  void WriteTimeStep(int t, const double* input);
  // Writes a single timestep from floats in the range [-1, 1] writing only
  // num_features elements of input to (*this)[t], starting at offset.
  void WriteTimeStepPart(int t, int offset, int num_features,
                         const double* input);
  // Maxpools a single time step from src.
  void MaxpoolTimeStep(int dest_t, const NetworkIO& src, int src_t,
                       int* max_line);
  // Runs maxpool backward, using maxes to index timesteps in *this.
  void MaxpoolBackward(const NetworkIO& fwd,
                       const GENERIC_2D_ARRAY<int>& maxes);
  // Returns the min over time of the maxes over features of the outputs.
  float MinOfMaxes() const;
  // Returns the min over time.
  float Max() const { return int_mode_ ? i_.Max() : f_.Max(); }
  // Computes combined results for a combiner that chooses between an existing
  // input and itself, with an additional output to indicate the choice.
  void CombineOutputs(const NetworkIO& base_output,
                      const NetworkIO& combiner_output);
  // Computes deltas for a combiner that chooses between 2 sets of inputs.
  void ComputeCombinerDeltas(const NetworkIO& fwd_deltas,
                             const NetworkIO& base_output);

  // Copies the array checking that the types match.
  void CopyAll(const NetworkIO& src);
  // Adds the array to a float array, with scaling to [-1, 1] if the src is int.
  void AddAllToFloat(const NetworkIO& src);
  // Subtracts the array from a float array. src must also be float.
  void SubtractAllFromFloat(const NetworkIO& src);

  // Copies src to *this, with maxabs normalization to match scale.
  void CopyWithNormalization(const NetworkIO& src, const NetworkIO& scale);
  // Multiplies the float data by the given factor.
  void ScaleFloatBy(float factor) { f_ *= factor; }
  // Copies src to *this with independent reversal of the y dimension.
  void CopyWithYReversal(const NetworkIO& src);
  // Copies src to *this with independent reversal of the x dimension.
  void CopyWithXReversal(const NetworkIO& src);
  // Copies src to *this with independent transpose of the x and y dimensions.
  void CopyWithXYTranspose(const NetworkIO& src);
  // Copies src to *this, at the given feature_offset, returning the total
  // feature offset after the copy. Multiple calls will stack outputs from
  // multiple sources in feature space.
  int CopyPacking(const NetworkIO& src, int feature_offset);
  // Opposite of CopyPacking, fills *this with a part of src, starting at
  // feature_offset, and picking num_features. Resizes *this to match.
  void CopyUnpacking(const NetworkIO& src, int feature_offset,
                     int num_features);
  // Transposes the float part of *this into dest.
  void Transpose(TransposedArray* dest) const;

  // Clips the content of a single time-step to +/-range.
  void ClipVector(int t, float range);

  // Applies Func to timestep t of *this (u) and multiplies the result by v
  // component-wise, putting the product in *product.
  // *this and v may be int or float, but must match. The outputs are double.
  template <class Func>
  void FuncMultiply(const NetworkIO& v_io, int t, double* product) {
    Func f;
    ASSERT_HOST(!int_mode_);
    ASSERT_HOST(!v_io.int_mode_);
    int dim = f_.dim2();
    if (int_mode_) {
      const int8_t* u = i_[t];
      const int8_t* v = v_io.i_[t];
      for (int i = 0; i < dim; ++i) {
        product[i] = f(u[i] / static_cast<double>(INT8_MAX)) * v[i] /
                     static_cast<double>(INT8_MAX);
      }
    } else {
      const float* u = f_[t];
      const float* v = v_io.f_[t];
      for (int i = 0; i < dim; ++i) {
        product[i] = f(u[i]) * v[i];
      }
    }
  }
  // Applies Func to *this (u) at u_t, and multiplies the result by v[v_t] * w,
  // component-wise, putting the product in *product.
  // All NetworkIOs are assumed to be float.
  template <class Func>
  void FuncMultiply3(int u_t, const NetworkIO& v_io, int v_t, const double* w,
                     double* product) const {
    ASSERT_HOST(!int_mode_);
    ASSERT_HOST(!v_io.int_mode_);
    Func f;
    const float* u = f_[u_t];
    const float* v = v_io.f_[v_t];
    int dim = f_.dim2();
    for (int i = 0; i < dim; ++i) {
      product[i] = f(u[i]) * v[i] * w[i];
    }
  }
  // Applies Func to *this (u) at u_t, and multiplies the result by v[v_t] * w,
  // component-wise, adding the product to *product.
  // All NetworkIOs are assumed to be float.
  template <class Func>
  void FuncMultiply3Add(const NetworkIO& v_io, int t, const double* w,
                        double* product) const {
    ASSERT_HOST(!int_mode_);
    ASSERT_HOST(!v_io.int_mode_);
    Func f;
    const float* u = f_[t];
    const float* v = v_io.f_[t];
    int dim = f_.dim2();
    for (int i = 0; i < dim; ++i) {
      product[i] += f(u[i]) * v[i] * w[i];
    }
  }
  // Applies Func1 to *this (u), Func2 to v, and multiplies the result by w,
  // component-wise, putting the product in product, all at timestep t, except
  // w, which is a simple array. All NetworkIOs are assumed to be float.
  template <class Func1, class Func2>
  void Func2Multiply3(const NetworkIO& v_io, int t, const double* w,
                      double* product) const {
    ASSERT_HOST(!int_mode_);
    ASSERT_HOST(!v_io.int_mode_);
    Func1 f;
    Func2 g;
    const float* u = f_[t];
    const float* v = v_io.f_[t];
    int dim = f_.dim2();
    for (int i = 0; i < dim; ++i) {
      product[i] = f(u[i]) * g(v[i]) * w[i];
    }
  }

 private:
  // Returns the padding required for the given number of features in order
  // for the SIMD operations to be safe.
  static int GetPadding(int num_features);

  // Choice of float vs 8 bit int for data.
  GENERIC_2D_ARRAY<float> f_;
  GENERIC_2D_ARRAY<int8_t> i_;
  // Which of f_ and i_ are we actually using.
  bool int_mode_;
  // Stride for 2d input data.
  StrideMap stride_map_;
  // Holds the optimal integer multiplier for this machine.
  // This is a leaked, lazily initialized singleton, and is used for computing
  // padding to apply to i_ for SIMD use.
  static IntSimdMatrix* multiplier_;
};

}  // namespace tesseract.

#endif  // TESSERACT_LSTM_NETWORKIO_H_