This file is indexed.

/usr/include/tesseract/cluster.h is in libtesseract-dev 3.02.01-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/******************************************************************************
 **	Filename:	cluster.h
 **	Purpose:	Definition of feature space clustering routines
 **	Author:		Dan Johnson
 **	History:	5/29/89, DSJ, Created.
 **
 **	(c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
#ifndef   CLUSTER_H
#define   CLUSTER_H

#include "kdtree.h"
#include "oldlist.h"

// Forward declaration only: this header refers to BUCKETS solely through the
// pointers stored in CLUSTERER::bucket_cache and never defines the type.
struct BUCKETS;

// Smallest and largest bucket counts. Together they size the second dimension
// of CLUSTERER::bucket_cache (MAXBUCKETS + 1 - MINBUCKETS slots), which
// suggests the range is inclusive at both ends - confirm in the implementation.
#define MINBUCKETS      5
#define MAXBUCKETS      39

/*----------------------------------------------------------------------
          Types
----------------------------------------------------------------------*/
// Node of the cluster tree. The same layout serves both as a single sample
// and as a cluster of samples (see the SAMPLE alias below); Left and Right
// point at the two sub-clusters of a node.
// The three bit-fields add up to 32 bits (1 + 1 + 30).
typedef struct sample {
  unsigned Clustered:1;          // TRUE if included in a higher cluster
  unsigned Prototype:1;          // TRUE if cluster represented by a proto
  unsigned SampleCount:30;       // number of samples in this cluster
  struct sample *Left;           // ptr to left sub-cluster
  struct sample *Right;          // ptr to right sub-cluster
  inT32 CharID;                  // identifier of char sample came from
  FLOAT32 Mean[1];               // mean of cluster - SampleSize floats
                                 // NOTE(review): declared with one element
                                 // but documented as SampleSize floats, so
                                 // the node is presumably over-allocated by
                                 // whoever creates it - confirm there before
                                 // relying on sizeof(CLUSTER).
} CLUSTER;

typedef CLUSTER SAMPLE;          // can refer to as either sample or cluster

// Kinds of prototype that can be requested from the clusterer; the choice is
// carried in CLUSTERCONFIG::ProtoStyle. PROTOTYPE::Style is documented as
// holding only spherical, elliptical, or mixed, so "automatic" is presumably a
// request-time option that is resolved to one of those - confirm.
typedef enum {
  spherical, elliptical, mixed, automatic
} PROTOSTYLE;

// Tuning parameters handed to ClusterSamples() to control how prototypes are
// derived from the cluster tree.
typedef struct {                 // parameters to control clustering
  PROTOSTYLE ProtoStyle;         // specifies types of protos to be made
  FLOAT32 MinSamples;            // min # of samples per proto - % of total
  FLOAT32 MaxIllegal;            // max percentage of samples in a cluster
                                 // which have more than 1 feature in that
                                 // cluster
  FLOAT32 Independence;          // desired independence between dimensions
  FLOAT64 Confidence;            // desired confidence in prototypes created
  int MagicSamples;              // Ideal number of samples in a cluster.
} CLUSTERCONFIG;

// Distribution assumed for one dimension of a prototype (PROTOTYPE::Distrib
// holds one entry per dimension).
// DISTRIBUTION_COUNT is not a distribution: being the last enumerator it
// equals the number of real distributions and sizes the first dimension of
// CLUSTERER::bucket_cache. Keep it last when adding enumerators.
typedef enum {
  normal, uniform, D_random, DISTRIBUTION_COUNT
} DISTRIBUTION;

// A per-prototype statistic stored in one of two shapes: a single value
// (Spherical) or a pointer to an array of values (Elliptical).
// NOTE(review): nothing in this header records which member is live;
// presumably PROTOTYPE::Style decides, and the Elliptical array presumably has
// one entry per dimension - confirm both in the implementation.
typedef union {
  FLOAT32 Spherical;
  FLOAT32 *Elliptical;
} FLOATUNION;

// A prototype derived from a cluster: its mean, the distribution of each
// dimension, and the variance/magnitude/weight of its density function.
// The four bit-fields add up to 32 bits (1 + 1 + 2 + 28).
typedef struct {
  unsigned Significant:1;        // TRUE if prototype is significant
  unsigned Merged:1;             // Merged after clustering so do not output
                                 // but kept for display purposes. If it has no
                                 // samples then it was actually merged.
                                 // Otherwise it matched an already significant
                                 // cluster.
  unsigned Style:2;              // spherical, elliptical, or mixed
  unsigned NumSamples:28;        // number of samples in the cluster
  CLUSTER *Cluster;              // ptr to cluster which made prototype
  DISTRIBUTION *Distrib;         // different distribution for each dimension
  FLOAT32 *Mean;                 // prototype mean
  FLOAT32 TotalMagnitude;        // total magnitude over all dimensions
  FLOAT32 LogMagnitude;          // log base e of TotalMagnitude
  FLOATUNION Variance;           // prototype variance
  FLOATUNION Magnitude;          // magnitude of density function
  FLOATUNION Weight;             // weight of density function
} PROTOTYPE;

// State of one clustering session: the description of the feature space, the
// samples gathered so far (indexed by a k-d tree), the resulting cluster tree
// and prototype list, and a cache of histogram buckets.
typedef struct {
  inT16 SampleSize;              // number of parameters per sample
  PARAM_DESC *ParamDesc;         // description of each parameter
  inT32 NumberOfSamples;         // total number of samples being clustered
  KDTREE *KDTree;                // for optimal nearest neighbor searching
  CLUSTER *Root;                 // ptr to root cluster of cluster tree
  LIST ProtoList;                // list of prototypes
  inT32 NumChar;                 // # of characters represented by samples
  // cache of reusable histograms by distribution type and number of buckets.
  // First index is a DISTRIBUTION value; the second has one slot per bucket
  // count from MINBUCKETS to MAXBUCKETS (presumably indexed as
  // count - MINBUCKETS - confirm in the implementation).
  BUCKETS* bucket_cache[DISTRIBUTION_COUNT][MAXBUCKETS + 1 - MINBUCKETS];
} CLUSTERER;

// Bounded list of sample pointers: NumSamples entries are in use out of a
// capacity of MaxNumSamples.
typedef struct {
  inT32 NumSamples;              // number of samples in list
  inT32 MaxNumSamples;           // maximum size of list
  SAMPLE *Sample[1];             // array of ptrs to sample data structures
                                 // NOTE(review): declared with one element;
                                 // presumably over-allocated to hold
                                 // MaxNumSamples pointers - confirm at the
                                 // allocation site.
} SAMPLELIST;

// low level cluster tree analysis routines.
//
// InitSampleSearch(S, C): initialises the search state S (an assignable LIST
// expression) for a walk over the cluster tree rooted at C.  S is set to
// NIL_LIST when C is NULL, otherwise to a one-element list holding C.  The
// value of the whole expression is the value assigned to S.
// Every macro argument is parenthesised in the expansion.  C is evaluated
// twice when it is non-NULL, so pass an expression without side effects.
#define InitSampleSearch(S,C) \
  ((S) = (((C) == NULL) ? NIL_LIST : push(NIL_LIST, (C))))

/*--------------------------------------------------------------------------
        Public Function Prototypes
--------------------------------------------------------------------------*/
// NOTE(review): only the declarations are visible in this header; the
// descriptions below are inferred from names and signatures and should be
// confirmed against the implementation.

// Presumably builds a new CLUSTERER for samples of SampleSize parameters each,
// with ParamDesc describing every parameter; presumably released with
// FreeClusterer().
CLUSTERER *MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]);

// Presumably adds one sample to Clusterer.  Feature is read-only and is
// presumably SampleSize floats long; CharID identifies the character the
// sample came from (cf. CLUSTER::CharID).
SAMPLE *MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, inT32 CharID);

// Presumably clusters the samples held by Clusterer under the settings in
// Config and returns a list of PROTOTYPEs (cf. CLUSTERER::ProtoList).
LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config);

// Presumably releases Clusterer and the data it owns.
void FreeClusterer(CLUSTERER *Clusterer);

// Presumably frees the prototypes in *ProtoList; taking a LIST* suggests the
// caller's list variable may be modified.
void FreeProtoList(LIST *ProtoList);

// Takes void* although the argument is a PROTOTYPE* (see the trailing
// comment), presumably so it can be passed as a generic list-element
// destructor.
void FreePrototype(void *arg);  // PROTOTYPE *Prototype);

// Presumably returns the next cluster/sample of a traversal whose state was
// set up with InitSampleSearch(); the end-of-traversal value is not visible
// here.
CLUSTER *NextSample(LIST *SearchState);

// Presumably returns the mean of Proto in the given dimension.
FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension);

// Presumably returns the standard deviation of Proto in the given dimension.
FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension);

// Presumably combines two clusters of n1 and n2 samples with means m1 and m2
// (N parameters each, described by ParamDesc), writing the merged mean to m
// and returning the merged sample count.
inT32 MergeClusters(inT16 N, PARAM_DESC ParamDesc[], inT32 n1, inT32 n2,
                    FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[]);

//--------------Global Data Definitions and Declarations---------------------------
// define errors that can be trapped
// NOTE(review): the name suggests this code is raised when an operation is
// attempted on a clusterer whose samples have already been clustered; the
// raising site is not visible in this header - confirm in the implementation.
#define ALREADYCLUSTERED  4000
#endif