This file is indexed.

/usr/include/timbl/Instance.h is in libtimbl4-dev 6.4.4-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
/*
  $Id: Instance.h 15565 2013-01-07 14:27:05Z sloot $
  $URL: https://ilk.uvt.nl/svn/trunk/sources/Timbl6/include/timbl/Instance.h $

  Copyright (c) 1998 - 2013
  ILK   - Tilburg University
  CLiPS - University of Antwerp
 
  This file is part of timbl

  timbl is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 3 of the License, or
  (at your option) any later version.

  timbl is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, see <http://www.gnu.org/licenses/>.

  For questions and suggestions, see:
      http://ilk.uvt.nl/software.html
  or send mail to:
      timbl@uvt.nl
*/
#ifndef TIMBL_INSTANCE_H
#define TIMBL_INSTANCE_H

#include <stdexcept>
#include <list>
#include <vector>
#include <map>
#include "timbl/MsgClass.h"
#include "timbl/Matrices.h"
#include "ticcutils/TreeHash.h"

namespace Timbl {
  
  // Result type of Feature::prepare_numeric_stats().
  // The exact meaning of each enumerator is not defined in this header.
  enum FeatVal_Stat {
    Unknown,
    Singleton,
    SingletonNumeric,
    NumericValue,
    NotNumeric
  };

  class TargetValue;

  // One distribution entry: a target value pointer together with an
  // integer frequency and a floating point weight.
  // The inline code in this class never dereferences or deletes the
  // stored TargetValue pointer.
  // Copy-constructible; assignment is declared private and no definition
  // is visible in this header.
  class Vfield {
    friend class ValueDistribution;
    friend class WValueDistribution;
    friend std::ostream& operator<<( std::ostream&, const Vfield& );
    friend std::ostream& operator<<( std::ostream&, const Vfield * );

  public:
    // Builds an entry for 'val' with frequency 'freq' and weight 'w'.
    Vfield( const TargetValue *val, int freq, double w )
      : value( val ),
        frequency( freq ),
        weight( w )
    {
    }

    // Member-wise copy of 'in'.
    Vfield( const Vfield& in )
      : value( in.value ),
        frequency( in.frequency ),
        weight( in.weight )
    {
    }

    ~Vfield() {}

    std::ostream& put( std::ostream& ) const;

    // Target value getter / setter.
    const TargetValue *Value() const { return value; }
    void Value( const TargetValue *t ) { value = t; }

    // Frequency handling.  IncFreq and AddFreq both add to the frequency;
    // AddFreq additionally adds the same amount to the weight.
    size_t Freq() const { return frequency; }
    void IncFreq( int inc = 1 ) { frequency += inc; }
    void AddFreq( int f ) {
      frequency += f;
      weight += f;
    }
    // No guard against a zero frequency: the unsigned counter wraps around.
    void DecFreq() { --frequency; }

    // Weight getter / setter.
    double Weight() const { return weight; }
    void SetWeight( double w ) { weight = w; }

    size_t Index();

  protected:
    const TargetValue *value;
    size_t frequency;
    double weight;

  private:
    // Not assignable.
    Vfield& operator=( const Vfield& );
  };
  
  class Target;

  class WValueDistribution;

  // A collection of Vfield entries held in a std::map keyed by size_t,
  // together with a running item count (total_items).
  // The destructor calls clear().
  // NOTE(review): a copy constructor is declared but no copy-assignment
  // operator, so the compiler-generated assignment would copy the Vfield
  // pointers stored in 'distribution' -- confirm against clear() whether
  // assigning one distribution to another is safe.
  class ValueDistribution {
    friend std::ostream& operator<<( std::ostream&, const ValueDistribution& );
    friend std::ostream& operator<<( std::ostream&, const ValueDistribution * );
    friend class WValueDistribution;

  public:
    typedef std::map<size_t, Vfield *> VDlist;
    typedef VDlist::const_iterator dist_iterator;

    // Starts out empty with a zero item count.
    ValueDistribution() : total_items( 0 ) {}
    ValueDistribution( const ValueDistribution& );
    virtual ~ValueDistribution() { clear(); }

    // totalSize() reports total_items; size() and empty() report on the
    // number of map entries.
    size_t totalSize() const { return total_items; }
    size_t size() const { return distribution.size(); }
    bool empty() const { return distribution.empty(); }
    void clear();

    // Read-only iteration over the underlying map.
    dist_iterator begin() const { return distribution.begin(); }
    dist_iterator end() const { return distribution.end(); }

    virtual const TargetValue* BestTarget( bool &, bool = false ) const;
    void Merge( const ValueDistribution& );
    virtual void SetFreq( const TargetValue *, int, double = 1.0 );
    virtual bool IncFreq( const TargetValue *, size_t, double = 1.0 );
    void DecFreq( const TargetValue * );

    static ValueDistribution *read_distribution( std::istream &,
                                                 Target *,
                                                 bool );
    static ValueDistribution *read_distribution_hashed( std::istream &,
                                                        Target *,
                                                        bool );

    const std::string DistToString() const;
    const std::string DistToStringW( int ) const;
    double Confidence( const TargetValue * ) const;
    virtual const std::string SaveHashed() const;
    virtual const std::string Save() const;

    // True when the item count is zero.
    bool ZeroDist() const { return !total_items; }
    double Entropy() const;
    ValueDistribution *to_VD_Copy() const;
    virtual WValueDistribution *to_WVD_Copy() const;

  protected:
    virtual void DistToString( std::string&, double = 0 ) const;
    virtual void DistToStringWW( std::string&, int ) const;
    const TargetValue* BestTargetN( bool &, bool = false ) const;
    const TargetValue* BestTargetW( bool &, bool = false ) const;

    // Returns a new, empty ValueDistribution allocated with new.
    virtual ValueDistribution *clone() const {
      return new ValueDistribution;
    }

    size_t total_items;
    VDlist distribution;
  };

  // ValueDistribution subclass that re-declares BestTarget, SetFreq,
  // IncFreq, to_WVD_Copy, SaveHashed, Save, DistToString, DistToStringWW
  // and clone, all of which are virtual in the base class.
  // Unlike the base class declarations, SetFreq and IncFreq carry no
  // default for their double argument here.
  class WValueDistribution : public ValueDistribution {
  public:
    WValueDistribution() {}

    const TargetValue* BestTarget( bool &, bool = false ) const;
    void SetFreq( const TargetValue *, int, double );
    bool IncFreq( const TargetValue *, size_t, double );
    WValueDistribution *to_WVD_Copy() const;
    const std::string SaveHashed() const;
    const std::string Save() const;

    void Normalize();
    void Normalize_1( double, const Target * );
    void Normalize_2();
    void MergeW( const ValueDistribution&, double );

  private:
    void DistToString( std::string&, double = 0 ) const;
    void DistToStringWW( std::string&, int ) const;

    // Returns a new, empty WValueDistribution allocated with new.
    WValueDistribution *clone() const {
      return new WValueDistribution();
    }
  };

  // Base class for named values: a name, an index and a frequency counter.
  // The name is kept as a reference to the string handed to the
  // constructor, not as a copy, so that string must outlive this object.
  // Copy construction and assignment are declared protected; no
  // definitions are visible in this header.
  class ValueClass {
  public:
    // Binds to name 'n' with index 'i'; the frequency starts at 1.
    ValueClass( const std::string& n, size_t i )
      : name( n ),
        index( i ),
        Frequency( 1 )
    {
    }
    virtual ~ValueClass() {}

    // Frequency setter, adder and getter.
    void ValFreq( size_t f ) { Frequency = f; }
    void IncValFreq( int f ) { Frequency += f; }
    size_t ValFreq() const { return Frequency; }

    // Step the frequency up or down by one (no bounds checking).
    void incr_val_freq() { ++Frequency; }
    void decr_val_freq() { --Frequency; }

    size_t Index() const { return index; }
    const std::string& Name() const { return name; }

    friend std::ostream& operator<<( std::ostream& os, ValueClass const *vc );

  protected:
    const std::string& name;
    size_t index;
    size_t Frequency;
    ValueClass( const ValueClass& );
    ValueClass& operator=( const ValueClass& );
  };
  
  // ValueClass subclass used wherever this header refers to a target
  // value (e.g. the 'const TargetValue *' held by Vfield).
  // It adds no members of its own here; the constructor is only declared
  // in this header.
  class TargetValue: public ValueClass {
  public:
    TargetValue( const std::string&, size_t );
  };
  
  // Sparse map from size_t keys to double values, tagged with a
  // 'dimension' that the inline code only stores.
  class SparseValueProbClass {
    friend std::ostream& operator<< ( std::ostream&, SparseValueProbClass * );

  public:
    typedef std::map< size_t, double > IDmaptype;
    typedef IDmaptype::const_iterator IDiterator;

    // Starts with an empty map and remembers 'd' as the dimension.
    SparseValueProbClass( size_t d ) : dimension( d ) {}

    // Stores 'd' under key 'i', replacing any value already there.
    void Assign( const size_t i, const double d ) { vc_map[i] = d; }

    // Drops every stored entry.
    void Clear() { vc_map.clear(); }

    // Read-only iteration over the stored entries, in key order.
    IDiterator begin() const { return vc_map.begin(); }
    IDiterator end() const { return vc_map.end(); }

  private:
    IDmaptype vc_map;
    size_t dimension;
  };
  
  // ValueClass subclass that additionally carries a ValueDistribution
  // (TargetDist) and a pointer to a SparseValueProbClass.
  // Copy construction and assignment are declared private.
  class FeatureValue : public ValueClass {
    friend class Feature;
    friend struct D_D;

  public:
    FeatureValue( const std::string& );
    FeatureValue( const std::string&, size_t );
    ~FeatureValue();

    // Merges 'vd' into TargetDist, then sets Frequency to the resulting
    // totalSize() of TargetDist.
    void ReconstructDistribution( const ValueDistribution& vd ) {
      TargetDist.Merge( vd );
      Frequency = TargetDist.totalSize();
    }

    // True when this value has index 0.
    bool isUnknown() const { return 0 == index; }

    // Returns the stored pointer as is; ownership is not visible here.
    SparseValueProbClass *valueClassProb() const { return ValueClassProb; }

  private:
    SparseValueProbClass *ValueClassProb;
    ValueDistribution TargetDist;
    FeatureValue( const FeatureValue& );
    FeatureValue& operator=( const FeatureValue& );
  };
    
  // Map from size_t keys to ValueClass pointers; the type of
  // BaseFeatTargClass::ValuesMap.
  typedef std::map< size_t, ValueClass *> IVCmaptype;
  // Vector of ValueClass pointers; the type of
  // BaseFeatTargClass::ValuesArray.
  typedef std::vector<ValueClass *> VCarrtype;

  // Abstract base (Lookup is pure virtual) shared by Target and Feature.
  // Derives from MsgClass and exposes two public containers of
  // ValueClass pointers: ValuesArray (vector) and ValuesMap (map keyed
  // by size_t).
  // Copy construction is protected and assignment is private; neither is
  // defined in this header.
  class BaseFeatTargClass: public MsgClass {
  public:
    BaseFeatTargClass( Hash::StringHash * );
    virtual ~BaseFeatTargClass();
    size_t EffectiveValues() const;
    size_t TotalValues() const;
    VCarrtype ValuesArray;
    IVCmaptype ValuesMap;
    // Implemented by Target and Feature with covariant return types.
    virtual ValueClass *Lookup( const std::string& ) const = 0;
  protected:
    // NOTE(review): presumably the hash passed to the constructor and
    // not owned by this object -- confirm in the implementation file.
    Hash::StringHash *TokenTree;
    bool is_copy;
    BaseFeatTargClass( const BaseFeatTargClass& );
  private:
    BaseFeatTargClass& operator=( const BaseFeatTargClass& );
  };
  
  
  // Concrete BaseFeatTargClass whose add and lookup functions work with
  // TargetValue pointers.
  class Target : public BaseFeatTargClass {
  public:
    // Hands the hash pointer 'T' on to the base class constructor.
    Target( Hash::StringHash *T ) : BaseFeatTargClass( T ) {}

    TargetValue *add_value( const std::string&, int freq = 1 );
    TargetValue *add_value( size_t, int freq = 1 );
    TargetValue *Lookup( const std::string& ) const;
    TargetValue *ReverseLookup( size_t ) const;
    bool decrement_value( TargetValue * );
    bool increment_value( TargetValue * );
    TargetValue *MajorityClass() const;
  };

  class metricClass;

  // A single feature: a BaseFeatTargClass extended with a metric pointer,
  // an ignore flag, a weight, a set of statistics fields and a public
  // metric_matrix pointer over FeatureValue pointers.
  // Copy construction and assignment are declared private.
  class Feature : public BaseFeatTargClass {
    friend class MBLClass;

  public:
    Feature( Hash::StringHash *T );
    ~Feature();

    // The inline functions below are plain getters and setters for the
    // private field each one names.
    bool Ignore() const { return ignore; }
    void Ignore( const bool val ) { ignore = val; }

    bool setMetricType( const MetricType );
    MetricType getMetricType() const;

    double Weight() const { return weight; }
    void SetWeight( const double w ) { weight = w; }

    double InfoGain() const { return info_gain; }
    void InfoGain( const double w ) { info_gain = w; }

    double SplitInfo() const { return split_info; }
    void SplitInfo( const double w ) { split_info = w; }

    double GainRatio() const { return gain_ratio; }
    void GainRatio( const double w ) { gain_ratio = w; }

    double ChiSquare() const { return chi_square; }
    void ChiSquare( const double w ) { chi_square = w; }

    double SharedVariance() const { return shared_variance; }
    void SharedVariance( const double w ) { shared_variance = w; }

    double StandardDeviation() const { return standard_deviation; }
    void StandardDeviation( const double w ) { standard_deviation = w; }

    double Min() const { return n_min; }
    void Min( const double val ) { n_min = val; }

    double Max() const { return n_max; }
    void Max( const double val ) { n_max = val; }

    double fvDistance( FeatureValue *, FeatureValue *, size_t = 1 ) const;

    FeatureValue *add_value( const std::string&, TargetValue *, int = 1 );
    FeatureValue *add_value( size_t, TargetValue *, int = 1 );
    FeatureValue *Lookup( const std::string& ) const;
    bool decrement_value( FeatureValue *, TargetValue * );
    bool increment_value( FeatureValue *, TargetValue * );

    bool isNumerical() const;
    bool isStorableMetric() const;

    bool AllocSparseArrays( size_t );
    void InitSparseArrays();
    // Reports the vcpb_read flag.
    bool ArrayRead() { return vcpb_read; }

    bool matrixPresent( bool& ) const;
    size_t matrix_byte_size() const;
    bool store_matrix( int = 1 );
    void clear_matrix();
    bool fill_matrix( std::istream& );
    void print_matrix( std::ostream&, bool = false ) const;
    void print_vc_pb_array( std::ostream& ) const;
    bool read_vc_pb_array( std::istream & );

    FeatVal_Stat prepare_numeric_stats();
    void Statistics( double, Target *, bool );
    void NumStatistics( double, Target *, int, bool );

    // Setter and getter for matrix_clip_freq.
    void ClipFreq( size_t f ) { matrix_clip_freq = f; }
    size_t ClipFreq() const { return matrix_clip_freq; }

    SparseSymetricMatrix<FeatureValue *> *metric_matrix;

  private:
    metricClass *metric;
    bool ignore;
    bool numeric;
    bool vcpb_read;
    enum ps_stat { ps_undef, ps_failed, ps_ok, ps_read };
    ps_stat PrestoreStatus;
    MetricType Prestored_metric;
    void delete_matrix();
    double entropy;
    double info_gain;
    double split_info;
    double gain_ratio;
    double chi_square;
    double shared_variance;
    double standard_deviation;
    size_t matrix_clip_freq;
    long int *n_dot_j;
    long int *n_i_dot;
    double n_min;
    double n_max;
    size_t SaveSize;
    size_t SaveNum;
    double weight;

    void Statistics( double );
    void NumStatistics( std::vector<FeatureValue *>&, double, int );
    void ChiSquareStatistics( std::vector<FeatureValue *>&, size_t, Target * );
    void ChiSquareStatistics( Target * );
    void SharedVarianceStatistics( Target *, int );
    void StandardDeviationStatistics();

    // Not copyable.
    Feature( const Feature& );
    Feature& operator=( const Feature& );
  };
  
  class Instance {
    friend std::ostream& operator<<(std::ostream&, const Instance& );
    friend std::ostream& operator<<(std::ostream&, const Instance * );
  public:
    Instance();
    Instance( size_t s ){ Init( s ); };
    ~Instance();
    void Init( size_t );
    void clear();
    double ExemplarWeight() const { return sample_weight; }; 
    void ExemplarWeight( const double sw ){ sample_weight = sw; }; 
    int Occurrences() const { return occ; };
    void Occurrences( const int o ) { occ = o; };
    size_t size() const { return FV.size(); };
    std::vector<FeatureValue *> FV;
    void permute( const std::vector<size_t> );
    TargetValue *TV;
  private:
    double sample_weight; // relative weight
    int occ;
  };
  
}
#endif