/usr/include/speech_tools/EST_Wagon.h is in libestools2.1-dev 1:2.1~release-6.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*************************************************************************/
/* */
/* Centre for Speech Technology Research */
/* University of Edinburgh, UK */
/* Copyright (c) 1996,1997 */
/* All Rights Reserved. */
/* */
/* Permission is hereby granted, free of charge, to use and distribute */
/* this software and its documentation without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of this work, and to */
/* permit persons to whom this work is furnished to do so, subject to */
/* the following conditions: */
/* 1. The code must retain the above copyright notice, this list of */
/* conditions and the following disclaimer. */
/* 2. Any modifications must be clearly marked as such. */
/* 3. Original authors' names are not deleted. */
/* 4. The authors' names are not used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
/* THIS SOFTWARE. */
/* */
/*************************************************************************/
/* Author : Alan W Black */
/* Date : May 1996 */
/*-----------------------------------------------------------------------*/
/* */
/* Public declarations for Wagon (CART builder) */
/* */
/*=======================================================================*/
#ifndef __WAGON_H__
#define __WAGON_H__
#include "EST_String.h"
#include "EST_Val.h"
#include "EST_TVector.h"
#include "EST_TList.h"
#include "EST_simplestats.h" /* For EST_SuffStats class */
#include "EST_Track.h"
#include "siod.h"
/* Write WMESS followed by a newline to cerr, then terminate the process */
/* with exit(-1).  WMESS is pasted straight into the stream expression, */
/* so it may itself be a chain of << insertions. */
#define wagon_error(WMESS) (cerr << WMESS << endl,exit(-1))
// I get floating point exceptions on Alphas when I do any comparisons
// with HUGE_VAL or FLT_MAX so I'll make my own
#define WGN_HUGE_VAL 1.0e20
/* One row of data: an EST_FVector whose cells can be read and written */
/* either as floats or as ints.  The readers use the unchecked element */
/* accessor, the writers use the checked one. */
class WVector : public EST_FVector
{
  public:
    /* Build a vector with n cells. */
    WVector(int n) : EST_FVector(n) {}

    /* Cell n converted to int. */
    int get_int_val(int n) const
    {
        return static_cast<int>(a_no_check(n));
    }

    /* Cell n as stored. */
    float get_flt_val(int n) const
    {
        return a_no_check(n);
    }

    /* Store the integer i in cell n. */
    void set_int_val(int n,int i)
    {
        a_check(n) = i;
    }

    /* Store the float f in cell n. */
    void set_flt_val(int n,float f)
    {
        a_check(n) = f;
    }
};
/* Containers of WVector pointers: a list form (the base class of */
/* WDataSet) and a vector form (used by WNode and the split functions). */
typedef EST_TList<WVector *> WVectorList;
typedef EST_TVector<WVector *> WVectorVector;
/* Different types of feature */
/* The enumerators fall into three groups, marked by the inline comments. */
enum wn_dtype {/* for predictees and predictors */
wndt_binary, wndt_float, wndt_class,
/* for predictees only */
wndt_cluster, wndt_vector, wndt_matrix, wndt_trajectory,
/* for ignored features */
wndt_ignore};
/* A list of WVector samples together with per-feature description */
/* (type, ignore flag and name for each feature index). */
class WDataSet : public WVectorList {
  private:
    int dlength;             // value reported by width()
    EST_IVector p_type;      // per-feature type, read by ftype()
    EST_IVector p_ignore;    // per-feature ignore flag
    EST_StrVector p_name;    // per-feature name
  public:
    /* Start with a defined width of 0.  Without this a freshly built */
    /* data set with automatic or dynamic storage would return an */
    /* indeterminate value from width().  Presumably load_description() */
    /* is what sets the real width; it is defined outside this header. */
    WDataSet() : dlength(0) {}

    /* Defined elsewhere. */
    void load_description(const EST_String& descfname,LISP ignores);
    void ignore_non_numbers();

    /* Type stored for feature i. */
    int ftype(const int &i) const {return p_type(i);}
    /* Ignore flag stored for feature i. */
    int ignore(int i) const {return p_ignore(i); }
    /* Overwrite the ignore flag of feature i. */
    void set_ignore(int i,int value) { p_ignore[i] = value; }
    /* Name stored for feature i. */
    const EST_String &feat_name(const int &i) const {return p_name(i);}
    /* Number of sample vectors held (length of the underlying list). */
    int samples(void) const {return length();}
    /* Stored width of the data set. */
    int width(void) const {return dlength;}
};
/* Operator kinds; a WQuestion stores one of these in its op member. */
enum wn_oper {wnop_equal, wnop_binary, wnop_greaterthan,
wnop_lessthan, wnop_is, wnop_in, wnop_matches};
class WQuestion {
private:
int feature_pos;
wn_oper op;
int yes;
int no;
EST_Val operand1;
EST_IList operandl;
float score;
public:
WQuestion() {;}
WQuestion(const WQuestion &s)
{ feature_pos=s.feature_pos;
op=s.op; yes=s.yes; no=s.no; operand1=s.operand1;
operandl = s.operandl; score=s.score;}
~WQuestion() {;}
WQuestion(int fp, wn_oper o,EST_Val a)
{ feature_pos=fp; op=o; operand1=a; }
void set_fp(const int &fp) {feature_pos=fp;}
void set_oper(const wn_oper &o) {op=o;}
void set_operand1(const EST_Val &a) {operand1 = a;}
void set_yes(const int &y) {yes=y;}
void set_no(const int &n) {no=n;}
int get_yes(void) const {return yes;}
int get_no(void) const {return no;}
const int get_fp(void) const {return feature_pos;}
const wn_oper get_op(void) const {return op;}
const EST_Val get_operand1(void) const {return operand1;}
const EST_IList &get_operandl(void) const {return operandl;}
const float get_score(void) const {return score;}
void set_score(const float &f) {score=f;}
const int ask(const WVector &w) const;
friend ostream& operator<<(ostream& s, const WQuestion &q);
};
/* Kind tag held by a WImpurity; a default-constructed WImpurity starts */
/* as wnim_unset. */
enum wnim_type {wnim_unset, wnim_float, wnim_class,
wnim_cluster, wnim_vector, wnim_matrix,
wnim_trajectory};
// Impurity measure for cumulating impurities from set of data
class WImpurity {
private:
wnim_type t;
EST_SuffStats a;
EST_DiscreteProbDistribution p;
float cluster_impurity();
float cluster_member_mean(int i);
float vector_impurity();
float trajectory_impurity();
public:
EST_IList members; // Maybe there should be a cluster class
EST_SuffStats **trajectory;
float score;
int l,width;
WImpurity() { t=wnim_unset; a.reset(); trajectory=0; l=0; width=0; }
~WImpurity();
WImpurity(const WVectorVector &ds);
void copy(const WImpurity &s)
{
int i,j;
t=s.t; a=s.a; p=s.p; members=s.members; l=s.l; width=s.width;
score = s.score;
if (s.trajectory)
{
trajectory = new EST_SuffStats *[l];
for (i=0; i<l; i++)
{
trajectory[i] = new EST_SuffStats[width];
for (j=0; j<width; j++)
trajectory[i][j] = s.trajectory[i][j];
}
}
}
WImpurity &operator = (const WImpurity &a) { copy(a); return *this; }
float measure(void);
double samples(void);
wnim_type type(void) const { return t;}
void cumulate(const float pv,double count=1.0);
EST_Val value(void);
EST_DiscreteProbDistribution &pd() { return p; }
float cluster_distance(int i); // distance i from centre in sds
int in_cluster(int i); // distance i from centre < most remote member
float cluster_ranking(int i); // position in closeness to centre
friend ostream& operator<<(ostream &s, WImpurity &imp);
};
/* One entry of a singly linked chain: a question, a score, and a best */
/* token with its frequency and sample count.  Deleting an entry deletes */
/* every entry after it through the next pointer. */
/* NOTE(review): no copy constructor or assignment is declared, so a copy */
/* would share next with the original -- avoid copying by value. */
class WDlist {
  private:
    float p_score;
    WQuestion p_question;
    EST_String p_token;
    int p_freq;
    int p_samples;
    WDlist *next;
  public:
    /* Start unlinked with zeroed score and counts.  The numeric members */
    /* used to be left uninitialised, so score() on a fresh entry */
    /* returned an indeterminate value. */
    WDlist() : p_score(0.0f), p_freq(0), p_samples(0), next(0) {}

    /* delete on a null pointer is a no-op, so no explicit test is needed. */
    ~WDlist() { delete next; }

    void set_score(float s) { p_score = s; }
    void set_question(const WQuestion &q) { p_question = q; }
    /* Record token t together with its frequency and the sample count. */
    void set_best(const EST_String &t,int freq, int samples)
    { p_token = t; p_freq = freq; p_samples = samples;}
    float score() const {return p_score;}
    const EST_String &token(void) const {return p_token;}
    const WQuestion &question() const {return p_question;}
    /* Defined elsewhere. */
    EST_Val predict(const WVector &w);
    friend WDlist *add_to_dlist(WDlist *l,WDlist *a);
    friend ostream &operator<<(ostream &s, WDlist &d);
};
/* A binary tree node: the sample vectors that reached it, the question */
/* asked at it, the impurity of its data, and two child pointers.  The */
/* node deletes both children when it is destroyed. */
class WNode {
  private:
    WVectorVector data;
    WQuestion question;
    WImpurity impurity;
    WNode *left;
    WNode *right;
    void print_out(ostream &s, int margin);
    /* Non-zero unless both children are present. */
    int leaf(void) const
    {
        return !((left != 0) && (right != 0));
    }
    int pure(void);
  public:
    /* A new node has no children. */
    WNode() : left(0), right(0) {}

    /* Release both subtrees and clear the pointers. */
    ~WNode()
    {
        delete left;
        left = 0;
        delete right;
        right = 0;
    }

    WVectorVector &get_data(void)
    {
        return data;
    }
    /* Attach l and r as the children, replacing the stored pointers. */
    void set_subnodes(WNode *l,WNode *r)
    {
        left = l;
        right = r;
    }
    void set_impurity(const WImpurity &imp)
    {
        impurity = imp;
    }
    void set_question(const WQuestion &q)
    {
        question = q;
    }
    void prune(void);
    void held_out_prune(void);
    WImpurity &get_impurity(void)
    {
        return impurity;
    }
    WQuestion &get_question(void)
    {
        return question;
    }
    EST_Val predict(const WVector &w);
    WNode *predict_node(const WVector &d);
    /* Number of sample vectors stored at this node. */
    int samples(void) const
    {
        return data.n();
    }
    friend ostream& operator<<(ostream &s, WNode &n);
};
/* Global data objects shared by the Wagon code; all are defined */
/* elsewhere.  wgn_dataset and wgn_test_dataset are both WDataSet */
/* objects (presumably the training and test data -- confirm against */
/* the definitions). */
extern Discretes wgn_discretes;
extern WDataSet wgn_dataset;
extern WDataSet wgn_test_dataset;
extern EST_FMatrix wgn_DistMatrix;
extern EST_Track wgn_VertexTrack;
extern EST_Track wgn_UnitTrack;
extern EST_Track wgn_VertexFeats;
/* Public entry points; all are defined elsewhere. */
/* Loading: a data description file (with a LISP ignore list) and a */
/* data file into the data set ds. */
void wgn_load_datadescription(EST_String fname,LISP ignores);
void wgn_load_dataset(WDataSet &ds,EST_String fname);
/* Builders: each returns a WNode pointer.  The float reference */
/* parameter presumably receives the resulting score -- confirm against */
/* the definitions. */
WNode *wgn_build_tree(float &score);
WNode *wgn_build_dlist(float &score,ostream *output);
WNode *wagon_stepwise(float limit);
/* Scoring and splitting of the vectors in ds by question q; y and n */
/* are presumably filled with the yes and no partitions -- confirm. */
float wgn_score_question(WQuestion &q, WVectorVector &ds);
void wgn_find_split(WQuestion &q,WVectorVector &ds,
WVectorVector &y,WVectorVector &n);
/* Summarise results for tree; output is an optional stream pointer. */
float summary_results(WNode &tree,ostream *output);
/* Global option and state variables; all are defined elsewhere. */
/* Their exact meaning is set by the code that defines and reads them. */
extern int wgn_min_cluster_size;
extern int wgn_held_out;
extern int wgn_prune;
extern int wgn_quiet;
extern int wgn_verbose;
extern int wgn_predictee;
extern int wgn_count_field;
extern EST_String wgn_count_field_name;
extern EST_String wgn_predictee_name;
extern float wgn_float_range_split;
extern float wgn_balance;
extern EST_String wgn_opt_param;
extern EST_String wgn_vertex_output;
/* Accessors for a question held as a LISP list X: */
/*   wgn_ques_feature  - car of X, as a C string */
/*   wgn_ques_oper_str - car of the cdr of X, as a C string */
/*   wgn_ques_operand  - car of the cdr of the cdr of X, as a LISP value */
#define wgn_ques_feature(X) (get_c_string(car(X)))
#define wgn_ques_oper_str(X) (get_c_string(car(cdr(X))))
#define wgn_ques_operand(X) (car(cdr(cdr(X))))
/* Ask a LISP-form question of a LISP value; defined elsewhere. */
int wagon_ask_question(LISP question, LISP value);
#endif /* __WAGON_H__ */
|