/* Copyright 2006 Vikas Sindhwani (vikass@cs.uchicago.edu)
SVM-lin: Fast SVM Solvers for Supervised and Semi-supervised Learning
This file is part of SVM-lin.
SVM-lin is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SVM-lin is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SVM-lin (see gpl.txt); if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef DOXYGEN_SHOULD_SKIP_THIS
#ifndef _SSL_H
#define _SSL_H
/* OPTIMIZATION CONSTANTS */
#define CGITERMAX 10000 /* maximum number of CGLS iterations */
#define SMALL_CGITERMAX 10 /* for heuristic 1 in reference [2] */
#define EPSILON 1e-6 /* most tolerances are set to this value */
#define BIG_EPSILON 0.01 /* for heuristic 2 in reference [2] */
#define RELATIVE_STOP_EPS 1e-9 /* for L2-SVM-MFN relative stopping criterion */
#define MFNITERMAX 50 /* maximum number of MFN iterations */
#define TSVM_ANNEALING_RATE 1.5 /* rate at which lambda_u is increased in TSVM */
#define TSVM_LAMBDA_SMALL 1e-5 /* lambda_u starts from this value */
#define DA_ANNEALING_RATE 1.5 /* annealing rate for DA */
#define DA_INIT_TEMP 10 /* initial temperature relative to lambda_u */
#define DA_INNER_ITERMAX 100 /* maximum fixed temperature iterations for DA */
#define DA_OUTER_ITERMAX 30 /* maximum number of outer loops for DA */
#include <shogun/lib/common.h>
#include <shogun/features/DotFeatures.h>
namespace shogun
{
/** Data: Input examples are stored in sparse (Compressed Row Storage) format */
struct data
{
/** total number of examples (m = l + u) */
int32_t m;
/** number of labeled examples */
int32_t l;
/** number of unlabeled examples (l + u = m) */
int32_t u;
/** number of features */
int32_t n;
/** number of non-zero feature values */
int32_t nz;
/** feature matrix, accessed through shogun's dot-product interface */
shogun::CDotFeatures* features;
/** labels, one entry per example */
float64_t *Y;
/** misclassification cost associated with each example */
float64_t *C;
};
/** defines a vector of doubles */
struct vector_double
{
/** number of elements */
int32_t d;
/** pointer to the d vector elements */
float64_t *vec;
};
/** defines a vector of ints, used for index subsets */
struct vector_int
{
/** number of elements */
int32_t d;
/** pointer to the d vector elements */
int32_t *vec;
};
enum { RLS, SVM, TSVM, DA_SVM }; /* currently implemented algorithms */
/** various options: user-supplied + internal optimisation settings */
struct options
{
/* user options */
/** algorithm selector: one of RLS, SVM, TSVM, DA_SVM */
int32_t algo;
/** regularization parameter */
float64_t lambda;
/** regularization parameter over unlabeled examples */
float64_t lambda_u;
/** maximum number of TSVM switches per fixed-weight label optimization */
int32_t S;
/** expected fraction of unlabeled examples in positive class */
float64_t R;
/** cost for positive examples */
float64_t Cp;
/** cost for negative examples */
float64_t Cn;
/* internal optimization options */
/** tolerance used by all stopping criteria */
float64_t epsilon;
/** maximum number of iterations for CGLS */
int32_t cgitermax;
/** maximum number of iterations for L2_SVM_MFN */
int32_t mfnitermax;
/** 1.0 if a bias term is to be used, 0.0 otherwise */
float64_t bias;
};
/** used in line search */
class Delta {
public:
/** default constructor */
Delta() { delta=0.0; index=0;s=0; }
/** delta */
float64_t delta;
/** index */
int32_t index;
/** s */
int32_t s;
};
/** strict weak ordering on Delta records by their delta value, so they can
 *  be sorted with the standard algorithms */
inline bool operator<(const Delta& a, const Delta& b)
{
	return a.delta < b.delta;
}
/** initializes vector_double A to length k, all elements set to a */
void initialize(struct vector_double *A, int32_t k, float64_t a);
/** initializes vector_int A to length k, elements set to 1,2,...,k */
void initialize(struct vector_int *A, int32_t k);
/** extracts the labeled examples from Data and copies them into Data_Labeled */
void GetLabeledData(struct data *Data_Labeled, const struct data *Data);
/** returns the squared Euclidean length of A */
float64_t norm_square(const vector_double *A);
/** ssl_train: takes data, options and uninitialized weight and output
 *  vector_doubles, and routes the problem to the algorithm selected in
 *  Options->algo; the learnt weight vector and the outputs it gives on the
 *  data matrix are saved into W and O */
void ssl_train(
struct data *Data,
struct options *Options,
struct vector_double *W, /* weight vector */
struct vector_double *O); /* output vector */
/* svmlin algorithms and their subroutines */

/** Conjugate Gradient solver for sparse linear least-squares problems.
 *  Solves: min_w 0.5*Options->lambda*w'*w
 *          + 0.5*sum_{i in Subset} Data->C[i]*(Y[i] - w'x_i)^2
 *  over the subset of examples x_i specified by vector_int Subset */
int32_t CGLS(
const struct data *Data,
const struct options *Options,
const struct vector_int *Subset,
struct vector_double *Weights,
struct vector_double *Outputs);
/** Linear Modified Finite Newton L2-SVM.
 *  Solves: min_w 0.5*Options->lambda*w'*w
 *          + 0.5*sum_i Data->C[i]*max(0, 1 - Y[i] w'x_i)^2 */
int32_t L2_SVM_MFN(
const struct data *Data,
struct options *Options,
struct vector_double *Weights,
struct vector_double *Outputs,
int32_t ini); /* use ini=0 if no good starting guess for Weights, else 1 */
/** exact line search between the current solution (w, o) and the Newton
 *  candidate (w_bar, o_bar); returns the optimal step.
 *  NOTE(review): Y/C appear to be the labels and per-example costs, d the
 *  weight dimension and l the number of examples -- inferred from the
 *  surrounding declarations, confirm against the implementation */
float64_t line_search(
float64_t *w,
float64_t *w_bar,
float64_t lambda,
float64_t *o,
float64_t *o_bar,
float64_t *Y,
float64_t *C,
int32_t d,
int32_t l);
/** Transductive L2-SVM.
 *  Solves: min_{w, Y[i], i in unlabeled}
 *            0.5*Options->lambda*w'*w
 *          + 0.5*(1/Data->l)*sum_{i in labeled} max(0, 1 - Y[i] w'x_i)^2
 *          + 0.5*(Options->lambda_u/Data->u)*sum_{i in unlabeled} max(0, 1 - Y[i] w'x_i)^2
 *  subject to: (1/Data->u)*sum_{i in unlabeled} max(0, Y[i]) = Options->R */
int32_t TSVM_MFN(
const struct data *Data,
struct options *Options,
struct vector_double *Weights,
struct vector_double *Outputs);
/** switches labels of unlabeled examples (indexed by JU, u of them) based on
 *  outputs o, performing at most S switches per call -- NOTE(review):
 *  inferred from parameter names and options::S, confirm */
int32_t switch_labels(
float64_t* Y,
float64_t* o,
int32_t* JU,
int32_t u,
int32_t S);
/** Deterministic Annealing semi-supervised SVM */
int32_t DA_S3VM(
struct data *Data,
struct options *Options,
struct vector_double *Weights,
struct vector_double *Outputs);
/** optimizes the distribution p over u unlabeled examples at temperature T
 *  with expected positive fraction r, given g -- NOTE(review): exact role of
 *  g not visible from this header, confirm */
void optimize_p(
const float64_t* g, int32_t u, float64_t T, float64_t r, float64_t*p);
/** optimizes the weight vector for fixed distribution p (DA inner loop);
 *  use ini=0 if no good starting guess for Weights, else 1 */
int32_t optimize_w(
const struct data *Data,
const float64_t *p,
struct options *Options,
struct vector_double *Weights,
struct vector_double *Outputs,
int32_t ini);
/** evaluates the transductive objective from the squared weight norm, labels
 *  and outputs over m examples, with regularizers lambda and lambda_u */
float64_t transductive_cost(
float64_t normWeights,
float64_t *Y,
float64_t *Outputs,
int32_t m,
float64_t lambda,
float64_t lambda_u);
/** entropy of the distribution p over u unlabeled examples */
float64_t entropy(const float64_t *p, int32_t u);
/** KL-divergence between distributions p and q of length u */
float64_t KL(const float64_t *p, const float64_t *q, int32_t u);
}
#endif // _SSL_H
#endif // DOXYGEN_SHOULD_SKIP_THIS