/usr/include/shogun/converter/LocallyLinearEmbedding.h is in libshogun-dev 1.1.0-4ubuntu2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 | /*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#ifndef LOCALLYLINEAREMBEDDING_H_
#define LOCALLYLINEAREMBEDDING_H_
#include <shogun/lib/config.h>
#ifdef HAVE_LAPACK
#include <shogun/converter/EmbeddingConverter.h>
#include <shogun/features/Features.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/distance/Distance.h>
namespace shogun
{
class CFeatures;
class CDistance;
/** @brief the class LocallyLinearEmbedding used to preprocess
* data using Locally Linear Embedding algorithm described in
*
* Saul, L. K., Ave, P., Park, F., & Roweis, S. T. (2001).
* An Introduction to Locally Linear Embedding. Available from, 290(5500), 2323-2326.
* Retrieved from:
* http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7319&rep=rep1&type=pdf
*
* The process of finding nearest neighbors is parallel and
* involves Fibonacci Heap and Euclidian distance.
*
* Linear reconstruction step runs in parallel for objects and
* involves LAPACK routine DPOSV for solving a system of linear equations.
*
* The eigenproblem stated in the algorithm is solved with LAPACK routine
* DSYEVR or with ARPACK DSAUPD/DSEUPD routines if available.
*
* Due to computation speed, ARPACK is being used with small
* regularization of weight matrix and Cholesky factorization is used
* internally for Lanzcos iterations (in case of only LAPACK is available)
* and SUPERLU library for fast solving sparse equations stated by ARPACK
* is being used if available.
*
* This class also have capability of selecting k automatically in
* range between "k" and "max_k" in case if "auto_k" is true. This
* is being done using ternary search of minima of
* the mean reconstruction error. The reconstruction error is
* considered to have only one global minimum in this mode.
*
* It is optimized with alignment formulation as described in
*
* Zhao, D. (2006).
* Formulating LLE using alignment technique.
* Pattern Recognition, 39(11), 2233-2235.
* Retrieved from http://linkinghub.elsevier.com/retrieve/pii/S0031320306002160
*
*/
class CLocallyLinearEmbedding: public CEmbeddingConverter
{
public:
/** constructor */
CLocallyLinearEmbedding();
/** destructor */
virtual ~CLocallyLinearEmbedding();
/** apply preprocessor to features
* @param features
*/
virtual CFeatures* apply(CFeatures* features);
/** setter for k parameter
* @param k k value
*/
void set_k(int32_t k);
/** getter for k parameter
* @return m_k value
*/
int32_t get_k() const;
/** setter for max_k parameter
* @param max_k max_k value
*/
void set_max_k(int32_t max_k);
/** getter for max_k parameter
* @return m_max_k value
*/
int32_t get_max_k() const;
/** setter for auto_k parameter
* @param auto_k auto_k value
*/
void set_auto_k(bool auto_k);
/** getter for auto_k parameter
* @return m_auto_k value
*/
bool get_auto_k() const;
/** setter for reconstruction shift parameter
* @param reconstruction_shift reconstruction shift value
*/
void set_reconstruction_shift(float64_t reconstruction_shift);
/** getter for reconstruction shift parameter
* @return m_reconstruction_shift value
*/
float64_t get_reconstruction_shift() const;
/** setter for nullspace shift parameter
* @param nullspace_shift nullsapce shift value
*/
void set_nullspace_shift(float64_t nullspace_shift);
/** getter for nullspace shift parameter
* @return m_nullspace_shift value
*/
float64_t get_nullspace_shift() const;
/** setter for use arpack parameter
* @param use_arpack use arpack value
*/
void set_use_arpack(bool use_arpack);
/** getter for use arpack parameter
* @return use_arpack value
*/
bool get_use_arpack() const;
/** get name */
virtual const char* get_name() const;
/// HELPERS
protected:
/** default init */
void init();
/** constructs weight matrix
* @param simple_features features to be used
* @param W_matrix weight matrix
* @param neighborhood_matrix matrix containing neighbor idxs
*/
virtual SGMatrix<float64_t> construct_weight_matrix(CSimpleFeatures<float64_t>* simple_features,float64_t* W_matrix,
SGMatrix<int32_t> neighborhood_matrix);
/** constructs embedding
* @param matrix computed weight matrix
* @param dimension dimension of embedding
* @return embedding features
*/
virtual SGMatrix<float64_t> construct_embedding(SGMatrix<float64_t> matrix,int dimension);
/** constructs neighborhood matrix by distance
* @param distance_matrix distance matrix to be used
* @param k number of neighbors
* @return matrix containing indexes of neighbors of i-th vector in i-th column
*/
virtual SGMatrix<int32_t> get_neighborhood_matrix(SGMatrix<float64_t> distance_matrix, int32_t k);
/** estimates k using ternary search
* @param simple_features simple features to use
* @param neighborhood_matrix matrix containing indexes of neighbors for every vector
* @return optimal k (in means of reconstruction error)
*/
int32_t estimate_k(CSimpleFeatures<float64_t>* simple_features, SGMatrix<int32_t> neighborhood_matrix);
/** computes reconstruction error using subset of given features
* @param k
* @param dim
* @param N
* @param feature_matrix
* @param z_matrix
* @param covariance_matrix
* @param resid_vector
* @param id_vector
* @param neighborhood_matrix
* @return residual sum
*/
float64_t compute_reconstruction_error(int32_t k, int dim, int N, float64_t* feature_matrix,
float64_t* z_matrix, float64_t* covariance_matrix,
float64_t* resid_vector, float64_t* id_vector,
SGMatrix<int32_t> neighborhood_matrix);
/// FIELDS
protected:
/** number of neighbors */
int32_t m_k;
/** maximum number of neighbors */
int32_t m_max_k;
/** regularization shift of reconstruction step */
float64_t m_reconstruction_shift;
/** regularization shift of nullspace finding step */
float64_t m_nullspace_shift;
/** whether use arpack or not */
bool m_use_arpack;
/** whether use automatic k or not */
bool m_auto_k;
/// THREADS
protected:
/** runs neighborhood determination thread
* @param p thread params
*/
static void* run_neighborhood_thread(void* p);
/** runs linear reconstruction thread
* @param p thread params
*/
static void* run_linearreconstruction_thread(void* p);
};
}
#endif /* HAVE_LAPACK */
#endif /* LOCALLYLINEAREMBEDDING_H_ */
|