/usr/include/mlpack/methods/pca/pca.hpp is in libmlpack-dev 2.0.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/**
* @file pca.hpp
* @author Ajinkya Kale
*
* Defines the PCA class to perform Principal Components Analysis on the
* specified data set.
*
* This file is part of mlpack 2.0.1.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/
#ifndef __MLPACK_METHODS_PCA_PCA_HPP
#define __MLPACK_METHODS_PCA_PCA_HPP
#include <mlpack/core.hpp>
namespace mlpack {
namespace pca {
/**
* This class implements principal components analysis (PCA). This is a common,
* widely-used technique that is often used for either dimensionality reduction
* or transforming data into a better basis. Further information on PCA can be
* found in almost any statistics or machine learning textbook, and all over the
* internet.
*/
class PCA
{
 public:
  /**
   * Construct a PCA object.
   *
   * @param scaleData If true, every dimension of the data is scaled by its
   *     standard deviation when PCA is performed.
   */
  PCA(const bool scaleData = false);

  /**
   * Run Principal Component Analysis on a data set, returning the transformed
   * data together with the eigenvalues and eigenvectors.  The same matrix may
   * safely be given as both data and transformedData.
   *
   * @param data Input data matrix.
   * @param transformedData Output matrix receiving the PCA results.
   * @param eigval Output vector receiving the eigenvalues.
   * @param eigvec Output matrix receiving the eigenvectors (loadings).
   */
  void Apply(const arma::mat& data,
             arma::mat& transformedData,
             arma::vec& eigval,
             arma::mat& eigvec) const;

  /**
   * Run Principal Component Analysis on a data set, returning the transformed
   * data and the eigenvalues only.  The same matrix may safely be given as
   * both data and transformedData.
   *
   * @param data Input data matrix.
   * @param transformedData Output matrix receiving the PCA results.
   * @param eigVal Output vector receiving the eigenvalues.
   */
  void Apply(const arma::mat& data,
             arma::mat& transformedData,
             arma::vec& eigVal) const;

  /**
   * Reduce the dimensionality of a data set in place.  Only the newDimension
   * largest principal components are kept; the remainder are discarded.
   *
   * The return value is the fraction of the data's variance that was kept,
   * in the range [0, 1] (e.g. 0.9 means 90% of the variance was retained).
   *
   * @param data Data matrix; overwritten with the reduced data.
   * @param newDimension Number of dimensions to keep.
   * @return Fraction of the variance retained (between 0 and 1).
   */
  double Apply(arma::mat& data, const size_t newDimension) const;

  //! Convenience overload so that an int argument is converted to size_t and
  //! dispatched to the dimension-count version of Apply().
  double Apply(arma::mat& data, const int newDimension) const
  {
    return Apply(data, static_cast<size_t>(newDimension));
  }

  /**
   * Reduce the dimensionality of a data set in place, keeping as many
   * dimensions as are needed to retain at least varRetained of the variance.
   * varRetained should lie between 0 and 1: a value of 0 keeps only a single
   * dimension, and a value of 1 keeps every dimension.
   *
   * The value returned is the variance actually retained, which is never less
   * than the requested varRetained.
   *
   * @param data Data matrix; overwritten with the reduced data.
   * @param varRetained Minimum fraction of variance to retain; should be
   *     between 0 and 1.
   * @return Fraction of the variance actually retained (between 0 and 1).
   */
  double Apply(arma::mat& data, const double varRetained) const;

  //! Returns true if this object scales the data (by standard deviation) when
  //! PCA is performed.
  bool ScaleData() const { return scaleData; }

  //! Returns a mutable reference to the scaling flag, allowing the caller to
  //! change whether the data is scaled (by standard deviation).
  bool& ScaleData() { return scaleData; }

 private:
  //! Flag: scale the data by standard deviation when PCA is performed?
  bool scaleData;
}; // class PCA
} // namespace pca
} // namespace mlpack
#endif
|