/usr/include/mlpack/methods/pca/pca.hpp is in libmlpack-dev 2.1.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/**
* @file pca.hpp
* @author Ajinkya Kale
* @author Ryan Curtin
* @author Marcus Edel
*
* Defines the PCA class to perform Principal Components Analysis on the
* specified data set. There are many variations on how to do this, so
* template parameters allow the selection of different techniques.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/
#ifndef MLPACK_METHODS_PCA_PCA_HPP
#define MLPACK_METHODS_PCA_PCA_HPP
#include <mlpack/core.hpp>
#include <mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp>
namespace mlpack {
namespace pca {
/**
* This class implements principal components analysis (PCA). This is a
* common, widely-used technique that is often used for either dimensionality
* reduction or transforming data into a better basis. Further information on
* PCA can be found in almost any statistics or machine learning textbook, and
* all over the internet. Note this class will be changed to have the name PCA
* in mlpack 3.0.0.
*/
template<typename DecompositionPolicy = ExactSVDPolicy>
class PCAType
{
 public:
  /**
   * Create the PCA object, specifying if the data should be scaled in each
   * dimension by standard deviation when PCA is performed.
   *
   * @param scaleData Whether or not to scale the data.
   * @param decomposition Decomposition policy object to use; when omitted, a
   *     default-constructed DecompositionPolicy is passed.
   */
  PCAType(const bool scaleData = false,
          const DecompositionPolicy& decomposition = DecompositionPolicy());

  /**
   * Apply Principal Component Analysis to the provided data set.  It is safe
   * to pass the same matrix reference for both data and transformedData.
   *
   * @param data Data matrix.
   * @param transformedData Matrix to put results of PCA into.
   * @param eigVal Vector to put eigenvalues into.
   * @param eigvec Matrix to put eigenvectors (loadings) into.
   */
  void Apply(const arma::mat& data,
             arma::mat& transformedData,
             arma::vec& eigVal,
             arma::mat& eigvec);

  /**
   * Apply Principal Component Analysis to the provided data set.  It is safe
   * to pass the same matrix reference for both data and transformedData.
   * This overload does not hand the eigenvectors back to the caller.
   *
   * @param data Data matrix.
   * @param transformedData Matrix to store results of PCA in.
   * @param eigVal Vector to put eigenvalues into.
   */
  void Apply(const arma::mat& data,
             arma::mat& transformedData,
             arma::vec& eigVal);

  /**
   * Use PCA for dimensionality reduction on the given dataset.  This will save
   * the newDimension largest principal components of the data and remove the
   * rest.  The parameter returned is the amount of variance of the data that
   * is retained; this is a value between 0 and 1.  For instance, a value of
   * 0.9 indicates that 90% of the variance present in the data was retained.
   *
   * @param data Data matrix; modified in place.
   * @param newDimension New dimension of the data.
   * @return Amount of the variance of the data retained (between 0 and 1).
   */
  double Apply(arma::mat& data, const size_t newDimension);

  //! This overload exists so that an int argument is cast to size_t and
  //! dispatched to the size_t overload (rather than being ambiguous with the
  //! double overload).  Note that a negative int wraps around to a very large
  //! size_t value in the conversion.
  inline double Apply(arma::mat& data, const int newDimension)
  {
    return Apply(data, size_t(newDimension));
  }

  /**
   * Use PCA for dimensionality reduction on the given dataset.  This will save
   * as many dimensions as necessary to retain at least the given amount of
   * variance (specified by parameter varRetained).  The amount should be
   * between 0 and 1; if the amount is 0, then only 1 dimension will be
   * retained.  If the amount is 1, then all dimensions will be retained.
   *
   * The method returns the actual amount of variance retained, which will
   * always be greater than or equal to the varRetained parameter.
   *
   * @param data Data matrix; modified in place.
   * @param varRetained Lower bound on amount of variance to retain; should be
   *     between 0 and 1.
   * @return Actual amount of variance retained (between 0 and 1).
   */
  double Apply(arma::mat& data, const double varRetained);

  //! Get whether or not this PCA object will scale (by standard deviation)
  //! the data when PCA is performed.
  bool ScaleData() const { return scaleData; }
  //! Modify whether or not this PCA object will scale (by standard deviation)
  //! the data when PCA is performed.
  bool& ScaleData() { return scaleData; }

 private:
  /**
   * Scale the given matrix in place so that each dimension (row) has its
   * variance reduced to 1.  This is a no-op unless scaleData is true.
   *
   * @param centeredData Matrix to scale; the name indicates the caller is
   *     expected to have centered it already (not checked here).
   */
  void ScaleData(arma::mat& centeredData)
  {
    if (!scaleData)
      return;

    // One standard deviation per row: norm_type 0, taken along dimension 1.
    arma::vec stdDev = arma::stddev(
        centeredData, 0, 1 /* for each dimension */);

    // If there are any zeroes, make them very small so that the division
    // below never divides by exactly zero.
    for (size_t i = 0; i < stdDev.n_elem; ++i)
    {
      if (stdDev[i] == 0)
        stdDev[i] = 1e-50;
    }

    // Divide every column element-wise by stdDev, in place.  This gives the
    // same result as dividing by repmat(stdDev, 1, n_cols) but does not
    // materialize a temporary matrix the size of the whole data set.
    centeredData.each_col() /= stdDev;
  }

  //! Whether or not the data will be scaled by standard deviation when PCA is
  //! performed.
  bool scaleData;

  //! Decomposition method used to perform principal components analysis.
  DecompositionPolicy decomposition;
}; // class PCAType
//! Convenience name for PCAType instantiated with the ExactSVDPolicy
//! decomposition policy.
//! TODO (3.0.0): rename PCAType itself to PCA; doing so breaks backward
//! compatibility.
using PCA = PCAType<ExactSVDPolicy>;
} // namespace pca
} // namespace mlpack
// Include implementation.
#include "pca_impl.hpp"
#endif
|