This file is indexed.

/usr/include/mlpack/methods/kmeans/kmeans.hpp is in libmlpack-dev 2.2.5-1build1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
/**
 * @file kmeans.hpp
 * @author Parikshit Ram (pram@cc.gatech.edu)
 *
 * K-Means clustering.
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license.  You should have received a copy of the
 * 3-clause BSD license along with mlpack.  If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#ifndef MLPACK_METHODS_KMEANS_KMEANS_HPP
#define MLPACK_METHODS_KMEANS_KMEANS_HPP

#include <mlpack/prereqs.hpp>

#include <mlpack/core/metrics/lmetric.hpp>
#include "sample_initialization.hpp"
#include "max_variance_new_cluster.hpp"
#include "naive_kmeans.hpp"

#include <mlpack/core/tree/binary_space_tree.hpp>

namespace mlpack {
namespace kmeans /** K-Means clustering. */ {

/**
 * This class implements K-Means clustering, using a variety of possible
 * implementations of Lloyd's algorithm.
 *
 * Four template parameters can (optionally) be supplied: the distance metric to
 * use, the policy for how to find the initial partition of the data, the
 * actions to be taken when an empty cluster is encountered, and the
 * implementation of a single Lloyd step to use.
 *
 * A simple example of how to run K-Means clustering is shown below.
 *
 * @code
 * extern arma::mat data; // Dataset we want to run K-Means on.
 * arma::Row<size_t> assignments; // Cluster assignments.
 * arma::mat centroids; // Cluster centroids.
 *
 * KMeans<> k; // Default options.
 * k.Cluster(data, 3, assignments, centroids); // 3 clusters.
 *
 * // Cluster using the Manhattan distance, 100 iterations maximum, saving only
 * // the centroids.
 * KMeans<metric::ManhattanDistance> k(100);
 * k.Cluster(data, 6, centroids); // 6 clusters.
 * @endcode
 *
 * @tparam MetricType The distance metric to use for this KMeans; see
 *     metric::LMetric for an example.
 * @tparam InitialPartitionPolicy Initial partitioning policy; must implement a
 *     default constructor and either 'void Cluster(const arma::mat&, const
 *     size_t, arma::Row<size_t>&)' or 'void Cluster(const arma::mat&, const
 *     size_t, arma::mat&)'.
 * @tparam EmptyClusterPolicy Policy for what to do on an empty cluster; must
 *     implement a default constructor and 'void EmptyCluster(const arma::mat&
 *     data, const size_t emptyCluster, const arma::mat& oldCentroids,
 *     arma::mat& newCentroids, arma::Col<size_t>& counts, MetricType& metric,
 *     const size_t iteration)'.
 * @tparam LloydStepType Implementation of single Lloyd step to use.
 *
 * @see RandomPartition, SampleInitialization, RefinedStart, AllowEmptyClusters,
 *      MaxVarianceNewCluster, NaiveKMeans, ElkanKMeans
 */
template<typename MetricType = metric::EuclideanDistance,
         typename InitialPartitionPolicy = SampleInitialization,
         typename EmptyClusterPolicy = MaxVarianceNewCluster,
         template<class, class> class LloydStepType = NaiveKMeans,
         typename MatType = arma::mat>
class KMeans
{
 public:
  /**
   * Create a K-Means object and (optionally) set the parameters which K-Means
   * will be run with.
   *
   * @param maxIterations Maximum number of iterations allowed before giving up
   *     (0 is valid, but the algorithm may never terminate).
   * @param metric Optional MetricType object; for when the metric has state
   *     it needs to store.
   * @param partitioner Optional InitialPartitionPolicy object; for when a
   *     specially initialized partitioning policy is required.
   * @param emptyClusterAction Optional EmptyClusterPolicy object; for when a
   *     specially initialized empty cluster policy is required.
   */
  KMeans(const size_t maxIterations = 1000,
         const MetricType metric = MetricType(),
         const InitialPartitionPolicy partitioner = InitialPartitionPolicy(),
         const EmptyClusterPolicy emptyClusterAction = EmptyClusterPolicy());


  /**
   * Perform k-means clustering on the data, returning a list of cluster
   * assignments.  Optionally, the vector of assignments can be set to an
   * initial guess of the cluster assignments; to do this, set initialGuess to
   * true.
   *
   * @tparam MatType Type of matrix (arma::mat or arma::sp_mat).
   * @param data Dataset to cluster.
   * @param clusters Number of clusters to compute.
   * @param assignments Vector to store cluster assignments in.
   * @param initialGuess If true, then it is assumed that assignments has a list
   *      of initial cluster assignments.
   */
  void Cluster(const MatType& data,
               const size_t clusters,
               arma::Row<size_t>& assignments,
               const bool initialGuess = false);

  /**
   * Perform k-means clustering on the data, returning the centroids of each
   * cluster in the centroids matrix.  Optionally, the initial centroids can be
   * specified by filling the centroids matrix with the initial centroids and
   * specifying initialGuess = true.
   *
   * @tparam MatType Type of matrix (arma::mat or arma::sp_mat).
   * @param data Dataset to cluster.
   * @param clusters Number of clusters to compute.
   * @param centroids Matrix in which centroids are stored.
   * @param initialGuess If true, then it is assumed that centroids contains the
   *      initial cluster centroids.
   */
  void Cluster(const MatType& data,
               const size_t clusters,
               arma::mat& centroids,
               const bool initialGuess = false);

  /**
   * Perform k-means clustering on the data, returning a list of cluster
   * assignments and also the centroids of each cluster.  Optionally, the vector
   * of assignments can be set to an initial guess of the cluster assignments;
   * to do this, set initialAssignmentGuess to true.  Another way to set initial
   * cluster guesses is to fill the centroids matrix with the centroid guesses,
   * and then set initialCentroidGuess to true.  initialAssignmentGuess
   * supersedes initialCentroidGuess, so if both are set to true, the
   * assignments vector is used.
   *
   * @tparam MatType Type of matrix (arma::mat or arma::sp_mat).
   * @param data Dataset to cluster.
   * @param clusters Number of clusters to compute.
   * @param assignments Vector to store cluster assignments in.
   * @param centroids Matrix in which centroids are stored.
   * @param initialAssignmentGuess If true, then it is assumed that assignments
   *      has a list of initial cluster assignments.
   * @param initialCentroidGuess If true, then it is assumed that centroids
   *      contains the initial centroids of each cluster.
   */
  void Cluster(const MatType& data,
               const size_t clusters,
               arma::Row<size_t>& assignments,
               arma::mat& centroids,
               const bool initialAssignmentGuess = false,
               const bool initialCentroidGuess = false);

  //! Get the maximum number of iterations.
  size_t MaxIterations() const { return maxIterations; }
  //! Set the maximum number of iterations.
  size_t& MaxIterations() { return maxIterations; }

  //! Get the distance metric.
  const MetricType& Metric() const { return metric; }
  //! Modify the distance metric.
  MetricType& Metric() { return metric; }

  //! Get the initial partitioning policy.
  const InitialPartitionPolicy& Partitioner() const { return partitioner; }
  //! Modify the initial partitioning policy.
  InitialPartitionPolicy& Partitioner() { return partitioner; }

  //! Get the empty cluster policy.
  const EmptyClusterPolicy& EmptyClusterAction() const
  { return emptyClusterAction; }
  //! Modify the empty cluster policy.
  EmptyClusterPolicy& EmptyClusterAction() { return emptyClusterAction; }

  //! Serialize the k-means object.
  template<typename Archive>
  void Serialize(Archive& ar, const unsigned int version);

 private:
  //! Maximum number of iterations before giving up.
  size_t maxIterations;
  //! Instantiated distance metric.
  MetricType metric;
  //! Instantiated initial partitioning policy.
  InitialPartitionPolicy partitioner;
  //! Instantiated empty cluster policy.
  EmptyClusterPolicy emptyClusterAction;
};

} // namespace kmeans
} // namespace mlpack

// Include implementation.
#include "kmeans_impl.hpp"

#endif // MLPACK_METHODS_KMEANS_KMEANS_HPP