This file is indexed.

/usr/include/mlpack/methods/cf/cf.hpp is in libmlpack-dev 2.2.5-1build1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
/**
 * @file cf.hpp
 * @author Mudit Raj Gupta
 * @author Sumedh Ghaisas
 *
 * Collaborative filtering.
 *
 * Defines the CF class to perform collaborative filtering on the specified data
 * set using alternating least squares (ALS).
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license.  You should have received a copy of the
 * 3-clause BSD license along with mlpack.  If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#ifndef MLPACK_METHODS_CF_CF_HPP
#define MLPACK_METHODS_CF_CF_HPP

#include <mlpack/prereqs.hpp>
#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
#include <mlpack/methods/amf/amf.hpp>
#include <mlpack/methods/amf/update_rules/nmf_als.hpp>
#include <mlpack/methods/amf/termination_policies/simple_residue_termination.hpp>
#include <set>
#include <map>
#include <iostream>

namespace mlpack {
namespace cf /** Collaborative filtering. */ {

/**
 * Traits template describing how CF should treat a given factorizer.  The
 * primary template below holds the default value of every trait.  If a
 * factorizer needs a different value for any trait, a template specialization
 * has to be written for that factorizer.  An example can be found in the
 * module for Regularized SVD.
 */
template<typename FactorizerType>
struct FactorizerTraits
{
  /**
   * If true, then the passed data matrix is used for factorizer.Apply().
   * Otherwise, it is modified into a form suitable for factorization.
   */
  static constexpr bool UsesCoordinateList = false;
};

/**
 * This class implements Collaborative Filtering (CF). This implementation
 * presently supports Alternating Least Squares (ALS) for collaborative
 * filtering.
 *
 * A simple example of how to run Collaborative Filtering is shown below.
 *
 * @code
 * extern arma::mat data; // (user, item, rating) table
 * extern arma::Col<size_t> users; // users seeking recommendations
 * arma::Mat<size_t> recommendations; // Recommendations
 *
 * CF cf(data); // Default options.
 *
 * // Generate 10 recommendations for all users.
 * cf.GetRecommendations(10, recommendations);
 *
 * // Generate 10 recommendations for specified users.
 * cf.GetRecommendations(10, recommendations, users);
 *
 * @endcode
 *
 * The data matrix is a (user, item, rating) table.  Each column in the matrix
 * should have three rows.  The first represents the user; the second represents
 * the item; and the third represents the rating.  The user and item, while they
 * are in a matrix that holds doubles, should hold integer (or size_t) values.
 * The user and item indices are assumed to start at 0.
 *
 * CF itself is not a class template.  The matrix factorization used to
 * decompose the rating matrix (into a W and H matrix) is selected through the
 * FactorizerType template parameter of the constructors and of Train(), which
 * defaults to amf::NMFALSFactorizer.  A FactorizerType must implement the
 * method Apply(arma::sp_mat& data, size_t rank, arma::mat& W, arma::mat& H).
 */
class CF
{
 public:
  /**
   * Initialize the CF object without performing any factorization.  Be sure to
   * call Train() before calling GetRecommendations() or any other functions!
   *
   * @param numUsersForSimilarity Size of the neighborhood.
   * @param rank Rank parameter for matrix factorization.
   */
  CF(const size_t numUsersForSimilarity = 5,
     const size_t rank = 0);

  /**
   * Initialize the CF object using an instantiated factorizer, immediately
   * factorizing the given data to create a model. There are parameters that can
   * be set; default values are provided for each of them. If the rank is left
   * unset (or is set to 0), a simple density-based heuristic will be used to
   * choose a rank.
   *
   * The provided dataset should be a coordinate list; that is, a 3-row matrix
   * where each column corresponds to a (user, item, rating) entry in the
   * matrix.
   *
   * @tparam FactorizerType Type of matrix factorization to use.
   * @param data Data matrix: coordinate list or dense matrix.
   * @param factorizer Instantiated factorizer object.
   * @param numUsersForSimilarity Size of the neighborhood.
   * @param rank Rank parameter for matrix factorization.
   */
  template<typename FactorizerType = amf::NMFALSFactorizer>
  CF(const arma::mat& data,
     FactorizerType factorizer = FactorizerType(),
     const size_t numUsersForSimilarity = 5,
     const size_t rank = 0);

  /**
   * Initialize the CF object using an instantiated factorizer, immediately
   * factorizing the given data to create a model. There are parameters that can
   * be set; default values are provided for each of them. If the rank is left
   * unset (or is set to 0), a simple density-based heuristic will be used to
   * choose a rank. Data will be considered in the format of items vs. users and
   * will be passed directly to the factorizer without cleaning.  This overload
   * of the constructor will only be available if the factorizer does not use a
   * coordinate list (i.e. if UsesCoordinateList is false).
   *
   * The trailing unnamed pointer parameter exists only for SFINAE: through
   * boost::disable_if_c it removes this overload from consideration when
   * FactorizerTraits<FactorizerType>::UsesCoordinateList is true.  Callers
   * should never pass a value for it.
   *
   * @tparam FactorizerType Type of matrix factorization to use.
   * @param data Sparse matrix data.
   * @param factorizer Instantiated factorizer object.
   * @param numUsersForSimilarity Size of the neighborhood.
   * @param rank Rank parameter for matrix factorization.
   */
  template<typename FactorizerType = amf::NMFALSFactorizer>
  CF(const arma::sp_mat& data,
     FactorizerType factorizer = FactorizerType(),
     const size_t numUsersForSimilarity = 5,
     const size_t rank = 0,
     const typename boost::disable_if_c<
         FactorizerTraits<FactorizerType>::UsesCoordinateList>::type*
         = nullptr);

  /**
   * Train the CF model (i.e. factorize the input matrix) using the parameters
   * that have already been set for the model (specifically, the rank
   * parameter), and optionally, using the given FactorizerType.
   *
   * @tparam FactorizerType Type of matrix factorization to use.
   * @param data Input dataset; coordinate list or dense matrix.
   * @param factorizer Instantiated factorizer.
   */
  template<typename FactorizerType = amf::NMFALSFactorizer>
  void Train(const arma::mat& data,
             FactorizerType factorizer = FactorizerType());

  /**
   * Train the CF model (i.e. factorize the input matrix) using the parameters
   * that have already been set for the model (specifically, the rank
   * parameter), and optionally, using the given FactorizerType.
   *
   * As with the sparse-matrix constructor, the trailing unnamed pointer
   * parameter is only there for SFINAE: this overload is disabled when
   * FactorizerTraits<FactorizerType>::UsesCoordinateList is true.
   *
   * @tparam FactorizerType Type of matrix factorization to use.
   * @param data Sparse matrix data.
   * @param factorizer Instantiated factorizer.
   */
  template<typename FactorizerType = amf::NMFALSFactorizer>
  void Train(const arma::sp_mat& data,
             FactorizerType factorizer = FactorizerType(),
             const typename boost::disable_if_c<
                 FactorizerTraits<FactorizerType>::UsesCoordinateList>::type*
                 = nullptr);

  //! Sets number of users for calculating similarity.  A value of 0 is
  //! rejected: a warning is logged and the current setting is kept.
  void NumUsersForSimilarity(const size_t num)
  {
    if (num < 1)
    {
      Log::Warn << "CF::NumUsersForSimilarity(): invalid value (< 1) "
          "ignored." << std::endl;
      return;
    }
    this->numUsersForSimilarity = num;
  }

  //! Gets number of users for calculating similarity.
  size_t NumUsersForSimilarity() const
  {
    return numUsersForSimilarity;
  }

  //! Sets rank parameter for matrix factorization.
  void Rank(const size_t rankValue)
  {
    this->rank = rankValue;
  }

  //! Gets rank parameter for matrix factorization.
  size_t Rank() const
  {
    return rank;
  }

  //! Get the User Matrix.
  const arma::mat& W() const { return w; }
  //! Get the Item Matrix.
  const arma::mat& H() const { return h; }
  //! Get the cleaned data matrix.
  const arma::sp_mat& CleanedData() const { return cleanedData; }

  /**
   * Generates the given number of recommendations for all users.
   *
   * @param numRecs Number of Recommendations
   * @param recommendations Matrix to save recommendations into.
   */
  void GetRecommendations(const size_t numRecs,
                          arma::Mat<size_t>& recommendations);

  /**
   * Generates the given number of recommendations for the specified users.
   *
   * @param numRecs Number of Recommendations
   * @param recommendations Matrix to save recommendations
   * @param users Users for which recommendations are to be generated
   */
  void GetRecommendations(const size_t numRecs,
                          arma::Mat<size_t>& recommendations,
                          arma::Col<size_t>& users);

  /**
   * Converts the User, Item, Value Matrix to User-Item Table.
   *
   * @param data (user, item, rating) coordinate list to convert.
   * @param cleanedData Sparse matrix that receives the converted table.
   */
  static void CleanData(const arma::mat& data, arma::sp_mat& cleanedData);

  /**
   * Predict the rating of an item by a particular user.
   *
   * @param user User to predict for.
   * @param item Item to predict for.
   * @return Predicted rating.
   */
  double Predict(const size_t user, const size_t item) const;

  /**
   * Predict ratings for each user-item combination in the given coordinate list
   * matrix.  The matrix 'combinations' should have two rows and number of
   * columns equal to the number of desired predictions.  The first element of
   * each column corresponds to the user index, and the second element of each
   * column corresponds to the item index.  The output vector 'predictions' will
   * have length equal to combinations.n_cols, and predictions[i] will be equal
   * to the prediction for the user/item combination in combinations.col(i).
   *
   * @param combinations User/item combinations to predict.
   * @param predictions Predicted ratings for each user/item combination.
   */
  void Predict(const arma::Mat<size_t>& combinations,
               arma::vec& predictions) const;

  /**
   * Serialize the CF model to the given archive.
   *
   * @param ar Archive to serialize to or from.
   */
  template<typename Archive>
  void Serialize(Archive& ar, const unsigned int /* version */);

 private:
  //! Number of users for similarity.
  size_t numUsersForSimilarity;
  //! Rank used for matrix factorization.
  size_t rank;
  //! User matrix.
  arma::mat w;
  //! Item matrix.
  arma::mat h;
  //! Cleaned data matrix.
  arma::sp_mat cleanedData;

  //! Candidate represents a possible recommendation (value, item).
  using Candidate = std::pair<double, size_t>;

  //! Compare two candidates based on the value: returns true when c1 has a
  //! strictly greater value than c2.
  struct CandidateCmp {
    // const-qualified so the comparator can also be invoked through a const
    // object, as the standard Compare requirement expects.
    bool operator()(const Candidate& c1, const Candidate& c2) const
    {
      return c1.first > c2.first;
    }
  };
}; // class CF

} // namespace cf
} // namespace mlpack

// Include implementation of templated functions.
#include "cf_impl.hpp"

#endif