/usr/include/mlpack/methods/dbscan/dbscan.hpp is in libmlpack-dev 2.2.5-1build1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | /**
* @file dbscan.hpp
* @author Ryan Curtin
*
* An implementation of the DBSCAN clustering method, which is flexible enough
* to support other algorithms for finding nearest neighbors.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/
#ifndef MLPACK_METHODS_DBSCAN_DBSCAN_HPP
#define MLPACK_METHODS_DBSCAN_DBSCAN_HPP
#include <mlpack/core.hpp>
#include <mlpack/methods/range_search/range_search.hpp>
#include <mlpack/methods/emst/union_find.hpp>
#include "random_point_selection.hpp"
#include <boost/dynamic_bitset.hpp>
namespace mlpack {
namespace dbscan {
/**
* DBSCAN (Density-Based Spatial Clustering of Applications with Noise) is a
* clustering technique described in the following paper:
*
* @code
* @inproceedings{ester1996density,
* title={A density-based algorithm for discovering clusters in large spatial
* databases with noise.},
* author={Ester, M. and Kriegel, H.-P. and Sander, J. and Xu, X.},
* booktitle={Proceedings of the Second International Conference on Knowledge
* Discovery and Data Mining (KDD '96)},
* pages={226--231},
* year={1996}
* }
* @endcode
*
* The DBSCAN algorithm iteratively clusters points using range searches with a
* specified radius parameter. This implementation allows configuration of the
* range search technique used and the point selection strategy by means of
* template parameters.
*
* @tparam RangeSearchType Class to use for range searching.
* @tparam PointSelectionPolicy Strategy for selecting next point to cluster
* with.
*/
template<typename RangeSearchType = range::RangeSearch<>,
typename PointSelectionPolicy = RandomPointSelection>
class DBSCAN
{
public:
/**
* Construct the DBSCAN object with the given parameters. The batchMode
* parameter should be set to false in the case where RAM issues will be
* encountered (i.e. if the dataset is very large or if epsilon is large).
* When batchMode is false, each point will be searched iteratively, which
* could be slower but will use less memory.
*
* @param epsilon Size of range query.
* @param minPoints Minimum number of points for each cluster.
* @param batchMode If true, all points are searched in batch.
* @param rangeSearch Optional instantiated RangeSearch object.
* @param pointSelector OptionL instantiated PointSelectionPolicy object.
*/
DBSCAN(const double epsilon,
const size_t minPoints,
const bool batchMode = true,
RangeSearchType rangeSearch = RangeSearchType(),
PointSelectionPolicy pointSelector = PointSelectionPolicy());
/**
* Performs DBSCAN clustering on the data, returning number of clusters
* and also the centroid of each cluster.
*
* @tparam MatType Type of matrix (arma::mat or arma::sp_mat).
* @param data Dataset to cluster.
* @param centroids Matrix in which centroids are stored.
*/
template<typename MatType>
size_t Cluster(const MatType& data,
arma::mat& centroids);
/**
* Performs DBSCAN clustering on the data, returning number of clusters
* and also the list of cluster assignments.
*
* @tparam MatType Type of matrix (arma::mat or arma::sp_mat).
* @param data Dataset to cluster.
* @param assignments Vector to store cluster assignments.
*/
template<typename MatType>
size_t Cluster(const MatType& data,
arma::Row<size_t>& assignments);
/**
* Performs DBSCAN clustering on the data, returning number of clusters,
* the centroid of each cluster and also the list of cluster assignments.
* If assignments[i] == assignments.n_elem - 1, then the point is considered
* "noise".
*
* @tparam MatType Type of matrix (arma::mat or arma::sp_mat).
* @param data Dataset to cluster.
* @param assignments Vector to store cluster assignments.
* @param centroids Matrix in which centroids are stored.
*/
template<typename MatType>
size_t Cluster(const MatType& data,
arma::Row<size_t>& assignments,
arma::mat& centroids);
private:
//! Maximum distance between two points to be part of same cluster.
double epsilon;
//! Minimum number of points to be in the epsilon-neighborhood (including
//! itself) for the point to be a core-point.
size_t minPoints;
//! Whether or not to perform the search in batch mode. If false, single
bool batchMode;
//! Instantiated range search policy.
RangeSearchType rangeSearch;
//! Instantiated point selection policy.
PointSelectionPolicy pointSelector;
/**
* Performs DBSCAN clustering on the data, returning the number of clusters and
* also the list of cluster assignments. This searches each point iteratively,
* and can save on RAM usage. It may be slower than the batch search with a
* dual-tree algorithm.
*
* @param data Dataset to cluster.
* @param assignments Assignments for each point.
* @param uf UnionFind structure that will be modified.
*/
template<typename MatType>
void PointwiseCluster(const MatType& data,
emst::UnionFind& uf);
/**
* Performs DBSCAN clustering on the data, returning number of clusters
* and also the list of cluster assignments. This can perform search in batch,
* so it is well suited for dual-tree or naive search.
*
* @param data Dataset to cluster.
* @param assignments Assignments for each point.
* @param uf UnionFind structure that will be modified.
*/
template<typename MatType>
void BatchCluster(const MatType& data,
emst::UnionFind& uf);
};
} // namespace dbscan
} // namespace mlpack
// Include implementation.
#include "dbscan_impl.hpp"
#endif
|