/usr/include/vtk-7.1/vtkKMeansStatistics.h is in libvtk7-dev 7.1.1+dfsg1-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*=========================================================================
Program: Visualization Toolkit
Module: vtkKMeansStatistics.h
Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
All rights reserved.
See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
This software is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the above copyright notice for more information.
=========================================================================*/
/*-------------------------------------------------------------------------
Copyright 2010 Sandia Corporation.
Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
the U.S. Government retains certain rights in this software.
-------------------------------------------------------------------------*/
/**
* @class vtkKMeansStatistics
* @brief A class for KMeans clustering
*
*
* This class takes as input an optional vtkTable on port LEARN_PARAMETERS
* specifying initial set(s) of cluster values of the following form:
* <pre>
* K | Col1 | ... | ColN
* -----------+-----------------+---------+---------------
* M |clustCoord(1, 1) | ... | clustCoord(1, N)
* M |clustCoord(2, 1) | ... | clustCoord(2, N)
* . | . | . | .
* . | . | . | .
* . | . | . | .
* M |clustCoord(M, 1) | ... | clustCoord(M, N)
* L |clustCoord(1, 1) | ... | clustCoord(1, N)
* L |clustCoord(2, 1) | ... | clustCoord(2, N)
* . | . | . | .
* . | . | . | .
* . | . | . | .
* L |clustCoord(L, 1) | ... | clustCoord(L, N)
* </pre>
*
* Because the desired value of K is often not known in advance and the
* results of the algorithm are dependent on the initial cluster centers,
* we provide a mechanism for the user to test multiple runs or sets of cluster centers
* within a single call to the Learn phase. The first column of the table identifies
* the number of clusters K in the particular run (the entries in this column should be
* of type vtkIdType), while the remaining columns are a
* subset of the columns contained in the table on port INPUT_DATA. We require that
* all user specified clusters be of the same dimension N and consequently, that the
* LEARN_PARAMETERS table have N+1 columns. Due to this restriction, only one request
* can be processed for each call to the Learn phase and subsequent requests are
* silently ignored. Note that, if the first column of the LEARN_PARAMETERS table is not
* of type vtkIdType, then the table will be ignored and a single run will be performed using
* the first DefaultNumberOfClusters input data observations as initial cluster centers.
*
* When the user does not supply an initial set of clusters, then the first
* DefaultNumberOfClusters input data observations are used as initial cluster
* centers and a single run is performed.
*
*
* This class provides the following functionalities, depending on the operation
* in which it is executed:
* * Learn: calculates new cluster centers for each run. The output metadata on
* port OUTPUT_MODEL is a multiblock dataset containing at a minimum
* one vtkTable with columns specifying the following for each run:
* the run ID, number of clusters, number of iterations required for convergence,
* total error associated with the cluster (sum of squared Euclidean distance from each observation
* to its nearest cluster center), the cardinality of the cluster, and the new
* cluster coordinates.
*
* * Derive: An additional vtkTable is stored in the multiblock dataset output on port OUTPUT_MODEL.
* This table contains columns that store for each run: the runID, number of clusters,
* total error for all clusters in the run, local rank, and global rank.
* The local rank is computed by comparing squared Euclidean errors of all runs with
* the same number of clusters. The global rank is computed analogously across all runs.
*
* * Assess: This requires a multiblock dataset (as computed from Learn and Derive) on input port INPUT_MODEL
* and tabular data on input port INPUT_DATA that contains column names matching those
* of the tables on input port INPUT_MODEL. The assess mode reports the closest cluster center
* and associated squared Euclidean distance of each observation in port INPUT_DATA's table to the cluster centers for
* each run in the multiblock dataset provided on port INPUT_MODEL.
*
* The code can handle a wide variety of data types as it operates on vtkAbstractArrays
* and is not limited to vtkDataArrays. A default distance functor that
* computes the sum of the squares of the Euclidean distance between two objects is provided
* (vtkKMeansDistanceFunctor). The default distance functor can be overridden to use alternative distance metrics.
*
* @par Thanks:
* Thanks to Janine Bennett, David Thompson, and Philippe Pebay of
* Sandia National Laboratories for implementing this class.
* Updated by Philippe Pebay, Kitware SAS 2012
*/
#ifndef vtkKMeansStatistics_h
#define vtkKMeansStatistics_h
#include "vtkFiltersStatisticsModule.h" // For export macro
#include "vtkStatisticsAlgorithm.h"
// Forward declarations: the visible declarations of the class below refer to
// these types only through pointers, so this header does not include their
// full definitions.
class vtkIdTypeArray;
class vtkIntArray;
class vtkDoubleArray;
class vtkKMeansDistanceFunctor;
class vtkMultiBlockDataSet;
// Declares the KMeans statistics engine: public configuration accessors, the
// Learn/Derive/Assess/Test overrides, and the virtual hooks that
// vtkPKMeansStatistics overrides to handle distributed datasets.
class VTKFILTERSSTATISTICS_EXPORT vtkKMeansStatistics : public vtkStatisticsAlgorithm
{
public:
  vtkTypeMacro(vtkKMeansStatistics, vtkStatisticsAlgorithm);

  /**
   * Override of PrintSelf, taking the output stream and the indentation to use.
   */
  void PrintSelf( ostream& os, vtkIndent indent ) VTK_OVERRIDE;

  /**
   * Static factory returning a pointer to a vtkKMeansStatistics.
   */
  static vtkKMeansStatistics* New();

  //@{
  /**
   * Set/get the DistanceFunctor.
   */
  virtual void SetDistanceFunctor( vtkKMeansDistanceFunctor* );
  vtkGetObjectMacro(DistanceFunctor,vtkKMeansDistanceFunctor);
  //@}

  //@{
  /**
   * Set/get the \a DefaultNumberOfClusters, used when no initial cluster
   * coordinates are specified.
   */
  vtkSetMacro(DefaultNumberOfClusters, int);
  vtkGetMacro(DefaultNumberOfClusters, int);
  //@}

  //@{
  /**
   * Set/get the KValuesArrayName.
   */
  vtkSetStringMacro(KValuesArrayName);
  vtkGetStringMacro(KValuesArrayName);
  //@}

  //@{
  /**
   * Set/get the MaxNumIterations used to terminate iterations on
   * cluster center coordinates when the relative tolerance cannot be met.
   */
  vtkSetMacro( MaxNumIterations, int );
  vtkGetMacro( MaxNumIterations, int );
  //@}

  //@{
  /**
   * Set/get the relative \a Tolerance used to terminate iterations on
   * cluster center coordinates.
   */
  vtkSetMacro( Tolerance, double );
  vtkGetMacro( Tolerance, double );
  //@}

  /**
   * Given a collection of models, calculate aggregate model.
   * NB: not implemented; this override is a no-op.
   */
  void Aggregate( vtkDataObjectCollection*,
                  vtkMultiBlockDataSet* ) VTK_OVERRIDE {}

  /**
   * A convenience method for setting properties by name.
   */
  bool SetParameter(
    const char* parameter, int index, vtkVariant value ) VTK_OVERRIDE;

protected:
  vtkKMeansStatistics();
  ~vtkKMeansStatistics() VTK_OVERRIDE;

  /**
   * Execute the calculations required by the Learn option.
   */
  void Learn( vtkTable*,
              vtkTable*,
              vtkMultiBlockDataSet* ) VTK_OVERRIDE;

  /**
   * Execute the calculations required by the Derive option.
   */
  void Derive( vtkMultiBlockDataSet* ) VTK_OVERRIDE;

  /**
   * Execute the calculations required by the Assess option.
   */
  void Assess( vtkTable*,
               vtkMultiBlockDataSet*,
               vtkTable* ) VTK_OVERRIDE;

  /**
   * Execute the calculations required by the Test option.
   * This class performs none: the override is a no-op.
   */
  void Test( vtkTable*,
             vtkMultiBlockDataSet*,
             vtkTable* ) VTK_OVERRIDE {}

  /**
   * Provide the appropriate assessment functor.
   */
  void SelectAssessFunctor( vtkTable* inData,
                            vtkDataObject* inMeta,
                            vtkStringArray* rowNames,
                            AssessFunctor*& dfunc ) VTK_OVERRIDE;

  /**
   * Subroutine to update new cluster centers from the old centers.
   * Called from within Learn (and will be overridden by vtkPKMeansStatistics
   * to handle distributed datasets).
   */
  virtual void UpdateClusterCenters( vtkTable* newClusterElements,
                                     vtkTable* curClusterElements,
                                     vtkIdTypeArray* numMembershipChanges,
                                     vtkIdTypeArray* numElementsInCluster,
                                     vtkDoubleArray* error,
                                     vtkIdTypeArray* startRunID,
                                     vtkIdTypeArray* endRunID,
                                     vtkIntArray *computeRun );

  /**
   * Subroutine to get the total number of observations.
   * Called from within Learn (and will be overridden by vtkPKMeansStatistics
   * to handle distributed datasets).
   */
  virtual vtkIdType GetTotalNumberOfObservations( vtkIdType numObservations );

  /**
   * Subroutine to initialize the cluster centers using those provided by the user
   * in input port LEARN_PARAMETERS. If no cluster centers are provided, the subroutine uses the
   * first DefaultNumberOfClusters input data points as initial cluster centers.
   * Called from within Learn.
   * NOTE(review): the meaning of the int return value is not visible in this
   * header; confirm against the implementation.
   */
  int InitializeDataAndClusterCenters(vtkTable* inParameters,
                                      vtkTable* inData,
                                      vtkTable* dataElements,
                                      vtkIdTypeArray* numberOfClusters,
                                      vtkTable* curClusterElements,
                                      vtkTable* newClusterElements,
                                      vtkIdTypeArray* startRunID,
                                      vtkIdTypeArray* endRunID);

  /**
   * Subroutine to initialize cluster centers if not provided by the user.
   * Called from within Learn (and will be overridden by vtkPKMeansStatistics
   * to handle distributed datasets).
   */
  virtual void CreateInitialClusterCenters(vtkIdType numToAllocate,
                                           vtkIdTypeArray* numberOfClusters,
                                           vtkTable* inData,
                                           vtkTable* curClusterElements,
                                           vtkTable* newClusterElements);

  /**
   * This is the default number of clusters used when the user does not provide initial cluster centers.
   */
  int DefaultNumberOfClusters;

  /**
   * This is the name of the column that specifies the number of clusters in each run.
   * This is only used if the user has not specified initial clusters.
   */
  char* KValuesArrayName;

  /**
   * This is the maximum number of iterations allowed if the new cluster centers have not yet converged.
   */
  int MaxNumIterations;

  /**
   * Relative tolerance used to terminate iterations: the percentage of data
   * elements that swap cluster IDs.
   * NOTE(review): whether this is expressed as a fraction in [0,1] or as a
   * 0-100 percentage is not visible in this header; confirm against the
   * implementation.
   */
  double Tolerance;

  /**
   * This is the Distance functor. The default is Euclidean distance, however this can be overridden.
   */
  vtkKMeansDistanceFunctor* DistanceFunctor;

private:
  // Copy construction and assignment are declared private and marked
  // VTK_DELETE_FUNCTION.
  vtkKMeansStatistics( const vtkKMeansStatistics& ) VTK_DELETE_FUNCTION;
  void operator=( const vtkKMeansStatistics& ) VTK_DELETE_FUNCTION;
};
#endif
|