This file is indexed.

/usr/include/OTB-5.8/otbTrainRandomForests.txx is in libotb-dev 5.8.0+dfsg-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

/*=========================================================================
 Program:   ORFEO Toolbox
 Language:  C++
 Date:      $Date$
 Version:   $Revision$


 Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
 See OTBCopyright.txt for details.


 This software is distributed WITHOUT ANY WARRANTY; without even
 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 PURPOSE.  See the above copyright notices for more information.

 =========================================================================*/
#ifndef otbTrainRandomForests_txx
#define otbTrainRandomForests_txx
#include "otbLearningApplicationBase.h"

namespace otb
{
namespace Wrapper
{

template <class TInputValue, class TOutputValue>
void
LearningApplicationBase<TInputValue,TOutputValue>
::InitRandomForestsParams()
{
  AddChoice("classifier.rf", "Random forests classifier");
  SetParameterDescription("classifier.rf",
                          "This group of parameters allows setting Random Forests classifier parameters. "
                          "See complete documentation here \\url{http://docs.opencv.org/modules/ml/doc/random_trees.html}.");
  //MaxDepth
  AddParameter(ParameterType_Int, "classifier.rf.max", "Maximum depth of the tree");
  SetParameterInt("classifier.rf.max", 5);
  SetParameterDescription(
      "classifier.rf.max",
      "The depth of the tree. A low value will likely underfit and conversely a high value will likely overfit. "
      "The optimal value can be obtained using cross validation or other suitable methods.");

  //MinSampleCount
  AddParameter(ParameterType_Int, "classifier.rf.min", "Minimum number of samples in each node");
  SetParameterInt("classifier.rf.min", 10);
  SetParameterDescription(
      "classifier.rf.min", "If the number of samples in a node is smaller than this parameter, "
      "then the node will not be split. A reasonable value is a small percentage of the total data e.g. 1 percent.");

  //RegressionAccuracy
  AddParameter(ParameterType_Float, "classifier.rf.ra", "Termination Criteria for regression tree");
  SetParameterFloat("classifier.rf.ra", 0.);
  SetParameterDescription("classifier.rf.ra", "If all absolute differences between an estimated value in a node "
                          "and the values of the train samples in this node are smaller than this regression accuracy parameter, "
                          "then the node will not be split.");

  //UseSurrogates: does not need to be exposed
  //AddParameter(ParameterType_Empty, "classifier.rf.sur", "Surrogate splits will be built");
  //SetParameterDescription("classifier.rf.sur","These splits allow working with missing data and compute variable importance correctly.");

  //MaxNumberOfCategories
  AddParameter(ParameterType_Int, "classifier.rf.cat",
               "Cluster possible values of a categorical variable into K <= cat clusters to find a suboptimal split");
  SetParameterInt("classifier.rf.cat", 10);
  SetParameterDescription(
      "classifier.rf.cat",
      "Cluster possible values of a categorical variable into K <= cat clusters to find a suboptimal split.");

  //Priors are not exposed.

  //CalculateVariableImportance not exposed

  //MaxNumberOfVariables
  AddParameter(ParameterType_Int, "classifier.rf.var",
               "Size of the randomly selected subset of features at each tree node");
  SetParameterInt("classifier.rf.var", 0);
  SetParameterDescription(
      "classifier.rf.var",
      "The size of the subset of features, randomly selected at each tree node, that are used to find the best split(s). "
      "If you set it to 0, then the size will be set to the square root of the total number of features.");

  //MaxNumberOfTrees
  AddParameter(ParameterType_Int, "classifier.rf.nbtrees",
               "Maximum number of trees in the forest");
  SetParameterInt("classifier.rf.nbtrees", 100);
  SetParameterDescription(
      "classifier.rf.nbtrees",
      "The maximum number of trees in the forest. Typically, the more trees you have, the better the accuracy. "
      "However, the improvement in accuracy generally diminishes and reaches an asymptote for a certain number of trees. "
      "Also to keep in mind, increasing the number of trees increases the prediction time linearly.");

  //ForestAccuracy
  AddParameter(ParameterType_Float, "classifier.rf.acc",
               "Sufficient accuracy (OOB error)");
  SetParameterFloat("classifier.rf.acc", 0.01);
  SetParameterDescription("classifier.rf.acc","Sufficient accuracy (OOB error).");


  //TerminationCriteria not exposed
}

template <class TInputValue, class TOutputValue>
void
LearningApplicationBase<TInputValue,TOutputValue>
::TrainRandomForests(typename ListSampleType::Pointer trainingListSample,
                     typename TargetListSampleType::Pointer trainingLabeledListSample,
                     std::string modelPath)
{
  typename RandomForestType::Pointer classifier = RandomForestType::New();
  classifier->SetRegressionMode(this->m_RegressionFlag);
  classifier->SetInputListSample(trainingListSample);
  classifier->SetTargetListSample(trainingLabeledListSample);
  classifier->SetMaxDepth(GetParameterInt("classifier.rf.max"));
  classifier->SetMinSampleCount(GetParameterInt("classifier.rf.min"));
  classifier->SetRegressionAccuracy(GetParameterFloat("classifier.rf.ra"));
  classifier->SetMaxNumberOfCategories(GetParameterInt("classifier.rf.cat"));
  classifier->SetMaxNumberOfVariables(GetParameterInt("classifier.rf.var"));
  classifier->SetMaxNumberOfTrees(GetParameterInt("classifier.rf.nbtrees"));
  classifier->SetForestAccuracy(GetParameterFloat("classifier.rf.acc"));

  classifier->Train();
  classifier->Save(modelPath);
}

} //end namespace Wrapper
} //end namespace otb

#endif
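
For context, TrainRandomForests() above simply forwards the classifier.rf.* parameter values into otb::RandomForestsMachineLearningModel. The following standalone sketch (not part of the packaged file) drives that same model directly with the default values exposed by InitRandomForestsParams(). The header name otbRandomForestsMachineLearningModel.h and the ITK ListSample/VariableLengthVector container types are assumptions based on the usual OTB 5.x layout; the toy data and output filename are purely illustrative.

// Minimal sketch, assuming otbRandomForestsMachineLearningModel.h and the
// standard ITK statistics containers; verify names against installed headers.
#include "otbRandomForestsMachineLearningModel.h"
#include "itkListSample.h"
#include "itkVariableLengthVector.h"
#include "itkFixedArray.h"

int main()
{
  typedef float                                          InputValueType;
  typedef unsigned int                                   TargetValueType;
  typedef itk::VariableLengthVector<InputValueType>      SampleType;
  typedef itk::Statistics::ListSample<SampleType>        ListSampleType;
  typedef itk::FixedArray<TargetValueType, 1>            TargetSampleType;
  typedef itk::Statistics::ListSample<TargetSampleType>  TargetListSampleType;
  typedef otb::RandomForestsMachineLearningModel<InputValueType,
                                                 TargetValueType> RFModelType;

  // Build a toy training set: two 2-D samples per class (illustrative only).
  ListSampleType::Pointer       samples = ListSampleType::New();
  TargetListSampleType::Pointer labels  = TargetListSampleType::New();
  samples->SetMeasurementVectorSize(2);

  const float           rawSamples[4][2] = { {0.0f, 0.0f}, {0.1f, 0.2f},
                                             {1.0f, 1.0f}, {0.9f, 1.1f} };
  const TargetValueType rawLabels[4]     = { 1, 1, 2, 2 };
  for (unsigned int i = 0; i < 4; ++i)
    {
    SampleType s(2);
    s[0] = rawSamples[i][0];
    s[1] = rawSamples[i][1];
    samples->PushBack(s);
    TargetSampleType t;
    t[0] = rawLabels[i];
    labels->PushBack(t);
    }

  // Same setters that TrainRandomForests() calls, with the defaults set by
  // InitRandomForestsParams().
  RFModelType::Pointer rf = RFModelType::New();
  rf->SetInputListSample(samples);
  rf->SetTargetListSample(labels);
  rf->SetMaxDepth(5);
  rf->SetMinSampleCount(10);
  rf->SetRegressionAccuracy(0.);
  rf->SetMaxNumberOfCategories(10);
  rf->SetMaxNumberOfVariables(0);   // 0 -> sqrt(number of features)
  rf->SetMaxNumberOfTrees(100);
  rf->SetForestAccuracy(0.01);

  rf->Train();
  rf->Save("rf_model.txt");         // same persistence call as the application
  return 0;
}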