/usr/include/shark/ObjectiveFunctions/NegativeLogLikelihood.h is in libshark-dev 3.1.4+ds1-1ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*!
*
*
* \brief Negative Log Likelihood error function
*
*
*
* \author O.Krause
* \date 2014
*
*
* \par Copyright 1995-2015 Shark Development Team
*
* <BR><HR>
* This file is part of Shark.
* <http://image.diku.dk/shark/>
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef SHARK_OBJECTIVEFUNCTIONS_NEGATIVE_LOG_LIKELIHOOD_H
#define SHARK_OBJECTIVEFUNCTIONS_NEGATIVE_LOG_LIKELIHOOD_H
#include <shark/Models/AbstractModel.h>
#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include <shark/Rng/GlobalRng.h>
#include <boost/range/algorithm_ext/iota.hpp>
#include <boost/range/algorithm/random_shuffle.hpp>
namespace shark{
/// \brief Computes the negative log likelihood of a dataset under a model
///
/// The negative log likelihood is defined as
/// \f[ L(\theta) = -\frac{1}{N} \sum_{i=1}^N \log(p_{\theta}(x_i)) \f]
/// where \f$ \theta \f$ is the vector of parameters of the model \f$ p \f$ and \f$ x \f$ are the
/// datapoints of the training set. Minimizing this
/// maximizes the probability of the dataset under p. This error measure is
/// closely related to the Kullback-Leibler divergence.
///
/// For this error function, the model is only allowed to have a single output
/// - the probability of the sample. The distribution must be normalized, as otherwise
/// the likelihood does not mean anything.
class NegativeLogLikelihood : public SingleObjectiveFunction
{
public:
    typedef UnlabeledData<RealVector> DatasetType;

    /// \brief Creates the objective for a data set and a model.
    ///
    /// \param data  points over which the mean negative log likelihood is computed;
    ///              stored by value in the member m_data
    /// \param model model that is evaluated on the data. Only the pointer is stored and
    ///              it is dereferenced right here, so it must not be null.
    ///
    /// The HAS_FIRST_DERIVATIVE feature is announced only when the model reports a
    /// first parameter derivative; CAN_PROPOSE_STARTING_POINT is always announced.
    NegativeLogLikelihood(
        DatasetType const& data,
        AbstractModel<RealVector,RealVector>* model
    ):mep_model(model),m_data(data){
        if(mep_model->hasFirstParameterDerivative())
            m_features |= HAS_FIRST_DERIVATIVE;
        m_features |= CAN_PROPOSE_STARTING_POINT;
    }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "NegativeLogLikelihood"; }

    /// \brief Proposes the model's current parameter vector as starting point.
    SearchPointType proposeStartingPoint() const{
        return mep_model->parameterVector();
    }

    /// \brief Number of search variables, i.e. the number of parameters of the model.
    std::size_t numberOfVariables()const{
        return mep_model->numberOfParameters();
    }

    /// \brief Evaluates the mean negative log likelihood at the given parameters.
    ///
    /// As a side effect the model's parameter vector is set to \a input and the
    /// evaluation counter is incremented. Every model output smaller than 1e-100
    /// is replaced by 1e-100 before the logarithm is taken.
    ///
    /// \param input parameter vector; its size must equal numberOfVariables()
    /// \return the negated sum of the log outputs divided by the number of elements
    ResultType eval(RealVector const& input) const{
        SIZE_CHECK(input.size() == numberOfVariables());
        m_evaluationCounter++;
        mep_model->setParameterVector(input);

        double error = 0;
        double minProb = 1e-100;//numerical stability is only guaranteed for lower bounded probabilities
        SHARK_PARALLEL_FOR(int i = 0; i < (int)m_data.numberOfBatches(); ++i){
            RealMatrix predictions = (*mep_model)(m_data.batch(i));
            SIZE_CHECK(predictions.size2() == 1);
            double logLikelihoodOfSamples = sum(log(max(predictions,minProb)));
            //the accumulator is shared between iterations, so updates are serialized
            SHARK_CRITICAL_REGION{
                error += logLikelihoodOfSamples;
            }
        }
        error/=m_data.numberOfElements();//compute mean
        return -error;//negative log likelihood
    }

    /// \brief Evaluates the mean negative log likelihood and its parameter gradient.
    ///
    /// As a side effect the model's parameter vector is set to \a input and the
    /// evaluation counter is incremented. The batches are split into contiguous
    /// ranges, one range per loop iteration of the parallel loop; each iteration
    /// accumulates its own error and gradient, which are then merged inside a
    /// critical region.
    ///
    /// Outputs smaller than 1e-100 are clamped for the error value (as in eval())
    /// and receive a weight of 0 in the gradient; all other outputs p receive the
    /// weight 1/p.
    ///
    /// \param input      parameter vector; its size must equal numberOfVariables()
    /// \param derivative overwritten with the gradient of the returned value
    /// \return the negated sum of the log outputs divided by the number of elements
    ResultType evalDerivative(
        SearchPointType const& input,
        FirstOrderDerivative & derivative
    ) const{
        SIZE_CHECK(input.size() == numberOfVariables());
        m_evaluationCounter++;
        mep_model->setParameterVector(input);
        derivative.resize(input.size());
        derivative.clear();

        //compute partitioning on threads
        std::size_t numBatches = m_data.numberOfBatches();
        std::size_t numElements = m_data.numberOfElements();
        std::size_t numThreads = std::min(SHARK_NUM_THREADS,numBatches);
        //A data set without batches would give numThreads == 0 and the integer
        //division below would be a division by zero. With a single (empty) range
        //the loop body does no batch work and only the floating point divisions
        //by numElements remain, exactly as in eval().
        if(numThreads == 0)
            numThreads = 1;

        //calculate optimal partitioning: the first leftOver ranges get one extra batch
        std::size_t batchesPerThread = numBatches/numThreads;
        std::size_t leftOver = numBatches - batchesPerThread*numThreads;

        double error = 0;
        double minProb = 1e-100;//numerical stability is only guaranteed for lower bounded probabilities
        SHARK_PARALLEL_FOR(int ti = 0; ti < (int)numThreads; ++ti){//MSVC does not support unsigned integrals in parallel loops
            std::size_t t = ti;
            //get start and end index of batch-range
            std::size_t start = t*batchesPerThread+std::min(t,leftOver);
            std::size_t end = (t+1)*batchesPerThread+std::min(t+1,leftOver);

            //calculate error and derivative of the current thread
            FirstOrderDerivative threadDerivative(input.size(),0.0);
            double threadError = 0;
            boost::shared_ptr<State> state = mep_model->createState();
            RealVector batchDerivative;
            RealMatrix predictions;
            for(std::size_t i = start; i != end; ++i){
                mep_model->eval(m_data.batch(i),predictions,*state);
                SIZE_CHECK(predictions.size2() == 1);
                threadError += sum(log(max(predictions,minProb)));

                //turn the predictions into the weights d/dp log(p) = 1/p in place.
                //This is an element-wise inverse, written out by hand so that
                //entries below minProb get weight 0 instead of a huge value.
                for(std::size_t j = 0; j != predictions.size1(); ++j){
                    for(std::size_t k = 0; k != predictions.size2(); ++k){
                        if(predictions(j,k) < minProb){
                            predictions(j,k) = 0;
                        }
                        else{
                            predictions(j,k) = 1.0/predictions(j,k);
                        }
                    }
                }
                mep_model->weightedParameterDerivative(
                    m_data.batch(i),predictions,*state,batchDerivative
                );
                threadDerivative += batchDerivative;
            }

            //sum over all threads
            SHARK_CRITICAL_REGION{
                error += threadError;
                noalias(derivative) += threadDerivative;
            }
        }

        //turn the sums into means and flip the sign: log likelihood -> negative log likelihood
        error /= numElements;
        derivative /= numElements;
        derivative *= -1;
        return -error;//negative log likelihood
    }

private:
    /// Model whose outputs are interpreted as probabilities; only the pointer is held.
    AbstractModel<RealVector,RealVector>* mep_model;
    /// Data points the likelihood is computed over.
    UnlabeledData<RealVector> m_data;
};
}
#endif
|