/usr/include/libphylo/likeDist.h is in rate4site 3.0.0-5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | // $Id: likeDist.h 9752 2011-08-05 20:27:25Z rubi $
#ifndef ___LIKE_DIST_H
#define ___LIKE_DIST_H
#include "definitions.h"
#include "countTableComponent.h"
#include "distanceMethod.h"
#include "stochasticProcess.h"
#include "logFile.h"
#include "jcDistance.h"
#include "unObservableData.h"
#include <cmath>
using namespace std;
class likeDist : public distanceMethod {
public:
// WARNING: the stochasticProcess is NOT copied. The same object is used
explicit likeDist(const stochasticProcess& sp,
const MDOUBLE toll =0.0001,
const MDOUBLE maxPairwiseDistance = 5.0,
const MDOUBLE minPairwiseDistance = 0.0000001,
unObservableData* unObservableData_p=NULL)
: _sp(sp),_nonConstSpPtr(NULL),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance),_unObservableData_p(unObservableData_p) {}
likeDist(const likeDist& other)
: _sp(other._sp),_nonConstSpPtr(other._nonConstSpPtr),_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance),_minPairwiseDistance(other._minPairwiseDistance),_jcDist(other._jcDist) {}
virtual likeDist* clone() const {return new likeDist(*this);}
// This constructor allows non-const stochasticProcess so that likeDist will be able to change alpha, etc.
explicit likeDist(stochasticProcess& sp,
const MDOUBLE toll =0.0001,
const MDOUBLE maxPairwiseDistance = 5.0,
const MDOUBLE minPairwiseDistance = 0.0000001)
: _sp(sp),_nonConstSpPtr(&sp),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance) {}
// THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge
// TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
// NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
const MDOUBLE giveDistance(const countTableComponentGam& ctc,
MDOUBLE& resQ,
const MDOUBLE initialGuess= 0.03) const; // initial guess
// given two sequences, it evaluates the log likelihood.
MDOUBLE evalLogLikelihoodGivenDistance(const sequence& s1,
const sequence& s2,
const MDOUBLE dis2evaluate);
// returns the estimated ML distance between the 2 sequences.
// if score is given, it will be the log-likelihood.
const MDOUBLE giveDistance(const sequence& s1,
const sequence& s2,
const vector<MDOUBLE> * weights,
MDOUBLE* score=NULL) const;
// this function creates a countTableComponent (ctc) from the two sequences.
// it then computes the distance from this ctc.
// THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN score, BUT RATHER "Q", THE CONTRIBUTION of this edge
// TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
// NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
MDOUBLE giveDistanceThroughCTC(const sequence& s1,
const sequence& s2,
const vector<MDOUBLE> * weights,
MDOUBLE* score=NULL) const;
const MDOUBLE giveLikelihood(const sequence& s1,
const sequence& s2,
MDOUBLE distance,
const vector<MDOUBLE> * weights=NULL) const;
// return the stochasticProcess
const stochasticProcess& getStochasticProcess() const {return _sp;}
stochasticProcess& getNonConstStochasticProcess();
bool isTheInternalStochasticProcessConst() const {return !_nonConstSpPtr;}
MDOUBLE getToll() const {return _toll;}
MDOUBLE getMaxPairwiseDistance() const {return _maxPairwiseDistance;}
protected:
const stochasticProcess &_sp;
stochasticProcess *_nonConstSpPtr;
const MDOUBLE _toll;
const MDOUBLE _maxPairwiseDistance;
const MDOUBLE _minPairwiseDistance;
jcDistance _jcDist;
unObservableData* _unObservableData_p;
private:
const MDOUBLE giveDistanceBrent( const countTableComponentGam& ctc,
MDOUBLE& resL,
const MDOUBLE initialGuess= 0.03) const; // initial guess
const MDOUBLE giveDistanceNR( const countTableComponentGam& ctc,
MDOUBLE& resL,
const MDOUBLE initialGuess= 0.03) const; // initial guess
public:
static MDOUBLE evalLikelihoodForDistance(const stochasticProcess& sp,
const sequence& s1,
const sequence& s2,
const MDOUBLE dist,
const vector<MDOUBLE> * weights=NULL);
};
//////////////////////////////////////////////////////////////////////////
class C_evalLikeDist{
private:
const countTableComponentGam& _ctc;
const stochasticProcess& _sp;
unObservableData* _unObservableData_p;
public:
C_evalLikeDist(const countTableComponentGam& ctc,
const stochasticProcess& inS1,unObservableData* unObservableData_p=NULL)
:_ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {};
MDOUBLE operator() (MDOUBLE dist) {
const MDOUBLE epsilonPIJ = 1e-10;
MDOUBLE sumL=0.0;
for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
MDOUBLE rate = _sp.rates(rateCategor);
MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
sumL+= _ctc.getCounts(alph1,alph2,rateCategor)*(log(pij)-log(_sp.freq(alph2)));//*_sp.ratesProb(rateCategor);// removed.
}
}
}
//if(_unObservableData_p)
// sumL = sumL/(1- exp(_unObservableData_p->getlogLforMissingData())); // need to find an efficient way to update LofMissingData with dist
LOG(8,<<"check bl="<<dist<<" gives sumL "<<sumL<<endl);
return -sumL;
};
};
// REMARK 1: THE LINE if if (pij<epsilonPIJ) pij = epsilonPIJ
// There are cases when i != j, and t!=0, and yet pij =0, because of numerical problems
// For these cases, it is easier to assume pij is very small, so that log-pij don't fly...
class C_evalLikeDist_d{ // derivative.
public:
C_evalLikeDist_d(const countTableComponentGam& ctc,
const stochasticProcess& inS1,unObservableData* unObservableData_p=NULL): _ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {};
private:
const countTableComponentGam& _ctc;
const stochasticProcess& _sp;
unObservableData* _unObservableData_p;
public:
MDOUBLE operator() (MDOUBLE dist) {
const MDOUBLE epsilonPIJ = 1e-10;
MDOUBLE sumDL=0.0;
for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
MDOUBLE rate = _sp.rates(rateCategor);
MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
sumDL+= _ctc.getCounts(alph1,alph2,rateCategor)*dpij //*_sp.ratesProb(rateCategor) : removed CODE_RED
*rate/pij;
}
}
}
//cerr<<"derivation = "<<-sumDL<<endl;
//if(_unObservableData_p)
// sumDL = sumDL/(1- exp(_unObservableData_p->getlogLforMissingData())); // 1. need to find an efficient way to update LofMissingData with dist 2. correct the derivative?
LOG(12,<<"check bl="<<dist<<" gives sumDL "<<sumDL<<endl);
return -sumDL;
};
};
//////////////////////////////////////////////////////////////////////////
class C_evalLikeDist_d2{ // second derivative.
public:
C_evalLikeDist_d2(const countTableComponentGam& ctc,
const stochasticProcess& inS1) : _ctc(ctc), _sp(inS1) {};
private:
const countTableComponentGam& _ctc;
const stochasticProcess& _sp;
public:
MDOUBLE operator() (MDOUBLE dist) {
MDOUBLE sumDL=0.0;
for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
MDOUBLE rate = _sp.rates(rateCategor);
MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
MDOUBLE d2pij = _sp.d2Pij_dt2(alph1,alph2,dist*rate);
sumDL+= rate*_ctc.getCounts(alph1,alph2,rateCategor)*
(pij*d2pij - dpij *dpij )/(pij*pij);
}
}
}
return -sumDL;
};
};
#endif
|