/usr/include/libphylo/jcDistance.h is in rate4site 3.0.0-5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | // $Id: jcDistance.h 1928 2007-04-04 16:46:12Z privmane $
#ifndef ___JC_DISTANCE
#define ___JC_DISTANCE
#include "definitions.h"
#include "distanceMethod.h"
#include <typeinfo>
#include <cmath>
/*********************************************************
Jukes-Cantor distance method.
Assumes no constraints on replacement from one state to another.
Receives size of alphabet in constructor, and this enables
to have one class for JC-distance for nucleotides, a.a., and codons
Weights are an input vector for giving additional weight to positions in the sequences.
*******************************************************/
class jcDistance : public distanceMethod {
public:
explicit jcDistance() {}
virtual jcDistance* clone() const{ return new jcDistance(*this);}
const MDOUBLE giveDistance( const sequence& s1,
const sequence& s2,
const vector<MDOUBLE> * weights,
MDOUBLE* score=NULL) const {//score is not used here
if (typeid(s1.getAlphabet()) != typeid(s2.getAlphabet()))
errorMsg::reportError("Error in jcDistance::giveDistance, s1 and s2 contain different type of alphabet");
// pS1Base and pS2Base are references to s1 and s2 respectively.
// The method uses seq1 and seq2 and not s1 and s2, because when
// the sequences contain mulAlphabet we must first convert them to the base alphabet
const sequence* pS1Base(&s1);
const sequence* pS2Base(&s2);
const alphabet* alph = s1.getAlphabet();
// if s1 and contains mulAlphabet
const mulAlphabet* mulAlph = dynamic_cast<const mulAlphabet*>(alph);
if (mulAlph!=NULL) {
pS1Base = new sequence(s1,mulAlph->getBaseAlphabet());
pS2Base = new sequence(s2,mulAlph->getBaseAlphabet());
}
int alphabetSize = pS1Base->getAlphabet()->size();
// const MDOUBLE MAXDISTANCE=2.0;
const MDOUBLE MAXDISTANCE=15;
MDOUBLE p =0;
MDOUBLE len=0.0;
if (weights == NULL) {
for (int i = 0; i < pS1Base->seqLen() ; ++i) {
if ((*pS1Base)[i]<0 || (*pS2Base)[i]<0) continue; //gaps and missing data.
len+=1.0;
if ((*pS1Base)[i] != (*pS2Base)[i]) p++;
}
if (len==0) p=1;
else p = p/len;
} else {
for (int i = 0; i < pS1Base->seqLen() ; ++i) {
if ((*pS1Base)[i]<0 || (*pS2Base)[i]<0) continue; //gaps and missing data.
len += (*weights)[i];
if ((*pS1Base)[i] != (*pS2Base)[i]) p+=((*weights)[i]);
}
if (len==0) p=1;
else {
p = p/len;
}
}
if (pS1Base != &s1) {
delete pS1Base;
delete pS2Base;
}
const MDOUBLE inLog = 1 - (MDOUBLE)alphabetSize*p/(alphabetSize-1.0);
if (inLog<=0) {
// LOG(6,<<" DISTANCES FOR JC DISTANCE ARE TOO BIG");
// LOG(6,<<" p="<<p<<endl);
return MAXDISTANCE;
}
MDOUBLE dis = -1.0 * (1.0 - 1.0/alphabetSize) * log (inLog);
return dis;
}
};
class jcDistanceOLD : public distanceMethod {
// in this version, if you have
// a gap in front of a letter - it will be taken as a different
// and also the length of the pairwise comparison will be increased.
// in case of a gap-gap, it won't be a difference, but the length will
// be increase.
private:
const int _alphabetSize;
public:
explicit jcDistanceOLD(const int alphabetSize) : _alphabetSize(alphabetSize) {
}
explicit jcDistanceOLD(const jcDistanceOLD& other) : _alphabetSize(other._alphabetSize) {
}
virtual jcDistanceOLD* clone() const{ return new jcDistanceOLD(*this);}
const MDOUBLE giveDistance( const sequence& s1,
const sequence& s2,
const vector<MDOUBLE> * weights,
MDOUBLE* score=NULL) const {//score is not used here
// const MDOUBLE MAXDISTANCE=2.0;
const MDOUBLE MAXDISTANCE=15;
MDOUBLE p =0;
MDOUBLE len=0.0;
if (weights == NULL) {
for (int i = 0; i < s1.seqLen() ; ++i) {
//if (s1[i]<0 || s2[i]<0) continue; //gaps and missing data.
len+=1.0;
if (s1[i] != s2[i]) p++;
}
if (len==0) p=1;
else p = p/len;
} else {
for (int i = 0; i < s1.seqLen() ; ++i) {
//if (s1[i]<0 || s2[i]<0) continue; //gaps and missing data.
len += (*weights)[i];
if (s1[i] != s2[i]) p+=((*weights)[i]);
}
if (len==0) p=1;
else {
p = p/len;
}
}
const MDOUBLE inLog = 1 - (MDOUBLE)_alphabetSize*p/(_alphabetSize-1.0);
if (inLog<=0) {
// LOG(6,<<" DISTANCES FOR JC DISTANCE ARE TOO BIG");
// LOG(6,<<" p="<<p<<endl);
return MAXDISTANCE;
}
MDOUBLE dis = -1.0 * (1.0 - 1.0/_alphabetSize) * log (inLog);
return dis;
}
};
#endif
|