This file is indexed.

/usr/include/freecontact.h is in libfreecontact-dev 1.0.21-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
/*  FreeContact - program to predict protein residue contacts from a sufficiently large multiple alignment
*   Copyright (C) 2012-2013  Laszlo Kajan <lkajan@rostlab.org> Rost Lab, Technical University of Munich, Germany
*   
*   This program is free software: you can redistribute it and/or modify
*   it under the terms of the GNU General Public License as published by
*   the Free Software Foundation, either version 3 of the License, or
*   (at your option) any later version.
*   
*   This program is distributed in the hope that it will be useful,
*   but WITHOUT ANY WARRANTY; without even the implied warranty of
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*   GNU General Public License for more details.
*   
*   You should have received a copy of the GNU General Public License
*   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LIBFREECONTACT_H
#define LIBFREECONTACT_H

#include <errno.h>
#include <map>
#include <math.h>
#include <stdexcept>
#include <stdint.h>
#include <string.h>
#include <vector>
#ifdef __SSE2__
#include <x86intrin.h>
#endif // __SSE2__

#define LIBFREEC_API __attribute__ ((visibility ("default")))
#define LIBFREEC_LOCAL __attribute__ ((visibility ("hidden")))

namespace freecontact {

// http://www.fao.org/docrep/004/Y2775E/y2775e0e.htm
// Use 20 for gap.
static const uint8_t aamap_gapidx = 20;
static const uint8_t aamap[] = {
    //  A  B  C  D  E  F   G  H  I  J   K   L   M   N
    21, 0, 3, 4, 3, 6, 13, 7, 8, 9, 21, 11, 10, 12, 2,
    //  O   P   Q  R  S   T   U   V   W   X   Y   Z
        21, 14, 5, 1, 15, 16, 21, 19, 17, 21, 18, 6
};
static const uint8_t mapaa[] = {
   //0    1    2    3    4    5    6    7    8    9
    'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
    'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V',
    '-', 'X'
};

/// Alignment length exception.
class LIBFREEC_API alilen_error : public std::runtime_error {
    public:
                    alilen_error(const std::string& __arg) : std::runtime_error(__arg){}
};

/// Inverse covariance matrix estimation timeout exception.
class LIBFREEC_API icme_timeout_error : public std::runtime_error {
    public:
                    icme_timeout_error(const std::string& __arg) : std::runtime_error(__arg){}
};

#ifndef __SSE2__
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
#endif

/// Protein sequence alignment.
class LIBFREEC_API ali_t : public std::vector<__m128i>
{
    private:
    #pragma GCC visibility push(hidden)
    #pragma GCC visibility pop
    public:
    uint32_t seqcnt;
    uint16_t alilen; // aligned sequence length, max 65535, reasonable
    uint16_t alilen16; // size in 16-bytes
    uint16_t alilenpad; // 16 * alilen16

                    ali_t( const uint16_t __alilen = 0 ) : std::vector<__m128i>(), seqcnt(0), alilen(__alilen), alilen16(( alilen-1 )/16 + 1), alilenpad( 16 * alilen16 )
    {
        reserve(1024*alilen16);
    }
    virtual         ~ali_t(){}

    inline uint8_t& operator()(uint32_t __k, uint16_t __ai)
    {
        return reinterpret_cast<uint8_t*>(this->_M_impl._M_start)[ __k*alilenpad + __ai ];
    }
    inline const uint8_t& operator()(uint32_t __k, uint16_t __ai) const
    {
        return reinterpret_cast<uint8_t*>(this->_M_impl._M_start)[ __k*alilenpad + __ai ];
    }

    /// Convert alignment string to amino acid codes with aamap.
    static std::vector<uint8_t>
                    read_a_seq( const std::string& __l )
    {
        std::vector<uint8_t> ret; ret.reserve(__l.length());
        for( std::string::const_iterator l_b = __l.begin(), l_e = __l.end(); l_b != l_e; ++l_b )
        {
            const std::string::value_type c = *l_b;
            if( isalpha(c) )
                ret.push_back( aamap[c & 0x1F] );
            else if( c == '-' )
                ret.push_back( 20 );
            else break;
        }
        return ret;
    }

    /// Push a sequence to the alignment.
    ali_t&      push(const std::vector<uint8_t>& __al) throw (alilen_error);

    /// Push a sequence to the alignment.
    inline ali_t&   push(const std::string& __l) throw (alilen_error)
    {
        return push(read_a_seq(__l));
    }
};


/// Contact prediction.
struct LIBFREEC_API contact_t {
    /// Residue number, 0-based(!).
    uint16_t        i, j;
    /// Contact score.
    float           score;
                    contact_t( uint16_t __i = 0, uint16_t __j = 0, float __score = 0 ) : i(__i), j(__j), score(__score) {}

    inline bool     operator<( const contact_t& __b ) const { return score < __b.score; } // to facilitate sorting by score
    inline bool     operator>( const contact_t& __b ) const { return score > __b.score; } // to facilitate sorting by score
};

/// Parameter set structure for predictor::run().
struct parset_t {
    double clustpc, density, gapth; uint16_t mincontsep;
    double pseudocnt, pscnt_weight; bool estimate_ivcov; double shrink_lambda;
    bool cov20, apply_gapth; double rho;

                    parset_t( double __clustpc = 0, double __density = 0, double __gapth = 0, uint16_t __mincontsep = 0,
                        double __pseudocnt = 0, double __pscnt_weight = 0,  bool __estimate_ivcov = 0, double __shrink_lambda = 0,
                        bool __cov20 = 0, bool __apply_gapth = 0, double __rho = 0
                    ) :
                    clustpc(__clustpc), density(__density), gapth(__gapth), mincontsep(__mincontsep),
                    pseudocnt(__pseudocnt), pscnt_weight(__pscnt_weight), estimate_ivcov(__estimate_ivcov), shrink_lambda(__shrink_lambda),
                    cov20(__cov20), apply_gapth(__apply_gapth), rho(__rho)
                    {}
};

const parset_t //          clust conts gapt minc pseud pscw estim  shrin cov20  agapth  rho
               //ps_evfold(0.70, 0.0,  1.0, 1,   0.0,  0.5, false, 0.0,  true,  false,  -1 ), // original Matlab that expects gap pre-filtering
               ps_evfold(  0.70, 0.0,  0.9, 1,   0.0,  0.5, false, 0.0,  true,  true,   -1 ),
               ps_psicov(  0.62, 0.03, 0.9, 5,   1.0,  0.0, true,  0.1,  false, false,  -1 ), // as published
            ps_psicov_sd(  0.62, 0.0,  0.9, 5,   1.0,  0.0, true,  0.1,  false, false,   0.001 ); // PSICOV sensible default (sd) for most cases

/// Protein residue contact predictor.
/** A sufficiently large multiple alignment is required for meaningful results.
 */
class LIBFREEC_API predictor {
    public:
    typedef std::map<std::string, std::vector<contact_t> >      cont_res_t;
    typedef double                                              freq_t;
    typedef float                                               pairfreq_t;
    typedef std::vector<freq_t>                                 freq_vec_t;
    typedef std::map<std::string, double>                       time_res_t;
    public:
    bool            dbg;
                    predictor(bool __dbg = false) : dbg(__dbg) {}
    virtual         ~predictor(){}

    /// Calculate alignment sequence weights.
    /** \param [out] __aliw         Vector of alignment sequence weights.
     *  \param [out] __wtot         Total alignment weight.
     *  \param [in] __ali           Input alignment.
     *  \param [in] __clustpc       BLOSUM-style clustering similarity threshold [0-1].
     *  \param [in] __veczw         Use vectorized sequence weighting when available.
     *  \param [in] __num_threads   Number of OpenMP threads, effective if non-zero. The default is as many threads as cores in host.
            Ineffective if library is not compiled with OMP support.
     */
     void           get_seq_weights(
                        freq_vec_t &__aliw, double &__wtot,
                        const ali_t& __ali, double __clustpc,
                        bool __veczw = true, int __num_threads = 0
                    ) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);

    /// Predict residue contacts.
    /** \param [in] __ali           Input alignment.
     *  \param [in] __aliw          Vector of alignment sequence weights.
     *      Obtain this with get_seq_weights().
     *  \param [in] __wtot          Total weight of alignment.
     *      Obtain this with get_seq_weights().
     *  \param [in] __density       Target precision matrix density [0-1]. Set to 0 to not control density.
     *  \param [in] __gapth         Threshold of weighted gap frequency for ignoring alignment columns with too many gaps [0-1]. Set to 1.00 to keep all columns.
     *      This is implemented by using a very high regularization value for gap columns, and by excluding gap columns from the covariance
     *      matrix, see __apply_gapth.
     *  \param [in] __mincontsep    Minimum sequence-wise contacting residue pair separation given in amino acids as (j-i). 1 for adjacent residues. [1-).
     *  \param [in] __pseudocnt     Number to initialize single and pair amino acid counts with [0-).
     *  \param [in] __pscnt_weight  Pseudocount weight to apply to single and pair amino acid frequencies [0-1].
     *  \param [in] __estimate_ivcov Estimate inverse covariance matrix instead of fully inverting matrix. This is currently done by GLASSOFAST.
     *  \param [in] __shrink_lambda Covariance matrix shrinkage parameter, controlling rate of shrinkage [0-1].
     *  \param [in] __cov20         Leave one amino acid off the covariance matrix, making it non-overdetermined.
     *  \param [in] __apply_gapth   When true, exclude residue columns and rows with a weighted gap frequency > __gapth from the covariance matrix.
     *  \param [in] __rho           Initial value of Glasso regularization parameter. Negative values trigger an automatic choice for rho.
     *  \param [in] __num_threads   Number of OpenMP threads, effective if non-zero. The default is as many threads as cores in host.
            Ineffective if library is not compiled with OMP support.
     *  \param [in] __icme_timeout  Inverse covariance matrix estimation timeout in seconds. Default: 1800.
            Applied to each inversion call independently.
     *  \param [out] __timing       Pointer to map of timing results for certain components of this method. Useful for debugging. Keys are: [num_threads|seqw|pairfreq|shrink|inv|all].
     *  \return                     Map of vectors of predicted contacts.
     *      Key is contact score calculation method name: 'l1norm', 'MI' (mutual information), 'fro' (Frobenius norm after zero-sum gauge).
     *      Vectors may not contain all contact pairs. Contacts are ordered by residue index (i,j).
     */
    cont_res_t      run(const ali_t& __ali, const freq_vec_t &__aliw, const double __wtot,
                        double __density, double __gapth, uint16_t __mincontsep,
                        double __pseudocnt, double __pscnt_weight, bool __estimate_ivcov, double __shrink_lambda,
                        bool __cov20, bool __apply_gapth, double __rho,
                        int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
                    ) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);

    /// Predict residue contacts.
    cont_res_t      run(const ali_t& __ali, double __clustpc,
                        double __density, double __gapth, uint16_t __mincontsep,
                        double __pseudocnt, double __pscnt_weight, bool __estimate_ivcov, double __shrink_lambda,
                        bool __cov20, bool __apply_gapth, double __rho,
                        bool __veczw = true, int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
                    ) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);

    /// Predict residue contacts.
    inline
    cont_res_t      run(const ali_t& __ali,
                        const parset_t& __parset,
                        bool __veczw = true, int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
                    ) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error)
                    {
                        return run(__ali, __parset.clustpc,
                            __parset.density, __parset.gapth, __parset.mincontsep,
                            __parset.pseudocnt, __parset.pscnt_weight, __parset.estimate_ivcov, __parset.shrink_lambda,
                            __parset.cov20, __parset.apply_gapth, __parset.rho,
                            __veczw, __num_threads, __icme_timeout, __timing);
                    }
};

} // namespace freecontact

#endif // LIBFREECONTACT_H
// vim:et:ts=4:ai: