This file is indexed.

/usr/include/kmer/kmer/existDB.H is in libkmer-dev 0~20150903+r2013-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#ifndef EXISTDB_H
#define EXISTDB_H

//  Version stamp for this header.  Used by wgs-assembler, to determine
//  if a rather serious bug was patched.
#define EXISTDB_H_VERSION 1960

#include "bio++.H"

//  Takes as input a list of mers (in a file) and builds a searchable
//  structure listing those mers.  Duplicate mers are not removed and
//  will be stored multiple times.
//
//  Using a compressed hash is allowed, but somewhat useless -- it is
//  really slow and doesn't save that much.
//
//  If existDBcanonical is requested, this will store only the
//  canonical mer.  It is up to the client to be sure that is
//  appropriate!  See positionDB.H for more.

//#define STATS

//  Option bits taken by the existDB constructors.  Every constant
//  occupies its own bit, so several can be combined with bitwise OR
//  into one existDBflags value.
typedef uint32 existDBflags;

const existDBflags  existDBnoFlags         = 0;        //  no options selected
const existDBflags  existDBcompressHash    = 1u << 0;  //  0x0001
const existDBflags  existDBcompressBuckets = 1u << 1;  //  0x0002
const existDBflags  existDBcompressCounts  = 1u << 2;  //  0x0004
const existDBflags  existDBcanonical       = 1u << 3;  //  0x0008
const existDBflags  existDBforward         = 1u << 4;  //  0x0010
const existDBflags  existDBcounts          = 1u << 5;  //  0x0020

//  A table of mers: a hash table (_hashTable) indexes into buckets of
//  check values (_buckets), with an optional parallel array of counts
//  (_counts).  Buckets and counts may each be stored bit-packed.
//
//  NOTE(review): the class holds raw pointers and declares a destructor
//  but no copy constructor or assignment operator; presumably instances
//  must not be copied -- confirm against the destructor's definition.
class existDB {
public:

  //  Read state from an existDB file.
  //  NOTE(review): loadData=false presumably skips loading the tables
  //  themselves -- confirm against loadState().
  existDB(char const  *filename,
          bool         loadData=true);

  //  Load mers from an existing existDB file, a fasta file, or a meryl
  //  database.
  //  NOTE(review): lo and hi are forwarded-looking arguments that only
  //  createFromMeryl() also takes; presumably a count range -- confirm.
  existDB(char const    *filename,
          uint32         merSize,
          existDBflags   flags,
          uint32         lo,
          uint32         hi);

  //  Load mers from a character string
  existDB(char const    *sequence,
          uint32         merSize,
          existDBflags   flags);

  ~existDB();

  //  Defined out of line.  NOTE(review): saveState() is presumably the
  //  writer matching the file-reading constructor -- confirm.
  void        saveState(char const *filename);

  void        printState(FILE *stream);

  //  Read-only accessors for the orientation settings.
  bool        isForward(void)   const  { return(_isForward);   }
  bool        isCanonical(void) const  { return(_isCanonical); }

  //  Queries for a single mer; both are defined out of line.
  bool        exists(uint64 mer);
  uint64      count(uint64 mer);

private:
  //  Builders, defined out of line; each returns a bool status.
  bool        loadState(char const *filename, bool beNoisy=false, bool loadData=true);
  bool        createFromFastA(char const  *filename,
                              uint32       merSize,
                              uint32       flags);
  bool        createFromMeryl(char const  *filename,
                              uint32       merSize,
                              uint32       lo,
                              uint32       hi,
                              uint32       flags);
  bool        createFromSequence(char const  *sequence,
                                 uint32       merSize,
                                 uint32       flags);

  //  Hash of k: XOR k with two right-shifted copies of itself, then
  //  keep only the bits selected by _mask1.
  uint64       HASH(uint64 k) {
    return(((k >> _shift1) ^ (k >> _shift2) ^ k) & _mask1);
  }

  //  Check value of k: the bits of k selected by _mask2.
  uint64       CHECK(uint64 k) {
    return(k & _mask2);
  }

  //  Store one mer.
  //
  //    hsh            index into _hashTable and countingTable
  //    chk            value stored in _buckets for this mer
  //    cnt            value stored in (or added to) _counts, if present
  //    countingTable  countingTable[hsh] is the next free slot of the
  //                   bucket for hsh; it is advanced by one whenever a
  //                   new entry is appended.
  //
  void         insertMer(uint64 hsh, uint64 chk, uint64 cnt, uint64 *countingTable) {

    //  If the mer is already here, just update the count.  This only
    //  makes sense for loading from fasta or sequence, and it only
    //  works when the bucket and the counts can be addressed as plain
    //  words:
    //
    //    - a compressed bucket cannot be scanned with _buckets[st];
    //    - compressed counts keep entry i at bit offset i * _cntWidth
    //      (see the append below), so '_counts[st] += cnt' would
    //      corrupt the packed array.
    //
    //  In either case the search is skipped and the mer is appended as
    //  a new entry.
    //
    //  NOTE(review): appending assumes the bucket was sized for every
    //  inserted mer, duplicates included -- confirm against the code
    //  that allocates _buckets and _counts.

    bool const countsArePacked = (_counts) && (_compressedCounts);

    if ((_searchForDupe) &&
        (_compressedBucket == false) &&
        (countsArePacked == false)) {
      uint64 st = _hashTable[hsh];
      uint64 ed = countingTable[hsh];

      for (; st<ed; st++) {
        if (_buckets[st] == chk) {
          if (_counts)
            _counts[st] += cnt;
          return;
        }
      }
    }

    //  Not found (or not searched): append at the next free slot.

    if (_compressedBucket)
      setDecodedValue(_buckets, countingTable[hsh] * _chkWidth, _chkWidth, chk);
    else
      _buckets[countingTable[hsh]] = chk;

    if (_counts) {
      if (_compressedCounts) {
        setDecodedValue(_counts, countingTable[hsh] * _cntWidth, _cntWidth, cnt);
      } else {
        _counts[countingTable[hsh]] = cnt;
      }
    }

    countingTable[hsh]++;
  }

  //  Storage options.
  bool        _compressedHash;
  bool        _compressedBucket;
  bool        _compressedCounts;
  bool        _isForward;
  bool        _isCanonical;

  //  When set, insertMer() looks for an existing entry before appending.
  bool        _searchForDupe;

  uint32      _merSizeInBases;

  //  Parameters of HASH() and CHECK().
  uint32      _shift1;
  uint32      _shift2;
  uint64      _mask1;
  uint64      _mask2;

  uint32      _hshWidth;  //  Only for the compressed hash
  uint32      _chkWidth;  //  Only for the compressed bucket
  uint32      _cntWidth;  //  Only for the compressed counts

  uint64      _hashTableWords;
  uint64      _bucketsWords;
  uint64      _countsWords;

  uint64     *_hashTable;
  uint64     *_buckets;
  uint64     *_counts;    //  Null when counts are not stored

  //  Currently a no-op.
  void clear(void) {
  }
};

#endif  //  EXISTDB_H