This file is indexed.

/usr/include/ngram/ngram-count-prune.h is in libngram-dev 1.3.2-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2016 Brian Roark and Google, Inc.
// Count pruning style model shrinking derived class.

#ifndef NGRAM_NGRAM_COUNT_PRUNE_H_
#define NGRAM_NGRAM_COUNT_PRUNE_H_

#include <ngram/ngram-shrink.h>

namespace ngram {

// Count-pruning model shrinker: an NGramShrink<StdArc> whose per-state
// pruning threshold (see GetTheta) is a per-order count minimum.
class NGramCountPrune : public NGramShrink<StdArc> {
 public:
  // Constructs an NGramCountPrune object that count prunes an LM.
  // This version parses a count pattern string.
  // Expected format: "X(+):Y;Z(+):W"  X,Z are n-gram orders
  // '+' optional designation for >= order and Y,W are count minimums
  // ':' delimits prior to count minimum; ';' delimits fields.
  //
  // Example: "2:2;3+:3" signifies:
  //   prune bigrams with count < 2; trigrams and above with count < 3
  //
  // shrink_opt must be less than 2 for count pruning; any value >= 2 is
  // replaced by 0 before being handed to the base class.  All remaining
  // arguments are forwarded to NGramShrink<StdArc> unchanged.
  NGramCountPrune(StdMutableFst *infst, const string &count_pattern,
                  int shrink_opt = 0, double tot_uni = -1.0,
                  Label backoff_label = 0, double norm_eps = kNormEps,
                  bool check_consistency = false)
      : NGramShrink<StdArc>(infst, shrink_opt < 2 ? shrink_opt : 0, tot_uni,
                            backoff_label, norm_eps, check_consistency) {
    // One entry per order, each starting at the negated Zero() weight value
    // (presumably "never prune" -- confirm against NGramShrink's use of
    // GetTheta).  The pattern, if any, then overrides individual orders.
    for (int i = 0; i < HiOrder(); ++i)
      count_minimums_.push_back(-StdArc::Weight::Zero().Value());
    if (!count_pattern.empty()) ParseCountMinimums(count_pattern);
  }

  // Constructs an NGramCountPrune object that count prunes an LM.
  // This version is given the count minimums per order: count_minimums[i]
  // is used for order i + 1.  Orders beyond the end of count_minimums get
  // StdArc::Weight::Zero().Value(); entries beyond HiOrder() are ignored.
  //
  // shrink_opt must be less than 2 for count pruning; any value >= 2 is
  // replaced by 0 before being handed to the base class.
  NGramCountPrune(StdMutableFst *infst,
                  const std::vector<double> &count_minimums, int shrink_opt = 0,
                  double tot_uni = -1.0, Label backoff_label = 0,
                  double norm_eps = kNormEps, bool check_consistency = false)
      : NGramShrink(infst, shrink_opt < 2 ? shrink_opt : 0, tot_uni,
                    backoff_label, norm_eps, check_consistency) {
    // count_minimums_ starts out empty, so it has to be grown here; writing
    // through operator[] on the empty vector would be out of bounds.
    const int hi_order = HiOrder();
    if (hi_order > 0) count_minimums_.reserve(hi_order);
    for (int i = 0; i < hi_order; ++i) {
      const bool provided =
          static_cast<std::vector<double>::size_type>(i) <
          count_minimums.size();
      // NOTE(review): the string-pattern constructor defaults to
      // -Zero().Value() while this one uses +Zero().Value(); the sign is
      // kept as originally written -- confirm which default is intended.
      count_minimums_.push_back(provided ? count_minimums[i]
                                         : StdArc::Weight::Zero().Value());
    }
  }

  ~NGramCountPrune() override = default;

  // Shrinks n-gram model, based on initialized parameters.
  // Delegates to NGramShrink<StdArc>::ShrinkNGramModel(false) and returns
  // its result.
  bool ShrinkNGramModel() {
    return NGramShrink<StdArc>::ShrinkNGramModel(false);
  }

 protected:
  // Gives the pruning threshold (based on input count minimums): the
  // stored minimum for the order of the given state.  Orders are 1-based,
  // hence the -1 when indexing.
  double GetTheta(StateId state) const override {
    return count_minimums_[StateOrder(state) - 1];
  }

 private:
  // Checks if character is digit or decimal point.
  bool IsInNumber(char c) const { return (c >= '0' && c <= '9') || c == '.'; }

  // Returns the character at *strit and advances the iterator by one.
  // No end-of-string check is made here; the caller must guarantee that
  // *strit is dereferenceable.
  char GetNextChar(string::const_iterator *strit) const {
    char c = (*(*strit));
    ++(*strit);
    return c;
  }

  // Reads from string while token is a numerical value
  // (declared here; defined out of line).
  template <class A>
  char GetNextCharVal(string::const_iterator *strit, A *toget,
                      const string &count_pattern);

  // Derives count minimums from input count pruning string.
  void ParseCountMinimums(const string &count_pattern);

  // Updates count minimums for order, based on parsed parameter string
  void UpdateCountMinimums(int order, double count, bool plus);

  std::vector<double> count_minimums_;  // minimums for count pruning,
                                        // indexed by order - 1
};

}  // namespace ngram

#endif  // NGRAM_NGRAM_COUNT_PRUNE_H_