This file is indexed.

/usr/include/apertium-3.4/apertium/tmx_dictionary.h is in apertium-dev 3.4.2~r68466-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*************************************************************************
*                                                                        *
*  (C) Copyright 2004. Media Research Centre at the                      *
*  Sociology and Communications Department of the                        *
*  Budapest University of Technology and Economics.                      *
*                                                                        *
*  Developed by Daniel Varga.                                            *
*                                                                        *
*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
*                                                                        *
*************************************************************************/
#ifndef __TMXALIGNER_ALIGNMENT_DICTIONARY_H
#define __TMXALIGNER_ALIGNMENT_DICTIONARY_H

#include <apertium/tmx_words.h>

#include <string>
#include <vector>
#include <map>
#include <iosfwd>


namespace TMXAligner
{

// A single dictionary entry: a pair of phrases.
// NOTE(review): presumably `first` is the source-side phrase and `second`
// the target-side phrase -- confirm against the implementation.
typedef std::pair<Phrase,Phrase> DictionaryItem;

// A sequence of DictionaryItem entries. The complete std::vector interface
// is exposed through public inheritance.
// NOTE(review): std::vector has no virtual destructor, so objects of this
// class should not be deleted through a pointer to the std::vector base.
class DictionaryItems
  : public std::vector<DictionaryItem>
{
public:
  // Reads from the input stream `is`. The accepted text format and whether
  // existing contents are kept are decided by the implementation, which is
  // not part of this header.
  void read(std::istream& is);
};

// A sequence of WordList entries, exposing the complete std::vector
// interface through public inheritance.
// NOTE(review): presumably holds one side (one language) of a dictionary,
// as the name suggests -- confirm against the implementation.
class HalfDictionary
  : public std::vector<WordList>
{
public:
  // Reads from the input stream `is`. The accepted text format is defined
  // by the implementation, which is not part of this header.
  void read(std::istream& is);
};


// A strictly one-directional dictionary.
//
// Once it has been read, the dictionary is not meant to be modified.
// When the opposite direction is required, create a second Dictionary and
// call reverse( const Dictionary& dic ) on it.
class Dictionary
{
public:
  // --- Construction -------------------------------------------------------

  // Takes a C string naming the dictionary file.
  void read(const char* dictionaryFile);

  // Takes another dictionary `dic` (see the class comment for the intended
  // use when the other direction is needed).
  void reverse(const Dictionary& dic);

  // Takes an already available collection of dictionary items.
  void build(const DictionaryItems& dictionaryItems);

  // --- Queries ------------------------------------------------------------

  // Both lookups are const and receive `results` by non-const reference.
  // NOTE(review): presumably matches are written to `results` and the return
  // value tells whether anything was found -- confirm in the implementation.
  bool lookupWord(const Word& word, DictionaryItems& results) const;
  bool lookupWordSet(const WordList& words, DictionaryItems& results) const;

private:
  // NOTE(review): this is a type alias, not a data member, so the class
  // stores no lookup table object. It looks like a member was intended;
  // left as is because adding one would change the class layout.
  typedef std::map<Word,int> wordLookupTable;

  void buildWordLookupTable();

  // The only data member: the entries of this dictionary.
  DictionaryItems dictionaryItems;
};

// Maps a Word to an int, exposing the complete std::map interface through
// public inheritance.
// NOTE(review): going by the class name the int is an occurrence count --
// confirm against the implementation.
// NOTE(review): std::map has no virtual destructor, so objects of this class
// should not be deleted through a pointer to the std::map base.
class FrequencyMap
  : public std::map<Word,int>
{
private:
  // Int-to-Word multimap used by reverseMap().
  typedef std::multimap<int,Word> ReFrequencyMap;

  // Const helper that receives the multimap to work on by reference.
  void reverseMap(ReFrequencyMap& reFrequencyMap) const;

public:
  // Single-word operations.
  void add(const Word& word);
  void remove(const Word& word);

  // Whole-word-list operations.
  void build(const WordList& wordList);
  void remove(const WordList& wordList);

  // Just for convenience: the same operation for a list of sentences.
  void build(const SentenceList& sentenceList);

  // NOTE(review): presumably the sum of all stored ints -- confirm.
  int total() const;

  // Writes to `os`; `itemNum` presumably limits how many items are printed
  // -- confirm against the implementation.
  void dump(std::ostream& os, int itemNum) const;

  // Const filters that receive `allowedWords` by non-const reference.
  // NOTE(review): how `ratio` selects the words is defined in the
  // implementation, not in this header.
  void lowPassFilter(WordList& allowedWords, double ratio) const;
  void highPassFilter(WordList& allowedWords, double ratio) const;
};


// Free helpers that modify sentence lists in place (each list is taken by
// non-const reference).

// NOTE(review): whether `words` is the set of words to keep or to drop is
// decided by the implementation -- confirm before relying on it.
void filterSentences(SentenceList& sentenceList, const WordList& words);

// Stopword removal for a single list.
// NOTE(review): the stopword lists themselves are not visible in this header.
void removeHungarianStopwords(SentenceList& huSentenceList);
void removeEnglishStopwords(SentenceList& enSentenceList);

// Stopword removal for a Hungarian/English pair of lists.
// NOTE(review): presumably applies the two functions above -- confirm.
void removeStopwords(SentenceList& huSentenceList,
                     SentenceList& enSentenceList);


// A pair of words.
// NOTE(review): presumably ordered (huWord, enWord) like the arguments of
// TransLex::add -- confirm against the callers.
typedef std::pair<Word,Word> WordPair;

// A word-to-word lexicon stored as two Word multimaps, `forward` and
// `backward`.
// NOTE(review): presumably `forward` is keyed by the left (hu) word and
// `backward` by the right (en) word -- confirm against the implementation.
class TransLex
{
public:
  // Container and iterator types used by the lookup interface.
  typedef std::multimap<Word,Word>                  WordMultimap;
  typedef WordMultimap::const_iterator              WordMultimapIt;
  // A pair of const iterators into a WordMultimap.
  // NOTE(review): presumably a [first, second) range of matching entries.
  typedef std::pair<WordMultimapIt,WordMultimapIt>  DictInterval;

  // Takes one (huWord, enWord) pair.
  void add(const Word& huWord, const Word& enWord);

  // Takes a collection of dictionary items.
  // NOTE(review): how multi-word phrases are handled is not visible here.
  void build(const DictionaryItems& dictionaryItems);

  // Const lookups by left (hu) or right (en) word, returning a DictInterval.
  DictInterval lookupLeftWord(const Word& huWord) const;
  DictInterval lookupRightWord(const Word& enWord) const;

  // Const query on a (huWord, enWord) pair.
  bool isPresent(const Word& huWord, const Word& enWord) const;

private:
  WordMultimap forward;
  WordMultimap backward;
};

// Holds a table of doubles keyed by (Word, Word) pairs, together with the
// operations that build and query it.
// NOTE(review): the class name refers to IBM Model 1; the exact estimation
// procedure lives in the implementation and is not visible in this header.
class IBMModelOne
{
public:
  // Key and table types. This nested WordPair has the same definition as
  // the namespace-level TMXAligner::WordPair and hides it inside the class.
  typedef std::pair<Word,Word>      WordPair;
  typedef std::map<WordPair,double> TransProbs;

  // Const query for a single (hu, en) word pair.
  double lookup(const Word& hu, const Word& en) const;

  // Const query for a pair of phrases.
  double distance(const Phrase& hu, const Phrase& en) const;

  // Both take a Hungarian and an English sentence list.
  // NOTE(review): presumably the two lists are parallel (sentence i of one
  // corresponds to sentence i of the other) -- confirm.
  void build(const SentenceList& huSentenceList,
             const SentenceList& enSentenceList);
  void reestimate(const SentenceList& huSentenceList,
                  const SentenceList& enSentenceList);

  // The table itself; publicly accessible.
  TransProbs transProbs;
};

} // namespace TMXAligner

#endif // __TMXALIGNER_ALIGNMENT_DICTIONARY_H