/usr/include/apertium-3.4/apertium/tmx_dictionary.h is in apertium-dev 3.4.2~r68466-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*************************************************************************
* *
* (C) Copyright 2004. Media Research Centre at the *
* Sociology and Communications Department of the *
* Budapest University of Technology and Economics. *
* *
* Developed by Daniel Varga. *
* *
* From hunalign; for license see ../AUTHORS and ../COPYING.hunalign *
* *
*************************************************************************/
#ifndef __TMXALIGNER_ALIGNMENT_DICTIONARY_H
#define __TMXALIGNER_ALIGNMENT_DICTIONARY_H
#include <apertium/tmx_words.h>
#include <string>
#include <vector>
#include <map>
#include <iosfwd>
namespace TMXAligner
{
// One dictionary entry: a pair of Phrase objects (first, second).
typedef std::pair<Phrase,Phrase> DictionaryItem;

// A sequence of DictionaryItem entries.
// Publicly derives from std::vector<DictionaryItem>, so the complete vector
// interface is available on it in addition to read().
class DictionaryItems
  : public std::vector<DictionaryItem>
{
public:
  // Takes an input stream by reference; the definition is not in this header.
  // NOTE(review): presumably fills this vector with items parsed from `is` —
  // confirm against the implementation.
  void read(std::istream& is);
};
// A sequence of WordList objects.
// Publicly derives from std::vector<WordList>, so the complete vector
// interface is available on it in addition to read().
class HalfDictionary
  : public std::vector<WordList>
{
public:
  // Takes an input stream by reference; the definition is not in this header.
  // NOTE(review): presumably fills this vector with word lists parsed from
  // `is` — confirm against the implementation.
  void read(std::istream& is);
};
// A strictly one-directional dictionary that cannot be altered once it has
// been read. When the opposite direction is needed, create another Dictionary
// and call reverse( const Dictionary& dic ) on it.
class Dictionary
{
public:
  // Ways of populating the dictionary: from a file name, from an
  // already-loaded item list, or from another Dictionary.
  void read(const char* dictionaryFile);
  void build(const DictionaryItems& dictionaryItems);
  void reverse(const Dictionary& dic);

  // Const lookups taking a non-const `results` reference.
  // NOTE(review): presumably `results` receives the matching items and the
  // bool reports whether anything was found — confirm in the implementation.
  bool lookupWord(const Word& word, DictionaryItems& results) const;
  bool lookupWordSet(const WordList& words, DictionaryItems& results) const;

private:
  void buildWordLookupTable();

  // The only data member of this class.
  DictionaryItems dictionaryItems;

  // NOTE(review): this declares a *type* named wordLookupTable, not a data
  // member, so no lookup table is stored in the object by this header.
  // It may have been intended as a member — verify against the
  // implementation before changing it, since doing so alters class layout.
  typedef std::map<Word,int> wordLookupTable;
};
// A Word -> int map with extra helpers declared on top of it.
// Publicly derives from std::map<Word,int>; all helpers are defined outside
// this header.
// NOTE(review): the int is presumably an occurrence count per word — confirm
// in the implementation.
class FrequencyMap
  : public std::map<Word,int>
{
public:
  // Single-word operations.
  void add(const Word& word);
  void remove(const Word& word);

  // Overloads that take a whole WordList.
  void build(const WordList& wordList);
  void remove(const WordList& wordList);

  // Just for convenience.
  void build(const SentenceList& sentenceList);

  // Read-only queries and output.
  int total() const;
  void dump(std::ostream& os, int itemNum) const;

  // Const filters that take `allowedWords` by non-const reference.
  // NOTE(review): presumably `allowedWords` is the output and `ratio` a
  // frequency threshold — confirm in the implementation.
  void lowPassFilter(WordList& allowedWords, double ratio) const;
  void highPassFilter(WordList& allowedWords, double ratio) const;

private:
  // The key/value types of the base map, swapped, in a multimap.
  typedef std::multimap<int,Word> ReFrequencyMap;

  void reverseMap(ReFrequencyMap& reFrequencyMap) const;
};
// Free functions operating on sentence lists. Every SentenceList is taken by
// non-const reference; the definitions are not in this header.
// NOTE(review): presumably each call modifies the passed list(s) in place —
// confirm in the implementation.
void filterSentences(SentenceList& sentenceList, const WordList& words);

void removeHungarianStopwords(SentenceList& huSentenceList);
void removeEnglishStopwords(SentenceList& enSentenceList);
void removeStopwords(SentenceList& huSentenceList, SentenceList& enSentenceList);

// A pair of words (first, second).
typedef std::pair<Word,Word> WordPair;
// A word-to-word lexicon stored in two Word -> Word multimaps, `forward` and
// `backward`.
// NOTE(review): judging by the parameter names, `forward` presumably maps
// hu -> en and `backward` en -> hu — confirm in the implementation.
class TransLex
{
public:
  typedef std::multimap<Word,Word> WordMultimap;
  typedef WordMultimap::const_iterator WordMultimapIt;

  // A pair of const iterators into a WordMultimap.
  // NOTE(review): presumably a half-open [first, second) range of matching
  // entries — confirm.
  typedef std::pair<WordMultimapIt,WordMultimapIt> DictInterval;

  // Population.
  void add(const Word& huWord, const Word& enWord);
  void build(const DictionaryItems& dictionaryItems);

  // Const lookups keyed by one side of a word pair.
  DictInterval lookupLeftWord(const Word& huWord) const;
  DictInterval lookupRightWord(const Word& enWord) const;

  bool isPresent(const Word& huWord, const Word& enWord) const;

private:
  WordMultimap forward;
  WordMultimap backward;
};
// Holds a table of doubles keyed by word pairs (`transProbs`), together with
// functions that build, re-estimate and query it. All functions are defined
// outside this header.
// NOTE(review): the names suggest IBM Model 1 translation probabilities —
// confirm in the implementation.
class IBMModelOne
{
public:
  // Class-scope WordPair; the same underlying type as a std::pair<Word,Word>.
  typedef std::pair<Word,Word> WordPair;
  typedef std::map<WordPair,double> TransProbs;

  // Const queries on a single word pair and on a pair of phrases.
  double lookup(const Word& hu, const Word& en) const;
  double distance(const Phrase& hu, const Phrase& en) const;

  // Non-const operations taking two sentence lists.
  void build(const SentenceList& huSentenceList, const SentenceList& enSentenceList);
  void reestimate(const SentenceList& huSentenceList, const SentenceList& enSentenceList);

  // Publicly accessible table; the only data member of this class.
  TransProbs transProbs;
};
} // namespace TMXAligner
#endif // __TMXALIGNER_ALIGNMENT_DICTIONARY_H