/usr/include/apertium-3.1/apertium/hmm.h is in libapertium3-3.1-0-dev 3.1.0-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
* Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*/
/**
* First order hidden Markov model (HMM) implementation (header)
*
* @author Felipe Sánchez-Martínez - fsanchez@dlsi.ua.es
*/
#ifndef __HMM_H
#define __HMM_H
#include <cstdio>
#include <fstream>
#include <math.h>
#include <string>
#include <vector>
#include <set>
#include <map>
#include <cfloat>
#include <cstring>
#include <apertium/collection.h>
#include <apertium/constant_manager.h>
#include <apertium/morpho_stream.h>
#include <apertium/tagger_data.h>
#include <apertium/tagger_utils.h>
#include <apertium/tagger_word.h>
// Header-scope directive: every translation unit that includes this file
// also gets namespace std imported. Code outside this header may depend on
// that, so it is left in place.
using namespace std;
// Small positive constant (1e-10).
// NOTE(review): presumably the threshold below which a probability is
// treated as zero -- confirm against the implementation file.
#define ZERO 1e-10
/** HMM
* first-order hidden Markov Model
*/
class HMM {
private:
TaggerData *td;
TTag eos; // end-of-sentence tag
bool debug; //If true, print error messages when tagging input text
bool show_sf; //If true, print superficial forms when tagging input text
/** It allocs memory for the transition (a) and the emission (b) matrices.
* Before calling this method the number of ambiguity classes must be known.
* This methos is called within read_ambiguity_classes and read_dictionary.
* @see: read_ambiguity_classes, read_dictionary
*/
void init();
/** This method returns a known ambiguity class that is a subset of
* the one received as a parameter. This is useful when a new
* ambiguity class is found because of changes in the morphological
* dictionary used by the MT system.
* @param c set of tags (ambiguity class)
* @return a known ambiguity class
*/
set<TTag> find_similar_ambiguity_class(set<TTag> c);
public:
/** Constructor
*/
HMM(TaggerData *t);
/** Destructor
*/
~HMM();
/** Used to set the end-of-sentence tag
* @param t the end-of-sentence tag
*/
void set_eos(TTag t);
/** Used to set the debug flag
*
*/
void set_debug(bool d);
/** Used to set the show superficial forms flag
*
*/
void set_show_sf(bool sf);
/** It reads the ambiguity classes from the stream received as
* input
* @param is the input stream
*/
void read_ambiguity_classes(FILE *in);
/** It writes the ambiguity classes to the stream received as
* a parameter
* @param iosthe output stream
*/
void write_ambiguity_classes(FILE *out);
/** It reads the probabilities (matrices a and b) from the stream
* received as a parameter
* @param is the input stream
*/
void read_probabilities(FILE *in);
/** It writes the probabilities (matrices a and b) to the stream
* received as a parameter
* @param os the output stream
*/
void write_probabilities(FILE *out);
/** It reads the expanded dictionary received as a parameter and calculates
* the set of ambiguity classes that the tagger will manage.
* @param is the input stream with the expanded dictionary to read
*/
void read_dictionary(FILE *is);
/** It initializes the transtion (a) and emission (b) probabilities
* from an untagged input text by means of Kupiec's method
* @param is the input stream with the untagged corpus to process
*/
void init_probabilities_kupiec (FILE *is);
/** It initializes the transtion (a) and emission (b) probabilities
* from a tagged input text by means of the expected-likelihood
* estimate (ELE) method
* @param ftagged the input stream with the tagged corpus to process
* @param funtagged the same corpus to process but untagged
*/
void init_probabilities_from_tagged_text(FILE *ftagged, FILE *funtagged);
/** It applies the forbid and enforce rules found in tagger specification.
* To do so the transition matrix is modified by introducing null probabilities
* in the involved transitions.
*/
void apply_rules();
/** Unsupervised training algorithm (Baum-Welch implementation).
* @param is the input stream with the untagged corpus to process
*/
void train (FILE *is);
/** Tagging algorithm (Viterbi implementation).
* @param in the input stream with the untagged text to tag
* @param out the output stream with the tagged text
*/
void tagger (FILE *in, FILE *out, bool show_all_good_first=false);
/** Prints the A matrix.
*/
void print_A();
/** Prints the B matrix.
*/
void print_B();
/** Prints the ambiguity classes.
*/
void print_ambiguity_classes();
void filter_ambiguity_classes(FILE *in, FILE *out);
};
#endif