/usr/include/apertium-3.4/apertium/lswpost.h is in apertium-dev 3.4.2~r68466-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
* Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/**
* Light Sliding-Window Part of Speech Tagger (LSWPoST) implementation (header)
*
* @author Gang Chen - pkuchengang@gmail.com
*/
#ifndef __LSWPOST_H
#define __LSWPOST_H
#include "file_tagger.h"
#include <cstdio>
#include <fstream>
#include <math.h>
#include <string>
#include <vector>
#include <set>
#include <map>
#include <cfloat>
#include <cstring>
#include <apertium/collection.h>
#include <apertium/constant_manager.h>
#include <apertium/morpho_stream.h>
#include <apertium/tagger_data_lsw.h>
#include <apertium/tagger_utils.h>
#include <apertium/tagger_word.h>
/** Small positive constant (1e-10).
* NOTE(review): presumably a threshold below which a value is treated as
* zero; its uses are not visible in this header -- confirm in the
* implementation file.
*/
#define ZERO 1e-10
/** LSWPoST
* Light Sliding-Window Part of Speech Tagger.
*
* Declares the tagger as a subclass of Apertium::FILE_Tagger. Only the
* interface is declared here; all member functions are defined elsewhere.
*/
class LSWPoST : public Apertium::FILE_Tagger {
private:
// Tagger data (TaggerDataLSW) held by value by this tagger instance.
TaggerDataLSW tdlsw;
TTag eos; // end-of-sentence tag
public:
/** Loads the tagger from a serialised stream.
* NOTE(review): presumably implements the Apertium::FILE_Tagger interface
* -- confirm against file_tagger.h.
* @param Serialised_FILE_Tagger the input stream holding the serialised tagger
*/
void deserialise(FILE *Serialised_FILE_Tagger);
/** Returns a mutable reference to a vector of tag names.
* NOTE(review): the owner of the returned vector is not visible here;
* presumably it lives in the tagger data -- confirm.
*/
std::vector<std::wstring> &getArrayTags();
/** Trains the tagger on a corpus.
* NOTE(review): Count presumably bounds the amount of training (e.g. a
* number of iterations) -- confirm in the implementation.
* @param Corpus the input stream with the training corpus
* @param Count numeric training parameter (see note above)
*/
void train(FILE *Corpus, unsigned long Count);
/** Writes the tagger to a stream in serialised form.
* @param Stream_ the output stream to write to
*/
void serialise(FILE *Stream_);
/** Initialises the tagger from already-deserialised tagger data.
* @param Deserialised_FILE_Tagger the tagger data to take values from
*/
void deserialise(const TaggerData &Deserialised_FILE_Tagger);
/** Initialises probabilities using both a tagged and an untagged corpus.
* @param TaggedCorpus the input stream with the tagged corpus
* @param UntaggedCorpus the input stream with the untagged corpus
*/
void init_probabilities_from_tagged_text_(FILE *TaggedCorpus,
FILE *UntaggedCorpus);
/** Initialises probabilities from a single corpus.
* NOTE(review): the name suggests Kupiec's initialisation method; the
* actual behaviour is not visible in this header -- confirm.
* @param Corpus the input stream with the corpus to process
*/
void init_probabilities_kupiec_(FILE *Corpus);
/** Default constructor
*/
LSWPoST();
/** Constructor taking a pointer to tagger data.
* NOTE(review): whether the pointee is copied or retained is not visible
* here -- confirm ownership in the implementation.
* @param tdlsw pointer to the tagger data
*/
LSWPoST(TaggerDataLSW *tdlsw);
/** Constructor taking the tagger data by value.
* @param t the tagger data
*/
LSWPoST(TaggerDataLSW t);
/** Destructor
*/
~LSWPoST();
/** Used to set the end-of-sentence tag
* @param t the end-of-sentence tag
*/
void set_eos(TTag t);
/** It reads the expanded dictionary received as a parameter and calculates
* the set of ambiguity classes that the tagger will manage.
* @param fdic the input stream with the expanded dictionary to read
*/
void read_dictionary(FILE *fdic);
/** Whether a tag sequence is valid, according to the forbid and enforce rules
* @param left the first tag of the three-tag sequence
* @param mid the middle tag of the three-tag sequence
* @param right the last tag of the three-tag sequence
* @return true if the sequence is valid, false otherwise
*/
bool is_valid_seq(TTag left, TTag mid, TTag right);
/** Init probabilities
* It applies the forbid and enforce rules found in the tagger specification.
* To do so, the joint probability of a tag sequence that contains a forbid
* rule, or doesn't satisfy an enforce rule, is set to 0.
* @param ftxt the input stream with the corpus to process
*/
void init_probabilities(FILE *ftxt);
/** Unsupervised training algorithm (Baum-Welch implementation).
* NOTE(review): the Baum-Welch wording may have been carried over from
* another tagger's header; confirm that it describes this sliding-window
* tagger's training.
* @param ftxt the input stream with the untagged corpus to process
*/
void train (FILE *ftxt);
/** Prints the para matrix.
* NOTE(review): "para" presumably means the tagger's parameter matrix;
* the output destination is not visible here -- confirm.
*/
void print_para_matrix();
/** Do the tagging
* @param Input the input stream to read from
* @param Output the output stream to write to
* @param First flag defaulting to false; its meaning is not visible in
* this header -- confirm in the implementation
*/
void tagger(FILE *Input, FILE *Output, const bool &First = false);
};
#endif
|