/usr/include/kytea/model-io-text.h is in libkytea-dev 0.4.6+dfsg-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#ifndef MODEL_IO_TEXT_H__
#define MODEL_IO_TEXT_H__
#include <kytea/model-io.h>
#include <algorithm>
namespace kytea {
class CorpusIO;
class TextModelIO : public ModelIO {
public:
TextModelIO(StringUtil* util) : ModelIO(util) { }
TextModelIO(StringUtil* util, const char* file, bool out) : ModelIO(util,file,out,false) { }
TextModelIO(StringUtil* util, std::iostream & str, bool out) : ModelIO(util,str,out,false) { }
// writing functions
void writeConfig(const KyteaConfig & conf);
void writeModel(const KyteaModel * mod);
void writeWordList(const std::vector<KyteaString> & list);
void writeModelDictionary(const Dictionary<ModelTagEntry> * dict) { writeDictionary(dict); }
void writeProbDictionary(const Dictionary<ProbTagEntry> * dict) { writeDictionary(dict); }
void writeVectorDictionary(const Dictionary<FeatVec > * dict) { writeDictionary(dict); }
void writeLM(const KyteaLM * mod);
void writeFeatVec(const FeatVec * vec);
void writeFeatureLookup(const FeatureLookup * featLookup);
template <class Entry>
void writeEntry(const Entry * entry);
template <class Entry>
void writeDictionary(const Dictionary<Entry> * dict) {
if(dict == 0) {
*str_ << "0" << std::endl << "0" << std::endl;
return;
}
// write the states
*str_ << (unsigned)dict->getNumDicts() << std::endl;
const std::vector<DictionaryState*> & states = dict->getStates();
*str_ << states.size() << std::endl;
if(states.size() == 0)
return;
for(unsigned i = 0; i < states.size(); i++) {
*str_ << states[i]->failure;
for(unsigned j = 0; j < states[i]->gotos.size(); j++)
*str_ << " " << util_->showChar(states[i]->gotos[j].first) << " " << states[i]->gotos[j].second;
*str_ << std::endl;
for(unsigned j = 0; j < states[i]->output.size(); j++) {
if(j!=0) *str_ << " ";
*str_ << states[i]->output[j];
}
*str_ << std::endl;
*str_ << (states[i]->isBranch?'b':'n') << std::endl;
}
// write the entries
const std::vector<Entry*> & entries = dict->getEntries();
*str_ << entries.size() << std::endl;
for(unsigned i = 0; i < entries.size(); i++)
writeEntry((Entry*)entries[i]);
}
// create an appropriate parser based on the type
static CorpusIO* createIO(const char* file, Format form, bool output, StringUtil* util);
static CorpusIO* createIO(std::iostream & str, Format form, bool output, StringUtil* util);
void readConfig(KyteaConfig & conf);
KyteaModel * readModel();
std::vector<KyteaString> readWordList();
Dictionary<ModelTagEntry> * readModelDictionary() { return readDictionary<ModelTagEntry>(); }
Dictionary<ProbTagEntry> * readProbDictionary() { return readDictionary<ProbTagEntry>(); }
Dictionary<FeatVec > * readVectorDictionary() { return readDictionary<FeatVec >(); }
KyteaLM * readLM();
FeatVec * readFeatVec();
FeatureLookup * readFeatureLookup();
template <class Entry>
Entry * readEntry();
template <class Entry>
Dictionary<Entry> * readDictionary() {
Dictionary<Entry> * dict = new Dictionary<Entry>(util_);
std::string line, buff;
// get the number of dictionaries
std::getline(*str_, line);
dict->setNumDicts(util_->parseInt(line.c_str()));
// get the states
std::vector<DictionaryState*> & states = dict->getStates();
getline(*str_, line);
states.resize(util_->parseInt(line.c_str()));
if(states.size() == 0) {
delete dict;
return 0;
}
for(unsigned i = 0; i < states.size(); i++) {
DictionaryState * state = new DictionaryState();
getline(*str_, line);
std::istringstream iss(line);
iss >> buff;
state->failure = util_->parseInt(buff.c_str());
while(iss >> buff) {
std::pair<KyteaChar,unsigned> p;
p.first = util_->mapChar(buff.c_str());
if(!(iss >> buff))
THROW_ERROR("Badly formed model (goto character without a destination)");
p.second = util_->parseInt(buff.c_str());
state->gotos.push_back(p);
}
sort(state->gotos.begin(), state->gotos.end());
getline(*str_, line);
std::istringstream iss2(line);
while(iss2 >> buff)
state->output.push_back(util_->parseInt(buff.c_str()));
getline(*str_, line);
if(line.length() != 1)
THROW_ERROR("Badly formed model (branch indicator not found)");
state->isBranch = (line[0] == 'b');
states[i] = state;
}
// get the entries
std::vector<Entry*> & entries = dict->getEntries();
getline(*str_, line);
entries.resize(util_->parseInt(line.c_str()));
for(unsigned i = 0; i < entries.size(); i++) {
entries[i] = readEntry<Entry>();
}
return dict;
}
};
}
#endif
|