/usr/include/sunpinyin-2.0/lexicon/pytrie_gen.h is in libsunpinyin-dev 2.0.3-5ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#ifndef _SUNPINYIN_PYTRIE_GEN_H__
#define _SUNPINYIN_PYTRIE_GEN_H__
#include "../portability.h"
#include <string>
#include <vector>
#include <map>
#include <set>
#include <list>
#include "pytrie.h"
/**
 * Abstract interface that supplies per-word scoring information.
 *
 * CPinyinTrieMaker::write() receives a pointer to an implementation of this
 * interface. Both queries are keyed by a plain unsigned word id.
 */
class CWordEvaluator {
public:
    /** Returns the cost value for the word identified by @p wid. */
    virtual double
    getCost(unsigned int wid) = 0;

    /** Returns whether the word identified by @p wid is considered "seen". */
    virtual bool
    isSeen(unsigned int wid) = 0;

    // A polymorphic interface needs a virtual destructor, otherwise deleting
    // a derived evaluator through a CWordEvaluator* is undefined behaviour.
    // It is declared after the existing virtual functions on purpose, so
    // their declaration order stays exactly as it was.
    virtual ~CWordEvaluator() { }
};
/**
 * Builder for a pinyin trie.
 *
 * The public interface reads a lexicon (constructFromLexicon), inserts
 * pinyin/word pairs (insertFullPinyinPair), post-processes the trie
 * (threadNonCompletePinyin) and writes it out (write). All of those members
 * are only declared here; their definitions live outside this header.
 */
class CPinyinTrieMaker {
public:
    class TNode;
    class TWordInfo;

    /**
     * A word id packed together with per-word attributes.
     *
     * The same storage can be accessed either as one unsigned int (m_all) or
     * as bit-fields (anony). The bit-field order is mirrored when
     * WORDS_BIGENDIAN is defined. Comparison and the conversion to
     * unsigned int look at the id field only; cost, hide flag and charset
     * level do not take part.
     */
    union TWordId {
        unsigned int m_all;
        struct TAnony {     // named: some compilers do not support anonymous structs by default
#ifdef WORDS_BIGENDIAN
            unsigned m_bHide    : 1;
            unsigned m_cost     : 5;
            unsigned m_csLevel  : 2;
            unsigned m_id       : WORD_ID_WIDTH;
#else
            unsigned m_id       : WORD_ID_WIDTH;
            unsigned m_csLevel  : 2;
            unsigned m_cost     : 5;
            unsigned m_bHide    : 1;
#endif
        } anony;

    public:
        /** Creates an all-zero word id. */
        TWordId() : m_all(0) { }

        /** Copies the whole packed word. */
        TWordId(const TWordId& b) : m_all(b.m_all) { }

        /**
         * Builds a packed id from its parts.
         *
         * @param id    word id, stored in the WORD_ID_WIDTH-bit field
         * @param cost  stored in the 5-bit cost field
         * @param hide  any non-zero value sets the 1-bit hide flag
         * @param cslvl stored in the 2-bit charset-level field
         *
         * Not explicit on purpose: existing code converts a plain unsigned
         * to TWordId implicitly (e.g. the default argument of TWordInfo).
         */
        TWordId(unsigned id, unsigned cost = 0, unsigned hide = 0, unsigned cslvl = 0)
            : m_all(0)  // zero first: bits not covered by the bit-fields must
                        // not be indeterminate, copies go through m_all
        {
            anony.m_id = id;
            anony.m_cost = cost;
            anony.m_bHide = (hide) ? 1 : 0;
            anony.m_csLevel = cslvl;
        }

        /** Copy assignment matching the user-provided copy constructor. */
        TWordId& operator=(const TWordId& b)
        {
            m_all = b.m_all;
            return *this;
        }

        /** Orders by the id field only. */
        bool operator<(const TWordId& b) const
        { return anony.m_id < b.anony.m_id; }

        /** Equal when the id fields are equal; attributes are ignored. */
        bool operator==(const TWordId& b) const
        { return anony.m_id == b.anony.m_id; }

        /** Yields the id field. */
        operator unsigned int() const
        { return anony.m_id; }
    };

    typedef std::set<TWordId>           CWordSet;
    typedef std::vector<TWordInfo>      CWordVec;
    typedef std::map<unsigned, TNode*>  CTrans;
    typedef std::set<TNode*>            CNodeSet;
    typedef std::list<TNode*>           CNodeList;
    typedef std::vector<std::string>    CLexicon;

    /**
     * A word id together with the cost and "seen" flag used for ordering.
     */
    class TWordInfo {
    public:
        TWordId m_id;
        double  m_cost;
        bool    m_bSeen;

        /**
         * @param id   packed word id
         * @param cost base cost; the id's own 5-bit cost field is added to it
         * @param seen requested "seen" state; forced to false when the id's
         *             hide flag is set
         */
        TWordInfo(TWordId id = 0, double cost = 0.0, bool seen = false)
            : m_id(id),
              m_cost(cost + id.anony.m_cost),
              m_bSeen(seen && !id.anony.m_bHide)
        { }

        /**
         * Orders by effective cost, ascending. A seen entry has 5000.0
         * subtracted from its cost before the comparison.
         */
        bool
        operator<(const TWordInfo& b) const
        {
            const double seenBonus = 5000.0;
            const double lhs = m_bSeen ? (m_cost - seenBonus) : m_cost;
            const double rhs = b.m_bSeen ? (b.m_cost - seenBonus) : b.m_cost;
            return lhs < rhs;
        }
    };

    /**
     * Thin wrapper around a pointer to a constant CNodeSet.
     *
     * It is the key type of CStateMap. operator< and operator== are defined
     * out of line. The wrapper stores the pointer only, and nothing in this
     * header deletes the pointed-to set.
     */
    class PNodeSet {
    public:
        PNodeSet(const CNodeSet *pns) : m_pns(pns) { }
        PNodeSet(const PNodeSet& another) : m_pns(another.m_pns) { }

        // The accessors only read m_pns and hand out pointer/reference to
        // const, so they are const-qualified and usable on const wrappers.
        const CNodeSet*
        operator->(void) const { return m_pns; }

        const CNodeSet&
        operator*(void) const { return *m_pns; }

        bool
        operator<(const PNodeSet& another) const;

        bool
        operator==(const PNodeSet& another) const;

    protected:
        const CNodeSet * m_pns;
    };

    typedef std::map<PNodeSet, TNode*>  CStateMap;

    /**
     * One trie node: a set of word ids, outgoing transitions keyed by an
     * unsigned value, and a set of other nodes (m_cmbNodes).
     * The constructor is defined out of line.
     */
    class TNode {
    public:
        static CNodeList    m_AllNodes;

    public:
        bool        m_bExpanded;
        bool        m_bFullSyllableTransfer;
        CWordSet    m_WordIdSet;
        CTrans      m_Trans;
        CNodeSet    m_cmbNodes;

    public:
        TNode();
    };

protected:
    CStateMap   m_StateMap;
    TNode       m_RootNode;
    CLexicon    m_Lexicon;

public:
    CPinyinTrieMaker();

    // Intentionally empty in the original ("forget this"): nothing is
    // released here.
    // NOTE(review): if the out-of-line members allocate TNode objects on the
    // heap (not visible from this header), they are never freed - confirm.
    ~CPinyinTrieMaker() {}

    /** Builds the trie from the lexicon file @p fileName. */
    bool
    constructFromLexicon(const char* fileName);

    /** Inserts the pair (@p pinyin, @p wid) into the trie. */
    bool
    insertFullPinyinPair(const char* pinyin, TWordId wid);

    bool
    threadNonCompletePinyin(void);

    /** Prints the sub-trie under @p root to @p fp; @p pinyin is passed by non-const reference. */
    void
    print(FILE* fp, TNode* root, std::string& pinyin);

    /** Writes the trie to the file @p fileName, using @p psrt for word scoring. */
    bool
    write(const char* fileName, CWordEvaluator* psrt, bool revert_endian);

    /** Same as above, but writes to the already opened stream @p fp. */
    bool
    write(FILE *fp, CWordEvaluator* psrt, bool revert_endian);

protected:
    TNode*
    insertTransfer(TNode* pnode, unsigned s);

    TNode*
    addCombinedTransfers(TNode *pnode, unsigned s, const CNodeSet& nodes);

    void
    combineInitialTrans(TNode *pnode);

    void
    expandCombinedNode(TNode *pnode);
};
#endif
|