This file is indexed.

/usr/include/sunpinyin-2.0/slm/slm.h is in libsunpinyin-dev 3.0.0~git20160910-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
// -*- mode: c++ -*-
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
 *
 * The contents of this file are subject to the terms of either the GNU Lesser
 * General Public License Version 2.1 only ("LGPL") or the Common Development and
 * Distribution License ("CDDL")(collectively, the "License"). You may not use this
 * file except in compliance with the License. You can obtain a copy of the CDDL at
 * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
 * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
 * specific language governing permissions and limitations under the License. When
 * distributing the software, include this License Header Notice in each file and
 * include the full text of the License in the License file as well as the
 * following notice:
 *
 * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
 * (CDDL)
 * For Covered Software in this distribution, this License shall be governed by the
 * laws of the State of California (excluding conflict-of-law provisions).
 * Any litigation relating to this License shall be subject to the jurisdiction of
 * the Federal Courts of the Northern District of California and the state courts
 * of the State of California, with venue lying in Santa Clara County, California.
 *
 * Contributor(s):
 *
 * If you wish your version of this file to be governed by only the CDDL or only
 * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
 * include this software in this distribution under the [CDDL or LGPL Version 2.1]
 * license." If you don't indicate a single choice of license, a recipient has the
 * option to distribute your version of this file under either the CDDL or the LGPL
 * Version 2.1, or to extend the choice of license to its licensees as provided
 * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
 * Version 2 license, then the option applies only if the new code is made subject
 * to such option by the copyright holder.
 */

#ifndef _SUN_AGC_SLM_H
#define _SUN_AGC_SLM_H

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "../portability.h"

#include <stdio.h>

/**
 * Thread slm makes the following modifications to the simple back-off language model:
 *    -# Word ids are limited to 18 bits, i.e. at most 262144 (256K) word ids;
 *    -# Compact all float values of -log(pr) into 65536 (16 bits)
 *       levels and use a table to map the index to a float value;
 *    -# Compact all float values of -log(bow) into 16384 (14 bits)
 *       levels and use a table to map the index to a float value;
 *    -# Threading information is embedded into the binary model file. Threading includes
 *         - bol (back-off level) from the current level
 *         - bon (back-off node)'s index in the bol level array
 *         .
 *       The thread can be used:
 *         - when a leaf node is reached, (bol, bon) can be used as the
 *           history node;
 *         - when a word cannot be found among the children of the current node (cl, cn),
 *           searching can be transferred to (bol, bon) directly and continue
 *           looking for the target word
 *    -# Add a basic type TState to the language model; a state is a pair of\n
 *           (level, array_idx_of_the_level)
 *    -# Change all get-probability interfaces to\n
 *          double transfer(TState history, unsigned int wid, TState& result);
 */
class CThreadSlm {
    // Interface to a threaded back-off language model stored in a binary
    // model file.  Only the packed record types (TState, TNode, TLeaf) are
    // implemented inline here; load/free/transfer* are defined elsewhere.
    //
    // NOTE(review): the class owns raw pointers and its destructor calls
    // free(), but no copy constructor / assignment operator is declared, so
    // copying an instance looks unsafe -- confirm that callers never copy it.
public:
    enum {
        BITS_BOW        = 14,   // same width as the TNode::m_bow bit-field
        BITS_PR         = 16,   // same width as TNode::m_pr (and TLeaf pr_hi + pr_lo)
        ID_NOT_WORD     = 69,   // presumably the word id reserved for "not a word" -- TODO confirm
    };

    /**
     * A (level:idx) pair locating a state in the language model.
     * Please note the pseudo unigram state, with level == 0 but idx > 0;
     * it is used with the bigram cache model.
     *
     * The pair is stored as two bit-fields (8-bit level, 24-bit idx) that
     * overlay the single word m_all.  Comparison, ordering, copying and the
     * conversion to unsigned all operate on m_all, i.e. they rely on reading
     * the union member that was not the one last written.
     */
    union TState {
        /** Copy the packed representation of @p b. */
        TState(const TState &b) : m_all(b.m_all) {
        }

        /** Build a state from its level and index (both default to 0). */
        TState(unsigned level = 0, unsigned idx = 0) {
            anony.m_Level = level; anony.m_Idx = idx;
        }

        /** Advance to the next index within the same level. */
        TState& operator++()              { ++anony.m_Idx; return *this; }

        void setIdx(unsigned int idx)     { anony.m_Idx = idx; }
        void setLevel(unsigned int lvl)   { anony.m_Level = lvl; }

        unsigned int getLevel() const { return anony.m_Level; }
        unsigned int getIdx() const { return anony.m_Idx; }

        /** The packed (level, idx) word. */
        operator unsigned() const { return m_all; }

        /** True when the index is 0 or 1. */
        bool isTailState() const { return getIdx() <= 1; }

        /** States are equal when their packed words are equal. */
        bool operator==(const TState & b) const {
            return m_all == b.m_all;
        }

        /** Orders states by their packed words. */
        bool operator<(const TState & b) const {
            return unsigned(*this) < unsigned(b);
        }

    private:
        unsigned int m_all;
        // The field order is swapped under WORDS_BIGENDIAN; presumably this
        // keeps m_Level in the most significant byte of m_all on both kinds
        // of machine -- TODO confirm against the target ABI.
#ifndef WORDS_BIGENDIAN
        struct TAnonymous {
            unsigned m_Idx   : 24;
            unsigned m_Level : 8;
        } anony;
#else
        struct TAnonymous {
            unsigned m_Level : 8;
            unsigned m_Idx   : 24;
        } anony;
#endif
    };

    /**
     * Packed record with the fields wid, bow, pr, ch, bon and bol
     * (3 x 32 bits of bit-fields).
     * Machine dependent: the bit-field order is selected by WORDS_BIGENDIAN.
     */
    struct TNode {
    public:
        unsigned int wid() const { return m_wid; }
        unsigned int bow() const { return m_bow; }
        unsigned int pr()  const { return m_pr; }
        unsigned int bon() const { return m_bon; }
        unsigned int bol() const { return m_bol; }

        /** The 23-bit ch value, reassembled from its 7 high and 16 low bits. */
        unsigned int ch()  const { return((m_ch_hi << 16) + m_ch_lo); }

        // Each setter stores into a bit-field, so a value wider than the
        // field is truncated to the field's width.
        void set_wid(unsigned int wid) { m_wid = wid; }
        void set_bow(unsigned int bow) { m_bow = bow; }
        void set_pr(unsigned int pr)   { m_pr = pr; }
        void set_bon(unsigned int bon) { m_bon = bon; }
        void set_bol(unsigned int bol) { m_bol = bol; }

        /** Store the low 23 bits of @p ch, split as 7 high bits + 16 low bits. */
        void set_ch(unsigned int ch) {
            m_ch_hi = ((ch >> 16) & 0x7F);
            m_ch_lo = (ch & 0xFFFF);
        }

    protected:
#ifndef WORDS_BIGENDIAN
        unsigned m_wid       : 18;
        unsigned m_bow       : 14;
        unsigned m_pr        : 16;
        unsigned m_ch_lo     : 16;
        unsigned m_bon       : 23;
        unsigned m_bol       : 2;
        unsigned m_ch_hi     : 7;
#else
        unsigned m_ch_hi     : 7;
        unsigned m_bol       : 2;
        unsigned m_bon       : 23;
        unsigned m_ch_lo     : 16;
        unsigned m_pr        : 16;
        unsigned m_bow       : 14;
        unsigned m_wid       : 18;
#endif
    };

    /**
     * Packed record with the fields wid, pr, bon and bol; unlike TNode it
     * has no bow and no ch field.
     * Machine dependent: the bit-field order is selected by WORDS_BIGENDIAN.
     */
    struct TLeaf {
    public:
        inline unsigned int wid() const { return m_wid; }
        inline unsigned int bon() const { return m_bon; }
        inline unsigned int bol() const { return m_bol; }

        /** The 16-bit pr value, reassembled from its 2 high and 14 low bits. */
        inline unsigned int pr()  const { return((m_pr_hi << 14) + m_pr_lo); }

        inline void set_wid(unsigned int wid) { m_wid = wid; }
        inline void set_bon(unsigned int bon) { m_bon = bon; }
        inline void set_bol(unsigned int bol) { m_bol = bol; }

        /** Store the low 16 bits of @p pr, split as 2 high bits + 14 low bits. */
        inline void set_pr(unsigned int pr)   { m_pr_hi = ((pr >> 14) & 0x3);
                                                m_pr_lo = pr & 0x3FFF; }

    protected:
#ifndef WORDS_BIGENDIAN
        unsigned m_wid       : 18;
        unsigned m_pr_lo     : 14;
        unsigned m_bon       : 23;
        unsigned m_bol       : 2;
        unsigned m_pr_hi     : 2;
#else
        unsigned m_pr_hi     : 2;
        unsigned m_bol       : 2;
        unsigned m_bon       : 23;
        unsigned m_pr_lo     : 14;
        unsigned m_wid       : 18;
#endif
    };

public:
    /**
     * Create an empty object: every counter is zero and every pointer is NULL.
     * m_bufSize is zeroed as well so that no data member is left indeterminate.
     */
    CThreadSlm()
        : m_N(0), m_UseLogPr(0), m_Levels(NULL), m_LevelSizes(NULL),
          m_bowTable(NULL), m_prTable(NULL), m_bufSize(0), m_bMMap(false),
          m_buf(NULL) { }

    /** Calls free(). */
    ~CThreadSlm() { free(); }

    /**
     * Load a model from the file @p fname (defined outside this header).
     * NOTE(review): @p MMap presumably requests memory-mapping the file
     * instead of reading it into a buffer, and the result presumably
     * reports success -- confirm in the implementation.
     */
    bool
    load(const char* fname, bool MMap = false);

    /** Current value of m_UseLogPr (0 after construction). */
    unsigned isUseLogPr() const
    { return m_UseLogPr; }

    /**
     * Member function named free(), not ::free(); invoked by the destructor.
     * Presumably releases whatever load() acquired -- defined outside this
     * header.
     */
    void
    free();

    // The functions below are only declared here.  Judging by their names,
    // transferNegLog/transfer move from state `history` on word id `wid`,
    // store the new state in `result` and return the associated score --
    // TODO confirm the exact semantics in the implementation.
    double
    transferNegLog(TState history, unsigned int wid, TState& result);

    double
    transfer(TState history, unsigned int wid, TState& result);

    TState
    history_state_of(TState st);

    TState&
    historify(TState& st);

    unsigned int
    lastWordId(TState st);

protected:
    double
    rawTransfer(TState history, unsigned int wid, TState& result);

protected:
    typedef  void*   PtrVoid;

    unsigned m_N;
    unsigned m_UseLogPr;
    void    **m_Levels;
    unsigned *m_LevelSizes;
    float    *m_bowTable;
    float    *m_prTable;

private:
    ssize_t m_bufSize;
    bool m_bMMap;
    char     *m_buf;
};

#endif

// -*- indent-tabs-mode: nil -*- vim:et:ts=4