This file is indexed.

/usr/include/gnash/utf8.h is in gnash-dev 0.8.11~git20160109-1build1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
// utf8.h: utilities for converting to and from UTF-8
// 
//   Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
// 
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
// 
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
//
// Based on the public domain work of Thatcher Ulrich <tu@tulrich.com> 2004

#ifndef UTF8_H
#define UTF8_H

#include <cstddef> // for std::size_t
#include <cstdint> // for C99 int types
#include <string>
#include <vector>

#include "dsodefs.h" // For DSOEXPORT

// Android has no wide-character support at all, so std::wstring is
// declared here by hand for that platform only.
#ifdef __ANDROID__
namespace std {
using wstring =
    basic_string<wchar_t, char_traits<wchar_t>, allocator<wchar_t>>;
}
#endif

namespace gnash {

/// Utilities to convert between std::string and std::wstring.
//
/// Strings in Gnash are generally stored as std::strings.
/// We have to deal, however, with characters larger than standard
/// ASCII (128), which can be encoded in two different ways.
///
/// SWF6 and later use UTF-8, encoded as multibyte characters and
/// allowing many thousands of unique codes. Multibyte characters are 
/// difficult to handle, as their length - used for many string
/// operations - is not certain without parsing the string.
/// Converting the string to a wstring (whose wchar_t is generally as wide
/// as a uint32_t; the proprietary player seems to handle only characters
/// up to 65535, and two bytes is the minimum size of a wchar_t)
/// facilitates string operations, as the length of the string is then
/// equal to the number of valid characters.
/// 
/// SWF5 and earlier, however, used the ISO-8859 specification,
/// allowing the standard 128 ASCII characters plus 128 extra
/// characters that depend on the particular subset of ISO-8859.
/// Characters are 8 bits, not the ASCII standard 7. SWF5 cannot
/// handle multi-byte characters without special functions.
///
/// It is important that SWF5 can distinguish between the two encodings,
/// so we cannot convert all strings to UTF-8.
//
/// Please note that, although this is called utf8, what the Adobe
/// player uses is only loosely related to real unicode, so the
/// encoding support here is correspondingly non-standard.
namespace utf8 {

    /// Decode a canonical (multibyte) std::string into a std::wstring.
    //
    /// The SWF version selects how the input is interpreted. For SWF5,
    /// multibyte sequences (UTF-8 or any other encoding) are not combined:
    /// they are converted char by char, which mangles such strings.
    ///
    /// @param str      The canonical string to convert.
    /// @param version  The SWF version, used to decide how to decode str.
    /// @return         A version-dependent wide string.
    DSOEXPORT std::wstring
    decodeCanonicalString(const std::string& str, int version);

    /// Encode a std::wstring as a canonical std::string.
    //
    /// The SWF version selects the output encoding. For SWF 5, each
    /// character is stored as a single char of at least 8 bits instead of
    /// being converted to a canonical UTF-8 byte sequence. This lets Gnash
    /// tell 8-bit characters, which it handles correctly, apart from
    /// multi-byte characters, which string methods regard as multiple
    /// characters.
    ///
    /// @param wstr     The wide string to convert.
    /// @param version  The SWF version, used to decide how to encode wstr.
    /// @return         A version-dependent encoded string.
    DSOEXPORT std::string
    encodeCanonicalString(const std::wstring& wstr, int version);

    /// Decode the next Unicode character of a UTF-8 encoded string.
    //
    /// An invalid UTF-8 sequence yields U+FFFD. The iterator is moved past
    /// the character that is returned, except when that character is '\0':
    /// in that case the iterator is left where it was.
    ///
    /// @param it  Current read position; advanced as described above.
    /// @param e   Bound for it; presumably the end of the string being
    ///            decoded (confirm against the implementation).
    /// @return    The decoded character.
    DSOEXPORT std::uint32_t
    decodeNextUnicodeCharacter(std::string::const_iterator& it,
                               const std::string::const_iterator& e);

    /// Encode one wide character as a canonical string.
    //
    /// The result is theoretically up to 6 chars in length.
    ///
    /// @param ucs_character  The wide character to encode.
    /// @return               The encoded string.
    DSOEXPORT std::string
    encodeUnicodeCharacter(std::uint32_t ucs_character);
    
    /// Encode one wide character as a character of at least 8 bits.
    //
    /// This permits storing Latin1 (ISO-8859-1) characters, which is the
    /// format of SWF5 and below.
    ///
    /// @param ucsCharacter  The wide character to encode.
    /// @return              The encoded string.
    DSOEXPORT std::string
    encodeLatin1Character(std::uint32_t ucsCharacter);

    /// Text encodings that stripBOM() can report and that
    /// textEncodingName() accepts.
    //
    /// The numeric values are spelled out; they are the same ones the
    /// compiler would assign implicitly (0 through 9, in order).
    enum TextEncoding {
        encUNSPECIFIED = 0, ///< Set by stripBOM() when no BOM is found.
        encUTF8        = 1,
        encUTF16BE     = 2,
        encUTF16LE     = 3,
        encUTF32BE     = 4,
        encUTF32LE     = 5,
        encSCSU        = 6,
        encUTF7        = 7,
        encUTFEBCDIC   = 8,
        encBOCU1       = 9
    };

    /// Interpret (and skip) a Byte Order Mark at the start of a buffer.
    //
    /// This function takes a pointer to a buffer and returns the start
    /// of the actual data, after the BOM if one is present.
    /// No conversion is performed and no bytes are copied: the BOM is
    /// merely skipped, and what it indicates is reported through the
    /// encoding parameter.
    ///
    /// See http://en.wikipedia.org/wiki/Byte-order_mark
    ///
    /// @param in
    ///    The input buffer.
    ///
    /// @param size
    ///    In/out parameter: the size of the input buffer on entry; it
    ///    will be decremented by the size of the BOM, if any.
    ///
    /// @param encoding
    ///    Output parameter, will always be set.
    ///    encUNSPECIFIED if no BOM is found.
    ///
    /// @returns
    ///    A pointer either equal to 'in' or some bytes inside it.
    ///
    DSOEXPORT const char* stripBOM(const char* in, std::size_t& size,
                                   TextEncoding& encoding);

    /// Return the name of a text encoding.
    //
    /// @param enc  The encoding whose name is wanted.
    /// @return     The name, as a C string.
    DSOEXPORT const char*
    textEncodingName(TextEncoding enc);

    /// Result of guessEncoding().
    //
    /// guessEncoding() chooses between UTF8, Shift-Jis and other; the
    /// enumerators presumably correspond to those three cases in order
    /// (confirm against the implementation).
    enum EncodingGuess {
        ENCGUESS_UNICODE = 0,
        ENCGUESS_JIS     = 1,
        ENCGUESS_OTHER   = 2
    };

    /// Common code for guessing at the encoding of random text, between
    /// Shift-Jis, UTF8, and other.
    //
    /// Puts the character count in length, and the offsets to the
    /// characters in offsets.
    /// The offsets are not accurate if the return value is ENCGUESS_OTHER.
    ///
    /// NOTE(review): this comment used to say "DisplayObject" where
    /// "character" is meant (apparently a rename artifact), and spoke of
    /// offsets possibly being NULL and needing to be "at least s.length()".
    /// offsets is a std::vector reference and cannot be NULL; confirm
    /// against the implementation whether callers must pre-size or clear
    /// it before the call.
    ///
    /// TODO: It's doubtful if this even works, and it may not be useful at
    /// all.
    ///
    /// @param s        The text to examine.
    /// @param length   Output: the character count.
    /// @param offsets  Output: the offsets to the characters.
    /// @return         The guessed encoding.
    DSOEXPORT EncodingGuess
    guessEncoding(const std::string& s, int& length,
                  std::vector<int>& offsets);


} // namespace utf8
} // namespace gnash

#endif // UTF8_H


// Local Variables:
// mode: C++
// c-basic-offset: 8 
// tab-width: 8
// indent-tabs-mode: t
// End: