/usr/include/dlib/unicode/unicode_abstract.h is in libdlib-dev 18.18-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
// Copyright (C) 2007 Davis E. King (davis@dlib.net), and Nils Labugt
// License: Boost Software License See LICENSE.txt for the full license.
// NOTE: the macro is #undef'd immediately before it is tested, so the #ifdef below
// is always false and everything up to the matching #endif is skipped by the
// preprocessor.  The declarations in this file are therefore never compiled; they
// serve as documentation of the interface only.
#undef DLIB_UNICODe_ABSTRACT_H_
#ifdef DLIB_UNICODe_ABSTRACT_H_
#include "../uintn.h"
#include "../error.h"
#include <string>
#include <fstream>
namespace dlib
{
// ----------------------------------------------------------------------------------------
// unichar: an unsigned 32bit integer type used to hold a single UNICODE character
typedef uint32 unichar;
// ustring: a std::basic_string of unichar, used to hold our UNICODE strings
typedef std::basic_string<unichar> ustring;
// ----------------------------------------------------------------------------------------
template <typename T>
bool is_combining_char(
const T ch_
);
/*!
    ensures
        - if (ch_ is a unicode combining character) then
            - returns true
        - else
            - returns false

    NOTE(review): no requirements on T are stated in this contract.  Presumably
    T is an integral character type (e.g. char, wchar_t or unichar) - confirm
    against the implementation.
!*/
bool is_surrogate(
unichar ch
);
/*!
    ensures
        - if (ch is a unicode surrogate character, i.e. one half of a
          UTF-16 surrogate pair) then
            - returns true
        - else
            - returns false
!*/
unichar surrogate_pair_to_unichar(
unichar first,
unichar second
);
/*!
    requires
        - 0xD800 <= first < 0xDC00    (first is the leading, or "high", surrogate)
        - 0xDC00 <= second < 0xE000   (second is the trailing, or "low", surrogate)
        - is_surrogate(first) == true
        - is_surrogate(second) == true
    ensures
        - returns the single unicode character that is represented by the
          surrogate pair (first, second)
!*/
void unichar_to_surrogate_pair(
unichar ch,
unichar& first,
unichar& second
);
/*!
    requires
        - ch >= 0x10000 (i.e. ch is not in the Basic Multilingual Plane)
    ensures
        - surrogate_pair_to_unichar(#first,#second) == ch
          (i.e. converts ch into two surrogate characters which are written
          to first and second; #x denotes the value of x after the call)

    NOTE(review): no upper bound on ch is stated.  Presumably ch must also be
    a valid unicode code point (ch <= 0x10FFFF) - confirm against the
    implementation.
!*/
// ----------------------------------------------------------------------------------------
class invalid_utf8_error : public error
{
public:
invalid_utf8_error():error(EUTF8_TO_UTF32) {}
};
const ustring convert_utf8_to_utf32 (
const std::string& str
);
/*!
    ensures
        - if (str is a valid UTF-8 encoded string) then
            - returns a copy of str that has been converted into a
              UTF-32 encoded unichar string
        - else
            - throws invalid_utf8_error
!*/
// ----------------------------------------------------------------------------------------
const ustring convert_wstring_to_utf32 (
const std::wstring &wstr
);
/*!
    requires
        - wstr is a valid UTF-16 string when sizeof(wchar_t) == 2
        - wstr is a valid UTF-32 string when sizeof(wchar_t) == 4
    ensures
        - returns a copy of wstr that has been converted into a UTF-32
          encoded string
!*/
// ----------------------------------------------------------------------------------------
const std::wstring convert_utf32_to_wstring (
const ustring &str
);
/*!
    requires
        - str is a valid UTF-32 encoded string
    ensures
        - returns a copy of str that has been converted into a wstring whose
          encoding is:
            - UTF-16 when sizeof(wchar_t) == 2
            - UTF-32 when sizeof(wchar_t) == 4
!*/
// ----------------------------------------------------------------------------------------
const std::wstring convert_mbstring_to_wstring (
const std::string &str
);
/*!
    requires
        - str is a valid multibyte string whose encoding is the same as that
          of the current locale setting
    ensures
        - returns a copy of str that has been converted into a wstring whose
          encoding is:
            - UTF-16 when sizeof(wchar_t) == 2
            - UTF-32 when sizeof(wchar_t) == 4
!*/
// ----------------------------------------------------------------------------------------
const std::string convert_wstring_to_mbstring (
const std::wstring &src
);
/*!
    requires
        - src is a valid wide character string whose encoding is the same as
          that of the current locale setting
    ensures
        - returns a multibyte encoded version of src
!*/
// ----------------------------------------------------------------------------------------
template <
typename charT
>
class basic_utf8_ifstream : public std::basic_istream<charT>
{
/*!
    WHAT THIS OBJECT REPRESENTS
        This object represents an input file stream much like the
        normal std::ifstream except that it knows how to read UTF-8
        data.  So when you read characters out of this stream it will
        automatically convert them from the UTF-8 multibyte encoding
        into a fixed width wide character encoding.

        charT is the character type of the std::basic_istream this
        object derives from, i.e. the type of the characters you read
        out of the stream.
!*/
public:
basic_utf8_ifstream (
);
/*!
    ensures
        - constructs an input stream that isn't yet associated with
          a file.
!*/
basic_utf8_ifstream (
const char* file_name,
std::ios_base::openmode mode = std::ios::in
);
/*!
    ensures
        - tries to open the given file for reading by this stream
        - mode is interpreted exactly the same way as the open mode
          argument used by std::ifstream.
!*/
basic_utf8_ifstream (
const std::string& file_name,
std::ios_base::openmode mode = std::ios::in
);
/*!
    ensures
        - tries to open the given file for reading by this stream
        - mode is interpreted exactly the same way as the open mode
          argument used by std::ifstream.
!*/
void open(
const std::string& file_name,
std::ios_base::openmode mode = std::ios::in
);
/*!
    ensures
        - tries to open the given file for reading by this stream
        - mode is interpreted exactly the same way as the open mode
          argument used by std::ifstream.
!*/
void open (
const char* file_name,
std::ios_base::openmode mode = std::ios::in
);
/*!
    ensures
        - tries to open the given file for reading by this stream
        - mode is interpreted exactly the same way as the open mode
          argument used by std::ifstream.
!*/
void close (
);
/*!
    ensures
        - any file opened by this stream has been closed
!*/
};
// utf8_uifstream: a basic_utf8_ifstream whose character type is unichar
typedef basic_utf8_ifstream<unichar> utf8_uifstream;
// utf8_wifstream: a basic_utf8_ifstream whose character type is wchar_t
typedef basic_utf8_ifstream<wchar_t> utf8_wifstream;
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_UNICODe_ABSTRACT_H_
|