This file is indexed.

/usr/share/doc/libghc-text-icu-doc/html/src/Data-Text-ICU-Normalize.html is in libghc-text-icu-doc 0.6.3.7-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<!-- Generated by HsColour, http://code.haskell.org/~malcolm/hscolour/ -->
<title>dist-ghc/build/Data/Text/ICU/Normalize.hs</title>
<link type='text/css' rel='stylesheet' href='hscolour.css' />
</head>
<body>
<pre><a name="line-1"></a><span class='hs-comment'>{-# LINE 1 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-2"></a><span class='hs-comment'>{-# LANGUAGE CPP, DeriveDataTypeable, ForeignFunctionInterface #-}</span>
<a name="line-3"></a><span class='hs-comment'>{-# LINE 2 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-4"></a><span class='hs-comment'>-- |</span>
<a name="line-5"></a><span class='hs-comment'>-- Module      : Data.Text.ICU.Normalize</span>
<a name="line-6"></a><span class='hs-comment'>-- Copyright   : (c) 2009, 2010 Bryan O'Sullivan</span>
<a name="line-7"></a><span class='hs-comment'>--</span>
<a name="line-8"></a><span class='hs-comment'>-- License     : BSD-style</span>
<a name="line-9"></a><span class='hs-comment'>-- Maintainer  : bos@serpentine.com</span>
<a name="line-10"></a><span class='hs-comment'>-- Stability   : experimental</span>
<a name="line-11"></a><span class='hs-comment'>-- Portability : GHC</span>
<a name="line-12"></a><span class='hs-comment'>--</span>
<a name="line-13"></a><span class='hs-comment'>-- Character set normalization functions for Unicode, implemented as</span>
<a name="line-14"></a><span class='hs-comment'>-- bindings to the International Components for Unicode (ICU)</span>
<a name="line-15"></a><span class='hs-comment'>-- libraries.</span>
<a name="line-16"></a>
<a name="line-17"></a><span class='hs-keyword'>module</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>ICU</span><span class='hs-varop'>.</span><span class='hs-conid'>Normalize</span>
<a name="line-18"></a>    <span class='hs-layout'>(</span>
<a name="line-19"></a>    <span class='hs-comment'>-- * Unicode normalization API</span>
<a name="line-20"></a>    <span class='hs-comment'>-- $api</span>
<a name="line-21"></a>      <span class='hs-conid'>NormalizationMode</span><span class='hs-layout'>(</span><span class='hs-keyglyph'>..</span><span class='hs-layout'>)</span>
<a name="line-22"></a>    <span class='hs-comment'>-- * Normalization functions</span>
<a name="line-23"></a>    <span class='hs-layout'>,</span> <span class='hs-varid'>normalize</span>
<a name="line-24"></a>    <span class='hs-comment'>-- * Normalization checks</span>
<a name="line-25"></a>    <span class='hs-layout'>,</span> <span class='hs-varid'>quickCheck</span>
<a name="line-26"></a>    <span class='hs-layout'>,</span> <span class='hs-varid'>isNormalized</span>
<a name="line-27"></a>    <span class='hs-comment'>-- * Normalization-sensitive comparison</span>
<a name="line-28"></a>    <span class='hs-layout'>,</span> <span class='hs-conid'>CompareOption</span><span class='hs-layout'>(</span><span class='hs-keyglyph'>..</span><span class='hs-layout'>)</span>
<a name="line-29"></a>    <span class='hs-layout'>,</span> <span class='hs-varid'>compare</span>
<a name="line-30"></a>    <span class='hs-layout'>)</span> <span class='hs-keyword'>where</span>
<a name="line-31"></a>
<a name="line-32"></a>
<a name="line-33"></a><span class='hs-comment'>{-# LINE 33 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-34"></a>
<a name="line-35"></a>
<a name="line-36"></a><span class='hs-comment'>{-# LINE 35 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-37"></a>
<a name="line-38"></a><span class='hs-comment'>{-# LINE 36 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-39"></a>
<a name="line-40"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span> <span class='hs-layout'>(</span><span class='hs-conid'>Text</span><span class='hs-layout'>)</span>
<a name="line-41"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>Foreign</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromPtr</span><span class='hs-layout'>,</span> <span class='hs-varid'>useAsPtr</span><span class='hs-layout'>)</span>
<a name="line-42"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>ICU</span><span class='hs-varop'>.</span><span class='hs-conid'>Error</span><span class='hs-varop'>.</span><span class='hs-conid'>Internal</span> <span class='hs-layout'>(</span><span class='hs-conid'>UErrorCode</span><span class='hs-layout'>,</span> <span class='hs-varid'>handleError</span><span class='hs-layout'>,</span> <span class='hs-varid'>handleOverflowError</span><span class='hs-layout'>)</span>
<a name="line-43"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>ICU</span><span class='hs-varop'>.</span><span class='hs-conid'>Internal</span> <span class='hs-layout'>(</span><span class='hs-conid'>UBool</span><span class='hs-layout'>,</span> <span class='hs-conid'>UChar</span><span class='hs-layout'>,</span> <span class='hs-varid'>asBool</span><span class='hs-layout'>,</span> <span class='hs-varid'>asOrdering</span><span class='hs-layout'>)</span>
<a name="line-44"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>ICU</span><span class='hs-varop'>.</span><span class='hs-conid'>Normalize</span><span class='hs-varop'>.</span><span class='hs-conid'>Internal</span> <span class='hs-layout'>(</span><span class='hs-conid'>UNormalizationCheckResult</span><span class='hs-layout'>,</span> <span class='hs-varid'>toNCR</span><span class='hs-layout'>)</span>
<a name="line-45"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Typeable</span> <span class='hs-layout'>(</span><span class='hs-conid'>Typeable</span><span class='hs-layout'>)</span>
<a name="line-46"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Int</span> <span class='hs-layout'>(</span><span class='hs-conid'>Int32</span><span class='hs-layout'>)</span>
<a name="line-47"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Word</span> <span class='hs-layout'>(</span><span class='hs-conid'>Word32</span><span class='hs-layout'>)</span>
<a name="line-48"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Foreign</span><span class='hs-varop'>.</span><span class='hs-conid'>C</span><span class='hs-varop'>.</span><span class='hs-conid'>Types</span> <span class='hs-layout'>(</span><span class='hs-conid'>CInt</span><span class='hs-layout'>(</span><span class='hs-keyglyph'>..</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span>
<a name="line-49"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Foreign</span><span class='hs-varop'>.</span><span class='hs-conid'>Ptr</span> <span class='hs-layout'>(</span><span class='hs-conid'>Ptr</span><span class='hs-layout'>,</span> <span class='hs-varid'>castPtr</span><span class='hs-layout'>)</span>
<a name="line-50"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>System</span><span class='hs-varop'>.</span><span class='hs-conid'>IO</span><span class='hs-varop'>.</span><span class='hs-conid'>Unsafe</span> <span class='hs-layout'>(</span><span class='hs-varid'>unsafePerformIO</span><span class='hs-layout'>)</span>
<a name="line-51"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Prelude</span> <span class='hs-varid'>hiding</span> <span class='hs-layout'>(</span><span class='hs-varid'>compare</span><span class='hs-layout'>)</span>
<a name="line-52"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>List</span> <span class='hs-layout'>(</span><span class='hs-varid'>foldl'</span><span class='hs-layout'>)</span>
<a name="line-53"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Bits</span> <span class='hs-layout'>(</span><span class='hs-layout'>(</span><span class='hs-varop'>.|.</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span>
<a name="line-54"></a>
<a name="line-55"></a><span class='hs-comment'>-- $api</span>
<a name="line-56"></a><span class='hs-comment'>--</span>
<a name="line-57"></a><span class='hs-comment'>-- The 'normalize' function transforms Unicode text into an equivalent</span>
<a name="line-58"></a><span class='hs-comment'>-- composed or decomposed form, allowing for easier sorting and</span>
<a name="line-59"></a><span class='hs-comment'>-- searching of text.  'normalize' supports the standard normalization</span>
<a name="line-60"></a><span class='hs-comment'>-- forms described in &lt;<a href="http://www.unicode.org/unicode/reports/tr15/">http://www.unicode.org/unicode/reports/tr15/</a>&gt;,</span>
<a name="line-61"></a><span class='hs-comment'>-- Unicode Standard Annex #15: Unicode Normalization Forms.</span>
<a name="line-62"></a><span class='hs-comment'>--</span>
<a name="line-63"></a><span class='hs-comment'>-- Characters with accents or other adornments can be encoded in</span>
<a name="line-64"></a><span class='hs-comment'>-- several different ways in Unicode.  For example, take the character A-acute.</span>
<a name="line-65"></a><span class='hs-comment'>-- In Unicode, this can be encoded as a single character (the</span>
<a name="line-66"></a><span class='hs-comment'>-- \"composed\" form):</span>
<a name="line-67"></a><span class='hs-comment'>--</span>
<a name="line-68"></a><span class='hs-comment'>-- @</span>
<a name="line-69"></a><span class='hs-comment'>--      00C1    LATIN CAPITAL LETTER A WITH ACUTE</span>
<a name="line-70"></a><span class='hs-comment'>-- @</span>
<a name="line-71"></a><span class='hs-comment'>--</span>
<a name="line-72"></a><span class='hs-comment'>-- or as two separate characters (the \"decomposed\" form):</span>
<a name="line-73"></a><span class='hs-comment'>--</span>
<a name="line-74"></a><span class='hs-comment'>-- @</span>
<a name="line-75"></a><span class='hs-comment'>--      0041    LATIN CAPITAL LETTER A</span>
<a name="line-76"></a><span class='hs-comment'>--      0301    COMBINING ACUTE ACCENT</span>
<a name="line-77"></a><span class='hs-comment'>-- @</span>
<a name="line-78"></a><span class='hs-comment'>--</span>
<a name="line-79"></a><span class='hs-comment'>-- To a user of your program, however, both of these sequences should</span>
<a name="line-80"></a><span class='hs-comment'>-- be treated as the same \"user-level\" character \"A with acute</span>
<a name="line-81"></a><span class='hs-comment'>-- accent\".  When you are searching or comparing text, you must</span>
<a name="line-82"></a><span class='hs-comment'>-- ensure that these two sequences are treated equivalently.  In</span>
<a name="line-83"></a><span class='hs-comment'>-- addition, you must handle characters with more than one accent.</span>
<a name="line-84"></a><span class='hs-comment'>-- Sometimes the order of a character's combining accents is</span>
<a name="line-85"></a><span class='hs-comment'>-- significant, while in other cases accent sequences in different</span>
<a name="line-86"></a><span class='hs-comment'>-- orders are really equivalent.</span>
<a name="line-87"></a><span class='hs-comment'>--</span>
<a name="line-88"></a><span class='hs-comment'>-- Similarly, the string \"ffi\" can be encoded as three separate letters:</span>
<a name="line-89"></a><span class='hs-comment'>--</span>
<a name="line-90"></a><span class='hs-comment'>-- @</span>
<a name="line-91"></a><span class='hs-comment'>--      0066    LATIN SMALL LETTER F</span>
<a name="line-92"></a><span class='hs-comment'>--      0066    LATIN SMALL LETTER F</span>
<a name="line-93"></a><span class='hs-comment'>--      0069    LATIN SMALL LETTER I</span>
<a name="line-94"></a><span class='hs-comment'>-- @</span>
<a name="line-95"></a><span class='hs-comment'>--</span>
<a name="line-96"></a><span class='hs-comment'>-- or as the single character</span>
<a name="line-97"></a><span class='hs-comment'>--</span>
<a name="line-98"></a><span class='hs-comment'>-- @</span>
<a name="line-99"></a><span class='hs-comment'>--      FB03    LATIN SMALL LIGATURE FFI</span>
<a name="line-100"></a><span class='hs-comment'>-- @</span>
<a name="line-101"></a><span class='hs-comment'>--</span>
<a name="line-102"></a><span class='hs-comment'>-- The \"ffi\" ligature is not a distinct semantic character, and</span>
<a name="line-103"></a><span class='hs-comment'>-- strictly speaking it shouldn't be in Unicode at all, but it was</span>
<a name="line-104"></a><span class='hs-comment'>-- included for compatibility with existing character sets that</span>
<a name="line-105"></a><span class='hs-comment'>-- already provided it.  The Unicode standard identifies such</span>
<a name="line-106"></a><span class='hs-comment'>-- characters by giving them \"compatibility\" decompositions into the</span>
<a name="line-107"></a><span class='hs-comment'>-- corresponding semantic characters.  When sorting and searching, you</span>
<a name="line-108"></a><span class='hs-comment'>-- will often want to use these mappings.</span>
<a name="line-109"></a><span class='hs-comment'>--</span>
<a name="line-110"></a><span class='hs-comment'>-- 'normalize' helps solve these problems by transforming text into</span>
<a name="line-111"></a><span class='hs-comment'>-- the canonical composed and decomposed forms as shown in the first</span>
<a name="line-112"></a><span class='hs-comment'>-- example above.  In addition, you can have it perform compatibility</span>
<a name="line-113"></a><span class='hs-comment'>-- decompositions so that you can treat compatibility characters the</span>
<a name="line-114"></a><span class='hs-comment'>-- same as their equivalents.  Finally, 'normalize' rearranges accents</span>
<a name="line-115"></a><span class='hs-comment'>-- into the proper canonical order, so that you do not have to worry</span>
<a name="line-116"></a><span class='hs-comment'>-- about accent rearrangement on your own.</span>
<a name="line-117"></a><span class='hs-comment'>--</span>
<a name="line-118"></a><span class='hs-comment'>-- Form 'FCD', \"Fast C or D\", is also designed for collation.  It</span>
<a name="line-119"></a><span class='hs-comment'>-- allows one to work on strings that are not necessarily normalized with</span>
<a name="line-120"></a><span class='hs-comment'>-- an algorithm (as in collation) that works under \"canonical</span>
<a name="line-121"></a><span class='hs-comment'>-- closure\", i.e., it treats precomposed characters and their</span>
<a name="line-122"></a><span class='hs-comment'>-- decomposed equivalents the same.</span>
<a name="line-123"></a><span class='hs-comment'>--</span>
<a name="line-124"></a><span class='hs-comment'>-- It is not a normalization form because it does not provide for</span>
<a name="line-125"></a><span class='hs-comment'>-- uniqueness of representation. Multiple strings may be canonically</span>
<a name="line-126"></a><span class='hs-comment'>-- equivalent (their NFDs are identical) and may all conform to 'FCD'</span>
<a name="line-127"></a><span class='hs-comment'>-- without being identical themselves.</span>
<a name="line-128"></a><span class='hs-comment'>--</span>
<a name="line-129"></a><span class='hs-comment'>-- The form is defined such that the \"raw decomposition\", the</span>
<a name="line-130"></a><span class='hs-comment'>-- recursive canonical decomposition of each character, results in a</span>
<a name="line-131"></a><span class='hs-comment'>-- string that is canonically ordered. This means that precomposed</span>
<a name="line-132"></a><span class='hs-comment'>-- characters are allowed as long as their decompositions do not</span>
<a name="line-133"></a><span class='hs-comment'>-- need canonical reordering.</span>
<a name="line-134"></a><span class='hs-comment'>--</span>
<a name="line-135"></a><span class='hs-comment'>-- Its advantage for a process like collation is that all 'NFD' and</span>
<a name="line-136"></a><span class='hs-comment'>-- most 'NFC' texts - and many unnormalized texts - already conform to</span>
<a name="line-137"></a><span class='hs-comment'>-- 'FCD' and do not need to be normalized ('NFD') for such a</span>
<a name="line-138"></a><span class='hs-comment'>-- process. The 'FCD' 'quickCheck' will return 'Just' 'True' for most strings</span>
<a name="line-139"></a><span class='hs-comment'>-- in practice.</span>
<a name="line-140"></a><span class='hs-comment'>--</span>
<a name="line-141"></a><span class='hs-comment'>-- @'normalize' 'FCD'@ may be implemented with 'NFD'.</span>
<a name="line-142"></a><span class='hs-comment'>--</span>
<a name="line-143"></a><span class='hs-comment'>-- For more details on 'FCD' see the collation design document:</span>
<a name="line-144"></a><span class='hs-comment'>-- &lt;<a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm</a>&gt;</span>
<a name="line-145"></a><span class='hs-comment'>--</span>
<a name="line-146"></a><span class='hs-comment'>-- ICU collation performs either 'NFD' or 'FCD' normalization</span>
<a name="line-147"></a><span class='hs-comment'>-- automatically if normalization is turned on for the collator</span>
<a name="line-148"></a><span class='hs-comment'>-- object.  Beyond collation and string search, normalized strings may</span>
<a name="line-149"></a><span class='hs-comment'>-- be useful for string equivalence comparisons,</span>
<a name="line-150"></a><span class='hs-comment'>-- transliteration/transcription, unique representations, etc.</span>
<a name="line-151"></a><span class='hs-comment'>--</span>
<a name="line-152"></a><span class='hs-comment'>-- The W3C generally recommends exchanging text in 'NFC'.  Note also</span>
<a name="line-153"></a><span class='hs-comment'>-- that most legacy character encodings use only precomposed forms and</span>
<a name="line-154"></a><span class='hs-comment'>-- often do not encode any combining marks by themselves. For</span>
<a name="line-155"></a><span class='hs-comment'>-- conversion to such character encodings the Unicode text needs to be</span>
<a name="line-156"></a><span class='hs-comment'>-- normalized to 'NFC'.  For more usage examples, see the Unicode</span>
<a name="line-157"></a><span class='hs-comment'>-- Standard Annex.</span>
<a name="line-158"></a>
<a name="line-159"></a><a name="UCompareOption"></a><span class='hs-keyword'>type</span> <span class='hs-conid'>UCompareOption</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>Word32</span>
<a name="line-160"></a>
<a name="line-161"></a><a name="CompareOption"></a><span class='hs-comment'>-- | Options to 'compare'.</span>
<a name="line-162"></a><a name="CompareOption"></a><span class='hs-keyword'>data</span> <span class='hs-conid'>CompareOption</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>InputIsFCD</span>
<a name="line-163"></a>                   <span class='hs-comment'>-- ^ The caller knows that both strings fulfill the</span>
<a name="line-164"></a>                   <span class='hs-comment'>-- 'FCD' conditions.  If /not/ set, 'compare' will</span>
<a name="line-165"></a>                   <span class='hs-comment'>-- 'quickCheck' for 'FCD' and normalize if</span>
<a name="line-166"></a>                   <span class='hs-comment'>-- necessary.</span>
<a name="line-167"></a>                   <span class='hs-keyglyph'>|</span> <span class='hs-conid'>CompareIgnoreCase</span>
<a name="line-168"></a>                   <span class='hs-comment'>-- ^ Compare strings case-insensitively using case</span>
<a name="line-169"></a>                   <span class='hs-comment'>-- folding, instead of case-sensitively.  If set,</span>
<a name="line-170"></a>                   <span class='hs-comment'>-- then the following case folding options are</span>
<a name="line-171"></a>                   <span class='hs-comment'>-- used.</span>
<a name="line-172"></a>                   <span class='hs-keyglyph'>|</span> <span class='hs-conid'>FoldCaseExcludeSpecialI</span>
<a name="line-173"></a>                   <span class='hs-comment'>-- ^ When case folding, exclude the special I</span>
<a name="line-174"></a>                   <span class='hs-comment'>-- character.  For use with Turkic</span>
<a name="line-175"></a>                   <span class='hs-comment'>-- (Turkish/Azerbaijani) text data.</span>
<a name="line-176"></a>                     <span class='hs-keyword'>deriving</span> <span class='hs-layout'>(</span><span class='hs-conid'>Eq</span><span class='hs-layout'>,</span> <span class='hs-conid'>Show</span><span class='hs-layout'>,</span> <span class='hs-conid'>Enum</span><span class='hs-layout'>,</span> <span class='hs-conid'>Typeable</span><span class='hs-layout'>)</span>
<a name="line-177"></a>
<a name="line-178"></a><a name="fromCompareOption"></a><span class='hs-definition'>fromCompareOption</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>CompareOption</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>UCompareOption</span>
<a name="line-179"></a><span class='hs-definition'>fromCompareOption</span> <span class='hs-conid'>InputIsFCD</span>              <span class='hs-keyglyph'>=</span> <span class='hs-num'>131072</span>
<a name="line-180"></a><span class='hs-comment'>{-# LINE 177 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-181"></a><span class='hs-definition'>fromCompareOption</span> <span class='hs-conid'>CompareIgnoreCase</span>       <span class='hs-keyglyph'>=</span> <span class='hs-num'>65536</span>
<a name="line-182"></a><span class='hs-comment'>{-# LINE 178 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-183"></a><span class='hs-definition'>fromCompareOption</span> <span class='hs-conid'>FoldCaseExcludeSpecialI</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>1</span>
<a name="line-184"></a><span class='hs-comment'>{-# LINE 179 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-185"></a>
<a name="line-186"></a><a name="reduceCompareOptions"></a><span class='hs-definition'>reduceCompareOptions</span> <span class='hs-keyglyph'>::</span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>CompareOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>UCompareOption</span>
<a name="line-187"></a><span class='hs-definition'>reduceCompareOptions</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>foldl'</span> <span class='hs-varid'>orO</span> <span class='hs-layout'>(</span><span class='hs-num'>32768</span><span class='hs-layout'>)</span>
<a name="line-188"></a><span class='hs-comment'>{-# LINE 182 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-189"></a>    <span class='hs-keyword'>where</span> <span class='hs-varid'>a</span> <span class='hs-varop'>`orO`</span> <span class='hs-varid'>b</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>a</span> <span class='hs-varop'>.|.</span> <span class='hs-varid'>fromCompareOption</span> <span class='hs-varid'>b</span>
<a name="line-190"></a>
<a name="line-191"></a><a name="UNormalizationMode"></a><span class='hs-keyword'>type</span> <span class='hs-conid'>UNormalizationMode</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>CInt</span>
<a name="line-192"></a>
<a name="line-193"></a><a name="NormalizationMode"></a><span class='hs-comment'>-- | Normalization modes.</span>
<a name="line-194"></a><a name="NormalizationMode"></a><span class='hs-keyword'>data</span> <span class='hs-conid'>NormalizationMode</span>
<a name="line-195"></a>    <span class='hs-keyglyph'>=</span> <span class='hs-conid'>None</span>   <span class='hs-comment'>-- ^ No decomposition/composition.</span>
<a name="line-196"></a>    <span class='hs-keyglyph'>|</span> <span class='hs-conid'>NFD</span>    <span class='hs-comment'>-- ^ Canonical decomposition.</span>
<a name="line-197"></a>    <span class='hs-keyglyph'>|</span> <span class='hs-conid'>NFKD</span>   <span class='hs-comment'>-- ^ Compatibility decomposition.</span>
<a name="line-198"></a>    <span class='hs-keyglyph'>|</span> <span class='hs-conid'>NFC</span>    <span class='hs-comment'>-- ^ Canonical decomposition followed by canonical composition.</span>
<a name="line-199"></a>    <span class='hs-keyglyph'>|</span> <span class='hs-conid'>NFKC</span>   <span class='hs-comment'>-- ^ Compatibility decomposition followed by canonical composition.</span>
<a name="line-200"></a>    <span class='hs-keyglyph'>|</span> <span class='hs-conid'>FCD</span>    <span class='hs-comment'>-- ^ \"Fast C or D\" form.</span>
<a name="line-201"></a>      <span class='hs-keyword'>deriving</span> <span class='hs-layout'>(</span><span class='hs-conid'>Eq</span><span class='hs-layout'>,</span> <span class='hs-conid'>Show</span><span class='hs-layout'>,</span> <span class='hs-conid'>Enum</span><span class='hs-layout'>,</span> <span class='hs-conid'>Typeable</span><span class='hs-layout'>)</span>
<a name="line-202"></a>
<a name="line-203"></a><a name="toNM"></a><span class='hs-definition'>toNM</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>NormalizationMode</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>UNormalizationMode</span>
<a name="line-204"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>None</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>1</span>
<a name="line-205"></a><span class='hs-comment'>{-# LINE 198 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-206"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>NFD</span>  <span class='hs-keyglyph'>=</span> <span class='hs-num'>2</span>
<a name="line-207"></a><span class='hs-comment'>{-# LINE 199 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-208"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>NFKD</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>3</span>
<a name="line-209"></a><span class='hs-comment'>{-# LINE 200 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-210"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>NFC</span>  <span class='hs-keyglyph'>=</span> <span class='hs-num'>4</span>
<a name="line-211"></a><span class='hs-comment'>{-# LINE 201 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-212"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>NFKC</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>5</span>
<a name="line-213"></a><span class='hs-comment'>{-# LINE 202 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-214"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>FCD</span>  <span class='hs-keyglyph'>=</span> <span class='hs-num'>6</span>
<a name="line-215"></a><span class='hs-comment'>{-# LINE 203 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-216"></a>
<a name="line-217"></a><a name="normalize"></a><span class='hs-comment'>-- | Normalize a string according to the specified normalization mode.</span>
<a name="line-218"></a><span class='hs-definition'>normalize</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>NormalizationMode</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Text</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Text</span>
<a name="line-219"></a><span class='hs-definition'>normalize</span> <span class='hs-varid'>mode</span> <span class='hs-varid'>t</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>unsafePerformIO</span> <span class='hs-varop'>.</span> <span class='hs-varid'>useAsPtr</span> <span class='hs-varid'>t</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>sptr</span> <span class='hs-varid'>slen</span> <span class='hs-keyglyph'>-&gt;</span>
<a name="line-220"></a>  <span class='hs-keyword'>let</span> <span class='hs-varid'>slen'</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>slen</span>
<a name="line-221"></a>      <span class='hs-varid'>mode'</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>toNM</span> <span class='hs-varid'>mode</span>
<a name="line-222"></a>  <span class='hs-keyword'>in</span> <span class='hs-varid'>handleOverflowError</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>slen</span><span class='hs-layout'>)</span>
<a name="line-223"></a>     <span class='hs-layout'>(</span><span class='hs-keyglyph'>\</span><span class='hs-varid'>dptr</span> <span class='hs-varid'>dlen</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-varid'>unorm_normalize</span> <span class='hs-varid'>sptr</span> <span class='hs-varid'>slen'</span> <span class='hs-varid'>mode'</span> <span class='hs-num'>0</span> <span class='hs-varid'>dptr</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>dlen</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span>
<a name="line-224"></a>     <span class='hs-layout'>(</span><span class='hs-keyglyph'>\</span><span class='hs-varid'>dptr</span> <span class='hs-varid'>dlen</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-varid'>fromPtr</span> <span class='hs-layout'>(</span><span class='hs-varid'>castPtr</span> <span class='hs-varid'>dptr</span><span class='hs-layout'>)</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>dlen</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span>
<a name="line-225"></a>
<a name="line-226"></a>
<a name="line-227"></a><a name="quickCheck"></a><span class='hs-comment'>-- | Cheaply test whether a string is already in the requested</span>
<a name="line-228"></a><span class='hs-comment'>-- normalization form.</span>
<a name="line-229"></a><span class='hs-comment'>--</span>
<a name="line-230"></a><span class='hs-comment'>-- 'Just' 'True' and 'Just' 'False' are definite answers: the string</span>
<a name="line-231"></a><span class='hs-comment'>-- is, or is not, in that form.</span>
<a name="line-232"></a><span class='hs-comment'>--</span>
<a name="line-233"></a><span class='hs-comment'>-- 'Nothing' means the fast path could not decide.  Follow up with a</span>
<a name="line-234"></a><span class='hs-comment'>-- more thorough check such as 'isNormalized'; the caller may need to</span>
<a name="line-235"></a><span class='hs-comment'>-- normalize the string and compare the result against the original</span>
<a name="line-236"></a><span class='hs-comment'>-- to settle the question.</span>
<a name="line-237"></a><span class='hs-definition'>quickCheck</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>NormalizationMode</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Text</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Maybe</span> <span class='hs-conid'>Bool</span>
<a name="line-238"></a><span class='hs-definition'>quickCheck</span> <span class='hs-varid'>mode</span> <span class='hs-varid'>t</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>unsafePerformIO</span> <span class='hs-layout'>(</span><span class='hs-varid'>useAsPtr</span> <span class='hs-varid'>t</span> <span class='hs-varid'>probe</span><span class='hs-layout'>)</span>
<a name="line-239"></a>  <span class='hs-keyword'>where</span>
<a name="line-240"></a>    <span class='hs-comment'>-- Call 'unorm_quickCheck' on what 'useAsPtr' supplies; map the result through 'toNCR'.</span>
<a name="line-241"></a>    <span class='hs-varid'>probe</span> <span class='hs-varid'>ptr</span> <span class='hs-varid'>len</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>fmap</span> <span class='hs-varid'>toNCR</span> <span class='hs-layout'>(</span><span class='hs-varid'>handleError</span> <span class='hs-layout'>(</span><span class='hs-varid'>unorm_quickCheck</span> <span class='hs-varid'>ptr</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>len</span><span class='hs-layout'>)</span> <span class='hs-layout'>(</span><span class='hs-varid'>toNM</span> <span class='hs-varid'>mode</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span>
<a name="line-242"></a>
<a name="line-243"></a><a name="isNormalized"></a><span class='hs-comment'>-- | Report whether a string is in the given normalization form.</span>
<a name="line-244"></a><span class='hs-comment'>--</span>
<a name="line-245"></a><span class='hs-comment'>-- The answer is always definitive, in contrast to 'quickCheck'.  For</span>
<a name="line-246"></a><span class='hs-comment'>-- the 'NFD', 'NFKD', and 'FCD' forms the two functions behave in</span>
<a name="line-247"></a><span class='hs-comment'>-- exactly the same way.  For 'NFC' and 'NFKC', where 'quickCheck'</span>
<a name="line-248"></a><span class='hs-comment'>-- may return 'Nothing', this function carries out further tests</span>
<a name="line-249"></a><span class='hs-comment'>-- until it reaches a definitive result.</span>
<a name="line-250"></a><span class='hs-definition'>isNormalized</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>NormalizationMode</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Text</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Bool</span>
<a name="line-251"></a><span class='hs-definition'>isNormalized</span> <span class='hs-varid'>mode</span> <span class='hs-varid'>t</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>unsafePerformIO</span> <span class='hs-layout'>(</span><span class='hs-varid'>useAsPtr</span> <span class='hs-varid'>t</span> <span class='hs-varid'>check</span><span class='hs-layout'>)</span>
<a name="line-252"></a>  <span class='hs-keyword'>where</span>
<a name="line-253"></a>    <span class='hs-comment'>-- Call 'unorm_isNormalized' on what 'useAsPtr' supplies; map the result through 'asBool'.</span>
<a name="line-254"></a>    <span class='hs-varid'>check</span> <span class='hs-varid'>ptr</span> <span class='hs-varid'>len</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>fmap</span> <span class='hs-varid'>asBool</span> <span class='hs-layout'>(</span><span class='hs-varid'>handleError</span> <span class='hs-layout'>(</span><span class='hs-varid'>unorm_isNormalized</span> <span class='hs-varid'>ptr</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>len</span><span class='hs-layout'>)</span> <span class='hs-layout'>(</span><span class='hs-varid'>toNM</span> <span class='hs-varid'>mode</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span>
<a name="line-255"></a>
<a name="line-256"></a><a name="compare"></a><span class='hs-comment'>-- | Order two strings under canonical equivalence.  The options can</span>
<a name="line-257"></a><span class='hs-comment'>-- additionally request case-insensitive comparison and code point</span>
<a name="line-258"></a><span class='hs-comment'>-- order (rather than code unit order).</span>
<a name="line-259"></a><span class='hs-comment'>--</span>
<a name="line-260"></a><span class='hs-comment'>-- Two strings are canonically equivalent when their normalized</span>
<a name="line-261"></a><span class='hs-comment'>-- forms ('NFD' or 'NFC') are identical.  Rather than normalizing</span>
<a name="line-262"></a><span class='hs-comment'>-- (and optionally case-folding) both strings in full, this function</span>
<a name="line-263"></a><span class='hs-comment'>-- compares them incrementally, which improves performance</span>
<a name="line-264"></a><span class='hs-comment'>-- significantly.</span>
<a name="line-265"></a><span class='hs-comment'>--</span>
<a name="line-266"></a><span class='hs-comment'>-- Bulk normalization is needed only when the strings do not fulfill</span>
<a name="line-267"></a><span class='hs-comment'>-- the 'FCD' conditions.  Memory is allocated temporarily only in</span>
<a name="line-268"></a><span class='hs-comment'>-- that case, and only if the strings are relatively long; 'FCD'</span>
<a name="line-269"></a><span class='hs-comment'>-- strings and short non-'FCD' strings need no memory allocation.</span>
<a name="line-270"></a><span class='hs-definition'>compare</span> <span class='hs-keyglyph'>::</span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>CompareOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Text</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Text</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Ordering</span>
<a name="line-271"></a><span class='hs-definition'>compare</span> <span class='hs-varid'>opts</span> <span class='hs-varid'>a</span> <span class='hs-varid'>b</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>unsafePerformIO</span> <span class='hs-varop'>$</span>
<a name="line-272"></a>    <span class='hs-varid'>useAsPtr</span> <span class='hs-varid'>a</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>lhs</span> <span class='hs-varid'>lhsLen</span> <span class='hs-keyglyph'>-&gt;</span>
<a name="line-273"></a>    <span class='hs-varid'>useAsPtr</span> <span class='hs-varid'>b</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>rhs</span> <span class='hs-varid'>rhsLen</span> <span class='hs-keyglyph'>-&gt;</span>
<a name="line-274"></a>      <span class='hs-varid'>fmap</span> <span class='hs-varid'>asOrdering</span> <span class='hs-layout'>(</span><span class='hs-varid'>handleError</span> <span class='hs-layout'>(</span><span class='hs-varid'>unorm_compare</span> <span class='hs-varid'>lhs</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>lhsLen</span><span class='hs-layout'>)</span> <span class='hs-varid'>rhs</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>rhsLen</span><span class='hs-layout'>)</span> <span class='hs-varid'>flags</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span>
<a name="line-275"></a>  <span class='hs-keyword'>where</span>
<a name="line-276"></a>    <span class='hs-varid'>flags</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>reduceCompareOptions</span> <span class='hs-varid'>opts</span> <span class='hs-comment'>-- option list reduced to the single value 'unorm_compare' takes</span>
<a name="line-277"></a>
<a name="line-278"></a><span class='hs-keyword'>foreign</span> <span class='hs-keyword'>import</span> <span class='hs-keyword'>ccall</span> <span class='hs-keyword'>unsafe</span> <span class='hs-str'>"hs_text_icu.h __hs_unorm_compare"</span> <span class='hs-comment'>-- C side of 'compare'</span>
<a name="line-279"></a>    <span class='hs-varid'>unorm_compare</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Int32</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Int32</span>
<a name="line-280"></a>                  <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Word32</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UErrorCode</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>IO</span> <span class='hs-conid'>Int32</span>
<a name="line-281"></a>
<a name="line-282"></a><span class='hs-keyword'>foreign</span> <span class='hs-keyword'>import</span> <span class='hs-keyword'>ccall</span> <span class='hs-keyword'>unsafe</span> <span class='hs-str'>"hs_text_icu.h __hs_unorm_quickCheck"</span> <span class='hs-comment'>-- C side of 'quickCheck'</span>
<a name="line-283"></a>    <span class='hs-varid'>unorm_quickCheck</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Int32</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>UNormalizationMode</span>
<a name="line-284"></a>                     <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UErrorCode</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>IO</span> <span class='hs-conid'>UNormalizationCheckResult</span>
<a name="line-285"></a>
<a name="line-286"></a><span class='hs-keyword'>foreign</span> <span class='hs-keyword'>import</span> <span class='hs-keyword'>ccall</span> <span class='hs-keyword'>unsafe</span> <span class='hs-str'>"hs_text_icu.h __hs_unorm_isNormalized"</span> <span class='hs-comment'>-- C side of 'isNormalized'</span>
<a name="line-287"></a>    <span class='hs-varid'>unorm_isNormalized</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Int32</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>UNormalizationMode</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UErrorCode</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>IO</span> <span class='hs-conid'>UBool</span>
<a name="line-288"></a>
<a name="line-289"></a><span class='hs-keyword'>foreign</span> <span class='hs-keyword'>import</span> <span class='hs-keyword'>ccall</span> <span class='hs-keyword'>unsafe</span> <span class='hs-str'>"hs_text_icu.h __hs_unorm_normalize"</span> <span class='hs-comment'>-- C side of 'normalize'</span>
<a name="line-290"></a>    <span class='hs-varid'>unorm_normalize</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Int32</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>UNormalizationMode</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Int32</span>
<a name="line-291"></a>                    <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Int32</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UErrorCode</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>IO</span> <span class='hs-conid'>Int32</span>
</pre></body>
</html>