/usr/share/doc/libghc-text-icu-doc/html/src/Data-Text-ICU-Normalize.html is in libghc-text-icu-doc 0.6.3.7-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 | <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<!-- Generated by HsColour, http://code.haskell.org/~malcolm/hscolour/ -->
<title>dist-ghc/build/Data/Text/ICU/Normalize.hs</title>
<link type='text/css' rel='stylesheet' href='hscolour.css' />
</head>
<body>
<pre><a name="line-1"></a><span class='hs-comment'>{-# LINE 1 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-2"></a><span class='hs-comment'>{-# LANGUAGE CPP, DeriveDataTypeable, ForeignFunctionInterface #-}</span>
<a name="line-3"></a><span class='hs-comment'>{-# LINE 2 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-4"></a><span class='hs-comment'>-- |</span>
<a name="line-5"></a><span class='hs-comment'>-- Module : Data.Text.ICU.Normalize</span>
<a name="line-6"></a><span class='hs-comment'>-- Copyright : (c) 2009, 2010 Bryan O'Sullivan</span>
<a name="line-7"></a><span class='hs-comment'>--</span>
<a name="line-8"></a><span class='hs-comment'>-- License : BSD-style</span>
<a name="line-9"></a><span class='hs-comment'>-- Maintainer : bos@serpentine.com</span>
<a name="line-10"></a><span class='hs-comment'>-- Stability : experimental</span>
<a name="line-11"></a><span class='hs-comment'>-- Portability : GHC</span>
<a name="line-12"></a><span class='hs-comment'>--</span>
<a name="line-13"></a><span class='hs-comment'>-- Character set normalization functions for Unicode, implemented as</span>
<a name="line-14"></a><span class='hs-comment'>-- bindings to the International Components for Unicode (ICU)</span>
<a name="line-15"></a><span class='hs-comment'>-- libraries.</span>
<a name="line-16"></a>
<a name="line-17"></a><span class='hs-keyword'>module</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>ICU</span><span class='hs-varop'>.</span><span class='hs-conid'>Normalize</span>
<a name="line-18"></a> <span class='hs-layout'>(</span>
<a name="line-19"></a> <span class='hs-comment'>-- * Unicode normalization API</span>
<a name="line-20"></a> <span class='hs-comment'>-- $api</span>
<a name="line-21"></a> <span class='hs-conid'>NormalizationMode</span><span class='hs-layout'>(</span><span class='hs-keyglyph'>..</span><span class='hs-layout'>)</span>
<a name="line-22"></a> <span class='hs-comment'>-- * Normalization functions</span>
<a name="line-23"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>normalize</span>
<a name="line-24"></a> <span class='hs-comment'>-- * Normalization checks</span>
<a name="line-25"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>quickCheck</span>
<a name="line-26"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>isNormalized</span>
<a name="line-27"></a> <span class='hs-comment'>-- * Normalization-sensitive comparison</span>
<a name="line-28"></a> <span class='hs-layout'>,</span> <span class='hs-conid'>CompareOption</span><span class='hs-layout'>(</span><span class='hs-keyglyph'>..</span><span class='hs-layout'>)</span>
<a name="line-29"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>compare</span>
<a name="line-30"></a> <span class='hs-layout'>)</span> <span class='hs-keyword'>where</span>
<a name="line-31"></a>
<a name="line-32"></a>
<a name="line-33"></a><span class='hs-comment'>{-# LINE 33 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-34"></a>
<a name="line-35"></a>
<a name="line-36"></a><span class='hs-comment'>{-# LINE 35 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-37"></a>
<a name="line-38"></a><span class='hs-comment'>{-# LINE 36 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-39"></a>
<a name="line-40"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span> <span class='hs-layout'>(</span><span class='hs-conid'>Text</span><span class='hs-layout'>)</span>
<a name="line-41"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>Foreign</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromPtr</span><span class='hs-layout'>,</span> <span class='hs-varid'>useAsPtr</span><span class='hs-layout'>)</span>
<a name="line-42"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>ICU</span><span class='hs-varop'>.</span><span class='hs-conid'>Error</span><span class='hs-varop'>.</span><span class='hs-conid'>Internal</span> <span class='hs-layout'>(</span><span class='hs-conid'>UErrorCode</span><span class='hs-layout'>,</span> <span class='hs-varid'>handleError</span><span class='hs-layout'>,</span> <span class='hs-varid'>handleOverflowError</span><span class='hs-layout'>)</span>
<a name="line-43"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>ICU</span><span class='hs-varop'>.</span><span class='hs-conid'>Internal</span> <span class='hs-layout'>(</span><span class='hs-conid'>UBool</span><span class='hs-layout'>,</span> <span class='hs-conid'>UChar</span><span class='hs-layout'>,</span> <span class='hs-varid'>asBool</span><span class='hs-layout'>,</span> <span class='hs-varid'>asOrdering</span><span class='hs-layout'>)</span>
<a name="line-44"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>ICU</span><span class='hs-varop'>.</span><span class='hs-conid'>Normalize</span><span class='hs-varop'>.</span><span class='hs-conid'>Internal</span> <span class='hs-layout'>(</span><span class='hs-conid'>UNormalizationCheckResult</span><span class='hs-layout'>,</span> <span class='hs-varid'>toNCR</span><span class='hs-layout'>)</span>
<a name="line-45"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Typeable</span> <span class='hs-layout'>(</span><span class='hs-conid'>Typeable</span><span class='hs-layout'>)</span>
<a name="line-46"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Int</span> <span class='hs-layout'>(</span><span class='hs-conid'>Int32</span><span class='hs-layout'>)</span>
<a name="line-47"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Word</span> <span class='hs-layout'>(</span><span class='hs-conid'>Word32</span><span class='hs-layout'>)</span>
<a name="line-48"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Foreign</span><span class='hs-varop'>.</span><span class='hs-conid'>C</span><span class='hs-varop'>.</span><span class='hs-conid'>Types</span> <span class='hs-layout'>(</span><span class='hs-conid'>CInt</span><span class='hs-layout'>(</span><span class='hs-keyglyph'>..</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span>
<a name="line-49"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Foreign</span><span class='hs-varop'>.</span><span class='hs-conid'>Ptr</span> <span class='hs-layout'>(</span><span class='hs-conid'>Ptr</span><span class='hs-layout'>,</span> <span class='hs-varid'>castPtr</span><span class='hs-layout'>)</span>
<a name="line-50"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>System</span><span class='hs-varop'>.</span><span class='hs-conid'>IO</span><span class='hs-varop'>.</span><span class='hs-conid'>Unsafe</span> <span class='hs-layout'>(</span><span class='hs-varid'>unsafePerformIO</span><span class='hs-layout'>)</span>
<a name="line-51"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Prelude</span> <span class='hs-varid'>hiding</span> <span class='hs-layout'>(</span><span class='hs-varid'>compare</span><span class='hs-layout'>)</span>
<a name="line-52"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>List</span> <span class='hs-layout'>(</span><span class='hs-varid'>foldl'</span><span class='hs-layout'>)</span>
<a name="line-53"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>Bits</span> <span class='hs-layout'>(</span><span class='hs-layout'>(</span><span class='hs-varop'>.|.</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span>
<a name="line-54"></a>
<a name="line-55"></a><span class='hs-comment'>-- $api</span>
<a name="line-56"></a><span class='hs-comment'>--</span>
<a name="line-57"></a><span class='hs-comment'>-- The 'normalize' function transforms Unicode text into an equivalent</span>
<a name="line-58"></a><span class='hs-comment'>-- composed or decomposed form, allowing for easier sorting and</span>
<a name="line-59"></a><span class='hs-comment'>-- searching of text. 'normalize' supports the standard normalization</span>
<a name="line-60"></a><span class='hs-comment'>-- forms described in &lt;<a href="http://www.unicode.org/unicode/reports/tr15/">http://www.unicode.org/unicode/reports/tr15/</a>&gt;,</span>
<a name="line-61"></a><span class='hs-comment'>-- Unicode Standard Annex #15: Unicode Normalization Forms.</span>
<a name="line-62"></a><span class='hs-comment'>--</span>
<a name="line-63"></a><span class='hs-comment'>-- Characters with accents or other adornments can be encoded in</span>
<a name="line-64"></a><span class='hs-comment'>-- several different ways in Unicode. For example, take the character A-acute.</span>
<a name="line-65"></a><span class='hs-comment'>-- In Unicode, this can be encoded as a single character (the</span>
<a name="line-66"></a><span class='hs-comment'>-- \"composed\" form):</span>
<a name="line-67"></a><span class='hs-comment'>--</span>
<a name="line-68"></a><span class='hs-comment'>-- @</span>
<a name="line-69"></a><span class='hs-comment'>-- 00C1 LATIN CAPITAL LETTER A WITH ACUTE</span>
<a name="line-70"></a><span class='hs-comment'>-- @</span>
<a name="line-71"></a><span class='hs-comment'>--</span>
<a name="line-72"></a><span class='hs-comment'>-- or as two separate characters (the \"decomposed\" form):</span>
<a name="line-73"></a><span class='hs-comment'>--</span>
<a name="line-74"></a><span class='hs-comment'>-- @</span>
<a name="line-75"></a><span class='hs-comment'>-- 0041 LATIN CAPITAL LETTER A</span>
<a name="line-76"></a><span class='hs-comment'>-- 0301 COMBINING ACUTE ACCENT</span>
<a name="line-77"></a><span class='hs-comment'>-- @</span>
<a name="line-78"></a><span class='hs-comment'>--</span>
<a name="line-79"></a><span class='hs-comment'>-- To a user of your program, however, both of these sequences should</span>
<a name="line-80"></a><span class='hs-comment'>-- be treated as the same \"user-level\" character \"A with acute</span>
<a name="line-81"></a><span class='hs-comment'>-- accent\". When you are searching or comparing text, you must</span>
<a name="line-82"></a><span class='hs-comment'>-- ensure that these two sequences are treated equivalently. In</span>
<a name="line-83"></a><span class='hs-comment'>-- addition, you must handle characters with more than one accent.</span>
<a name="line-84"></a><span class='hs-comment'>-- Sometimes the order of a character's combining accents is</span>
<a name="line-85"></a><span class='hs-comment'>-- significant, while in other cases accent sequences in different</span>
<a name="line-86"></a><span class='hs-comment'>-- orders are really equivalent.</span>
<a name="line-87"></a><span class='hs-comment'>--</span>
<a name="line-88"></a><span class='hs-comment'>-- Similarly, the string \"ffi\" can be encoded as three separate letters:</span>
<a name="line-89"></a><span class='hs-comment'>--</span>
<a name="line-90"></a><span class='hs-comment'>-- @</span>
<a name="line-91"></a><span class='hs-comment'>-- 0066 LATIN SMALL LETTER F</span>
<a name="line-92"></a><span class='hs-comment'>-- 0066 LATIN SMALL LETTER F</span>
<a name="line-93"></a><span class='hs-comment'>-- 0069 LATIN SMALL LETTER I</span>
<a name="line-94"></a><span class='hs-comment'>-- @</span>
<a name="line-95"></a><span class='hs-comment'>--</span>
<a name="line-96"></a><span class='hs-comment'>-- or as the single character</span>
<a name="line-97"></a><span class='hs-comment'>--</span>
<a name="line-98"></a><span class='hs-comment'>-- @</span>
<a name="line-99"></a><span class='hs-comment'>-- FB03 LATIN SMALL LIGATURE FFI</span>
<a name="line-100"></a><span class='hs-comment'>-- @</span>
<a name="line-101"></a><span class='hs-comment'>--</span>
<a name="line-102"></a><span class='hs-comment'>-- The \"ffi\" ligature is not a distinct semantic character, and</span>
<a name="line-103"></a><span class='hs-comment'>-- strictly speaking it shouldn't be in Unicode at all, but it was</span>
<a name="line-104"></a><span class='hs-comment'>-- included for compatibility with existing character sets that</span>
<a name="line-105"></a><span class='hs-comment'>-- already provided it. The Unicode standard identifies such</span>
<a name="line-106"></a><span class='hs-comment'>-- characters by giving them \"compatibility\" decompositions into the</span>
<a name="line-107"></a><span class='hs-comment'>-- corresponding semantic characters. When sorting and searching, you</span>
<a name="line-108"></a><span class='hs-comment'>-- will often want to use these mappings.</span>
<a name="line-109"></a><span class='hs-comment'>--</span>
<a name="line-110"></a><span class='hs-comment'>-- 'normalize' helps solve these problems by transforming text into</span>
<a name="line-111"></a><span class='hs-comment'>-- the canonical composed and decomposed forms as shown in the first</span>
<a name="line-112"></a><span class='hs-comment'>-- example above. In addition, you can have it perform compatibility</span>
<a name="line-113"></a><span class='hs-comment'>-- decompositions so that you can treat compatibility characters the</span>
<a name="line-114"></a><span class='hs-comment'>-- same as their equivalents. Finally, 'normalize' rearranges accents</span>
<a name="line-115"></a><span class='hs-comment'>-- into the proper canonical order, so that you do not have to worry</span>
<a name="line-116"></a><span class='hs-comment'>-- about accent rearrangement on your own.</span>
<a name="line-117"></a><span class='hs-comment'>--</span>
<a name="line-118"></a><span class='hs-comment'>-- Form 'FCD', \"Fast C or D\", is also designed for collation. It</span>
<a name="line-119"></a><span class='hs-comment'>-- allows working on strings that are not necessarily normalized with</span>
<a name="line-120"></a><span class='hs-comment'>-- an algorithm (like in collation) that works under \"canonical</span>
<a name="line-121"></a><span class='hs-comment'>-- closure\", i.e., it treats precomposed characters and their</span>
<a name="line-122"></a><span class='hs-comment'>-- decomposed equivalents the same.</span>
<a name="line-123"></a><span class='hs-comment'>--</span>
<a name="line-124"></a><span class='hs-comment'>-- It is not a normalization form because it does not provide for</span>
<a name="line-125"></a><span class='hs-comment'>-- uniqueness of representation. Multiple strings may be canonically</span>
<a name="line-126"></a><span class='hs-comment'>-- equivalent (their NFDs are identical) and may all conform to 'FCD'</span>
<a name="line-127"></a><span class='hs-comment'>-- without being identical themselves.</span>
<a name="line-128"></a><span class='hs-comment'>--</span>
<a name="line-129"></a><span class='hs-comment'>-- The form is defined such that the \"raw decomposition\", the</span>
<a name="line-130"></a><span class='hs-comment'>-- recursive canonical decomposition of each character, results in a</span>
<a name="line-131"></a><span class='hs-comment'>-- string that is canonically ordered. This means that precomposed</span>
<a name="line-132"></a><span class='hs-comment'>-- characters are allowed as long as their decompositions do not</span>
<a name="line-133"></a><span class='hs-comment'>-- need canonical reordering.</span>
<a name="line-134"></a><span class='hs-comment'>--</span>
<a name="line-135"></a><span class='hs-comment'>-- Its advantage for a process like collation is that all 'NFD' and</span>
<a name="line-136"></a><span class='hs-comment'>-- most 'NFC' texts - and many unnormalized texts - already conform to</span>
<a name="line-137"></a><span class='hs-comment'>-- 'FCD' and do not need to be normalized ('NFD') for such a</span>
<a name="line-138"></a><span class='hs-comment'>-- process. The 'FCD' 'quickCheck' will return 'Yes' for most strings</span>
<a name="line-139"></a><span class='hs-comment'>-- in practice.</span>
<a name="line-140"></a><span class='hs-comment'>--</span>
<a name="line-141"></a><span class='hs-comment'>-- @'normalize' 'FCD'@ may be implemented with 'NFD'.</span>
<a name="line-142"></a><span class='hs-comment'>--</span>
<a name="line-143"></a><span class='hs-comment'>-- For more details on 'FCD' see the collation design document:</span>
<a name="line-144"></a><span class='hs-comment'>-- &lt;<a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm</a>&gt;</span>
<a name="line-145"></a><span class='hs-comment'>--</span>
<a name="line-146"></a><span class='hs-comment'>-- ICU collation performs either 'NFD' or 'FCD' normalization</span>
<a name="line-147"></a><span class='hs-comment'>-- automatically if normalization is turned on for the collator</span>
<a name="line-148"></a><span class='hs-comment'>-- object. Beyond collation and string search, normalized strings may</span>
<a name="line-149"></a><span class='hs-comment'>-- be useful for string equivalence comparisons,</span>
<a name="line-150"></a><span class='hs-comment'>-- transliteration/transcription, unique representations, etc.</span>
<a name="line-151"></a><span class='hs-comment'>--</span>
<a name="line-152"></a><span class='hs-comment'>-- The W3C generally recommends exchanging texts in 'NFC'. Note also</span>
<a name="line-153"></a><span class='hs-comment'>-- that most legacy character encodings use only precomposed forms and</span>
<a name="line-154"></a><span class='hs-comment'>-- often do not encode any combining marks by themselves. For</span>
<a name="line-155"></a><span class='hs-comment'>-- conversion to such character encodings the Unicode text needs to be</span>
<a name="line-156"></a><span class='hs-comment'>-- normalized to 'NFC'. For more usage examples, see the Unicode</span>
<a name="line-157"></a><span class='hs-comment'>-- Standard Annex.</span>
<a name="line-158"></a>
<a name="line-159"></a><a name="UCompareOption"></a><span class='hs-keyword'>type</span> <span class='hs-conid'>UCompareOption</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>Word32</span>
<a name="line-160"></a>
<a name="line-161"></a><a name="CompareOption"></a><span class='hs-comment'>-- | Options to 'compare'.</span>
<a name="line-162"></a><a name="CompareOption"></a><span class='hs-keyword'>data</span> <span class='hs-conid'>CompareOption</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>InputIsFCD</span>
<a name="line-163"></a> <span class='hs-comment'>-- ^ The caller knows that both strings fulfill the</span>
<a name="line-164"></a> <span class='hs-comment'>-- 'FCD' conditions. If /not/ set, 'compare' will</span>
<a name="line-165"></a> <span class='hs-comment'>-- 'quickCheck' for 'FCD' and normalize if</span>
<a name="line-166"></a> <span class='hs-comment'>-- necessary.</span>
<a name="line-167"></a> <span class='hs-keyglyph'>|</span> <span class='hs-conid'>CompareIgnoreCase</span>
<a name="line-168"></a> <span class='hs-comment'>-- ^ Compare strings case-insensitively using case</span>
<a name="line-169"></a> <span class='hs-comment'>-- folding, instead of case-sensitively. If set,</span>
<a name="line-170"></a> <span class='hs-comment'>-- then the following case folding options are</span>
<a name="line-171"></a> <span class='hs-comment'>-- used.</span>
<a name="line-172"></a> <span class='hs-keyglyph'>|</span> <span class='hs-conid'>FoldCaseExcludeSpecialI</span>
<a name="line-173"></a> <span class='hs-comment'>-- ^ When case folding, exclude the special I</span>
<a name="line-174"></a> <span class='hs-comment'>-- character. For use with Turkic</span>
<a name="line-175"></a> <span class='hs-comment'>-- (Turkish/Azerbaijani) text data.</span>
<a name="line-176"></a> <span class='hs-keyword'>deriving</span> <span class='hs-layout'>(</span><span class='hs-conid'>Eq</span><span class='hs-layout'>,</span> <span class='hs-conid'>Show</span><span class='hs-layout'>,</span> <span class='hs-conid'>Enum</span><span class='hs-layout'>,</span> <span class='hs-conid'>Typeable</span><span class='hs-layout'>)</span>
<a name="line-177"></a>
<a name="line-178"></a><a name="fromCompareOption"></a><span class='hs-definition'>fromCompareOption</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>CompareOption</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>UCompareOption</span>
<a name="line-179"></a><span class='hs-definition'>fromCompareOption</span> <span class='hs-conid'>InputIsFCD</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>131072</span>
<a name="line-180"></a><span class='hs-comment'>{-# LINE 177 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-181"></a><span class='hs-definition'>fromCompareOption</span> <span class='hs-conid'>CompareIgnoreCase</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>65536</span>
<a name="line-182"></a><span class='hs-comment'>{-# LINE 178 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-183"></a><span class='hs-definition'>fromCompareOption</span> <span class='hs-conid'>FoldCaseExcludeSpecialI</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>1</span>
<a name="line-184"></a><span class='hs-comment'>{-# LINE 179 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-185"></a>
<a name="line-186"></a><a name="reduceCompareOptions"></a><span class='hs-definition'>reduceCompareOptions</span> <span class='hs-keyglyph'>::</span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>CompareOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>UCompareOption</span>
<a name="line-187"></a><span class='hs-definition'>reduceCompareOptions</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>foldl'</span> <span class='hs-varid'>orO</span> <span class='hs-layout'>(</span><span class='hs-num'>32768</span><span class='hs-layout'>)</span>
<a name="line-188"></a><span class='hs-comment'>{-# LINE 182 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-189"></a> <span class='hs-keyword'>where</span> <span class='hs-varid'>a</span> <span class='hs-varop'>`orO`</span> <span class='hs-varid'>b</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>a</span> <span class='hs-varop'>.|.</span> <span class='hs-varid'>fromCompareOption</span> <span class='hs-varid'>b</span>
<a name="line-190"></a>
<a name="line-191"></a><a name="UNormalizationMode"></a><span class='hs-keyword'>type</span> <span class='hs-conid'>UNormalizationMode</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>CInt</span>
<a name="line-192"></a>
<a name="line-193"></a><a name="NormalizationMode"></a><span class='hs-comment'>-- | Normalization modes.</span>
<a name="line-194"></a><a name="NormalizationMode"></a><span class='hs-keyword'>data</span> <span class='hs-conid'>NormalizationMode</span>
<a name="line-195"></a> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>None</span> <span class='hs-comment'>-- ^ No decomposition/composition.</span>
<a name="line-196"></a> <span class='hs-keyglyph'>|</span> <span class='hs-conid'>NFD</span> <span class='hs-comment'>-- ^ Canonical decomposition.</span>
<a name="line-197"></a> <span class='hs-keyglyph'>|</span> <span class='hs-conid'>NFKD</span> <span class='hs-comment'>-- ^ Compatibility decomposition.</span>
<a name="line-198"></a> <span class='hs-keyglyph'>|</span> <span class='hs-conid'>NFC</span> <span class='hs-comment'>-- ^ Canonical decomposition followed by canonical composition.</span>
<a name="line-199"></a> <span class='hs-keyglyph'>|</span> <span class='hs-conid'>NFKC</span> <span class='hs-comment'>-- ^ Compatibility decomposition followed by canonical composition.</span>
<a name="line-200"></a> <span class='hs-keyglyph'>|</span> <span class='hs-conid'>FCD</span> <span class='hs-comment'>-- ^ \"Fast C or D\" form.</span>
<a name="line-201"></a> <span class='hs-keyword'>deriving</span> <span class='hs-layout'>(</span><span class='hs-conid'>Eq</span><span class='hs-layout'>,</span> <span class='hs-conid'>Show</span><span class='hs-layout'>,</span> <span class='hs-conid'>Enum</span><span class='hs-layout'>,</span> <span class='hs-conid'>Typeable</span><span class='hs-layout'>)</span>
<a name="line-202"></a>
<a name="line-203"></a><a name="toNM"></a><span class='hs-definition'>toNM</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>NormalizationMode</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>UNormalizationMode</span>
<a name="line-204"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>None</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>1</span>
<a name="line-205"></a><span class='hs-comment'>{-# LINE 198 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-206"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>NFD</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>2</span>
<a name="line-207"></a><span class='hs-comment'>{-# LINE 199 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-208"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>NFKD</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>3</span>
<a name="line-209"></a><span class='hs-comment'>{-# LINE 200 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-210"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>NFC</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>4</span>
<a name="line-211"></a><span class='hs-comment'>{-# LINE 201 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-212"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>NFKC</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>5</span>
<a name="line-213"></a><span class='hs-comment'>{-# LINE 202 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-214"></a><span class='hs-definition'>toNM</span> <span class='hs-conid'>FCD</span> <span class='hs-keyglyph'>=</span> <span class='hs-num'>6</span>
<a name="line-215"></a><span class='hs-comment'>{-# LINE 203 "Data/Text/ICU/Normalize.hsc" #-}</span>
<a name="line-216"></a>
<a name="line-217"></a><a name="normalize"></a><span class='hs-comment'>-- | Normalize a string according to the specified normalization mode.</span>
<a name="line-218"></a><span class='hs-definition'>normalize</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>NormalizationMode</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Text</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Text</span>
<a name="line-219"></a><span class='hs-definition'>normalize</span> <span class='hs-varid'>mode</span> <span class='hs-varid'>t</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>unsafePerformIO</span> <span class='hs-varop'>.</span> <span class='hs-varid'>useAsPtr</span> <span class='hs-varid'>t</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>sptr</span> <span class='hs-varid'>slen</span> <span class='hs-keyglyph'>-></span>
<a name="line-220"></a> <span class='hs-keyword'>let</span> <span class='hs-varid'>slen'</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>slen</span>
<a name="line-221"></a> <span class='hs-varid'>mode'</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>toNM</span> <span class='hs-varid'>mode</span>
<a name="line-222"></a> <span class='hs-keyword'>in</span> <span class='hs-varid'>handleOverflowError</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>slen</span><span class='hs-layout'>)</span>
<a name="line-223"></a> <span class='hs-layout'>(</span><span class='hs-keyglyph'>\</span><span class='hs-varid'>dptr</span> <span class='hs-varid'>dlen</span> <span class='hs-keyglyph'>-></span> <span class='hs-varid'>unorm_normalize</span> <span class='hs-varid'>sptr</span> <span class='hs-varid'>slen'</span> <span class='hs-varid'>mode'</span> <span class='hs-num'>0</span> <span class='hs-varid'>dptr</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>dlen</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span>
<a name="line-224"></a> <span class='hs-layout'>(</span><span class='hs-keyglyph'>\</span><span class='hs-varid'>dptr</span> <span class='hs-varid'>dlen</span> <span class='hs-keyglyph'>-></span> <span class='hs-varid'>fromPtr</span> <span class='hs-layout'>(</span><span class='hs-varid'>castPtr</span> <span class='hs-varid'>dptr</span><span class='hs-layout'>)</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>dlen</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span>
<a name="line-225"></a>
<a name="line-226"></a>
<a name="line-227"></a><a name="quickCheck"></a><span class='hs-comment'>-- | Perform an efficient check on a string, to quickly determine if</span>
<a name="line-228"></a><span class='hs-comment'>-- the string is in a particular normalization form.</span>
<a name="line-229"></a><span class='hs-comment'>--</span>
<a name="line-230"></a><span class='hs-comment'>-- A 'Nothing' result indicates that a definite answer could not be</span>
<a name="line-231"></a><span class='hs-comment'>-- determined quickly, and a more thorough check is required,</span>
<a name="line-232"></a><span class='hs-comment'>-- e.g. with 'isNormalized'. The user may have to convert the string</span>
<a name="line-233"></a><span class='hs-comment'>-- to its normalized form and compare the results.</span>
<a name="line-234"></a><span class='hs-comment'>--</span>
<a name="line-235"></a><span class='hs-comment'>-- A result of 'Just' 'True' or 'Just' 'False' indicates that the</span>
<a name="line-236"></a><span class='hs-comment'>-- string definitely is, or is not, in the given normalization form.</span>
<a name="line-237"></a><span class='hs-definition'>quickCheck</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>NormalizationMode</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Text</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Maybe</span> <span class='hs-conid'>Bool</span>
<a name="line-238"></a><span class='hs-definition'>quickCheck</span> <span class='hs-varid'>mode</span> <span class='hs-varid'>t</span> <span class='hs-keyglyph'>=</span>
<a name="line-239"></a> <span class='hs-varid'>unsafePerformIO</span> <span class='hs-varop'>.</span> <span class='hs-varid'>useAsPtr</span> <span class='hs-varid'>t</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>ptr</span> <span class='hs-varid'>len</span> <span class='hs-keyglyph'>-></span>
<a name="line-240"></a> <span class='hs-varid'>fmap</span> <span class='hs-varid'>toNCR</span> <span class='hs-varop'>.</span> <span class='hs-varid'>handleError</span> <span class='hs-varop'>$</span> <span class='hs-varid'>unorm_quickCheck</span> <span class='hs-varid'>ptr</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>len</span><span class='hs-layout'>)</span>
<a name="line-241"></a> <span class='hs-layout'>(</span><span class='hs-varid'>toNM</span> <span class='hs-varid'>mode</span><span class='hs-layout'>)</span>
<a name="line-242"></a>
<a name="line-243"></a><a name="isNormalized"></a><span class='hs-comment'>-- | Indicate whether a string is in a given normalization form.</span>
<a name="line-244"></a><span class='hs-comment'>--</span>
<a name="line-245"></a><span class='hs-comment'>-- Unlike 'quickCheck', this function returns a definitive result.</span>
<a name="line-246"></a><span class='hs-comment'>-- For 'NFD', 'NFKD', and 'FCD' normalization forms, both functions</span>
<a name="line-247"></a><span class='hs-comment'>-- work in exactly the same way. For 'NFC' and 'NFKC' forms, where</span>
<a name="line-248"></a><span class='hs-comment'>-- 'quickCheck' may return 'Nothing', this function will perform</span>
<a name="line-249"></a><span class='hs-comment'>-- further tests to arrive at a definitive result.</span>
<a name="line-250"></a><span class='hs-definition'>isNormalized</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>NormalizationMode</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Text</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Bool</span>
<a name="line-251"></a><span class='hs-definition'>isNormalized</span> <span class='hs-varid'>mode</span> <span class='hs-varid'>t</span> <span class='hs-keyglyph'>=</span>
<a name="line-252"></a> <span class='hs-varid'>unsafePerformIO</span> <span class='hs-varop'>.</span> <span class='hs-varid'>useAsPtr</span> <span class='hs-varid'>t</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>ptr</span> <span class='hs-varid'>len</span> <span class='hs-keyglyph'>-></span>
<a name="line-253"></a> <span class='hs-varid'>fmap</span> <span class='hs-varid'>asBool</span> <span class='hs-varop'>.</span> <span class='hs-varid'>handleError</span> <span class='hs-varop'>$</span> <span class='hs-varid'>unorm_isNormalized</span> <span class='hs-varid'>ptr</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>len</span><span class='hs-layout'>)</span>
<a name="line-254"></a> <span class='hs-layout'>(</span><span class='hs-varid'>toNM</span> <span class='hs-varid'>mode</span><span class='hs-layout'>)</span>
<a name="line-255"></a>
<a name="line-256"></a><a name="compare"></a><span class='hs-comment'>-- | Compare two strings for canonical equivalence. Further options</span>
<a name="line-257"></a><span class='hs-comment'>-- include case-insensitive comparison and code point order (as</span>
<a name="line-258"></a><span class='hs-comment'>-- opposed to code unit order).</span>
<a name="line-259"></a><span class='hs-comment'>--</span>
<a name="line-260"></a><span class='hs-comment'>-- Canonical equivalence between two strings is defined as their</span>
<a name="line-261"></a><span class='hs-comment'>-- normalized forms ('NFD' or 'NFC') being identical. This function</span>
<a name="line-262"></a><span class='hs-comment'>-- compares strings incrementally instead of normalizing (and</span>
<a name="line-263"></a><span class='hs-comment'>-- optionally case-folding) both strings entirely, improving</span>
<a name="line-264"></a><span class='hs-comment'>-- performance significantly.</span>
<a name="line-265"></a><span class='hs-comment'>--</span>
<a name="line-266"></a><span class='hs-comment'>-- Bulk normalization is only necessary if the strings do not fulfill</span>
<a name="line-267"></a><span class='hs-comment'>-- the 'FCD' conditions. Only in this case, and only if the strings</span>
<a name="line-268"></a><span class='hs-comment'>-- are relatively long, is memory allocated temporarily. For 'FCD'</span>
<a name="line-269"></a><span class='hs-comment'>-- strings and short non-'FCD' strings there is no memory allocation.</span>
<a name="line-270"></a><span class='hs-definition'>compare</span> <span class='hs-keyglyph'>::</span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>CompareOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Text</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Text</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Ordering</span>
<a name="line-271"></a><span class='hs-definition'>compare</span> <span class='hs-varid'>opts</span> <span class='hs-varid'>a</span> <span class='hs-varid'>b</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>unsafePerformIO</span> <span class='hs-varop'>.</span>
<a name="line-272"></a> <span class='hs-varid'>useAsPtr</span> <span class='hs-varid'>a</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>aptr</span> <span class='hs-varid'>alen</span> <span class='hs-keyglyph'>-></span>
<a name="line-273"></a> <span class='hs-varid'>useAsPtr</span> <span class='hs-varid'>b</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>bptr</span> <span class='hs-varid'>blen</span> <span class='hs-keyglyph'>-></span>
<a name="line-274"></a> <span class='hs-varid'>fmap</span> <span class='hs-varid'>asOrdering</span> <span class='hs-varop'>.</span> <span class='hs-varid'>handleError</span> <span class='hs-varop'>$</span>
<a name="line-275"></a> <span class='hs-varid'>unorm_compare</span> <span class='hs-varid'>aptr</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>alen</span><span class='hs-layout'>)</span> <span class='hs-varid'>bptr</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>blen</span><span class='hs-layout'>)</span>
<a name="line-276"></a> <span class='hs-layout'>(</span><span class='hs-varid'>reduceCompareOptions</span> <span class='hs-varid'>opts</span><span class='hs-layout'>)</span>
<a name="line-277"></a>
<a name="line-278"></a><span class='hs-keyword'>foreign</span> <span class='hs-keyword'>import</span> <span class='hs-keyword'>ccall</span> <span class='hs-keyword'>unsafe</span> <span class='hs-str'>"hs_text_icu.h __hs_unorm_compare"</span> <span class='hs-varid'>unorm_compare</span>
<a name="line-279"></a> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Int32</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Int32</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Word32</span>
<a name="line-280"></a> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UErrorCode</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>IO</span> <span class='hs-conid'>Int32</span>
<a name="line-281"></a>
<a name="line-282"></a><span class='hs-keyword'>foreign</span> <span class='hs-keyword'>import</span> <span class='hs-keyword'>ccall</span> <span class='hs-keyword'>unsafe</span> <span class='hs-str'>"hs_text_icu.h __hs_unorm_quickCheck"</span> <span class='hs-varid'>unorm_quickCheck</span>
<a name="line-283"></a> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Int32</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>UNormalizationMode</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UErrorCode</span>
<a name="line-284"></a> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>IO</span> <span class='hs-conid'>UNormalizationCheckResult</span>
<a name="line-285"></a>
<a name="line-286"></a><span class='hs-keyword'>foreign</span> <span class='hs-keyword'>import</span> <span class='hs-keyword'>ccall</span> <span class='hs-keyword'>unsafe</span> <span class='hs-str'>"hs_text_icu.h __hs_unorm_isNormalized"</span> <span class='hs-varid'>unorm_isNormalized</span>
<a name="line-287"></a> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Int32</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>UNormalizationMode</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UErrorCode</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>IO</span> <span class='hs-conid'>UBool</span>
<a name="line-288"></a>
<a name="line-289"></a><span class='hs-keyword'>foreign</span> <span class='hs-keyword'>import</span> <span class='hs-keyword'>ccall</span> <span class='hs-keyword'>unsafe</span> <span class='hs-str'>"hs_text_icu.h __hs_unorm_normalize"</span> <span class='hs-varid'>unorm_normalize</span>
<a name="line-290"></a> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Int32</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>UNormalizationMode</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Int32</span>
<a name="line-291"></a> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UChar</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Int32</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>UErrorCode</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>IO</span> <span class='hs-conid'>Int32</span>
</pre></body>
</html>
|