2 using System.Globalization;
4 using System.Runtime.CompilerServices;
6 using NUtil = Mono.Globalization.Unicode.NormalizationTableUtil;
8 namespace Mono.Globalization.Unicode
// Outcome of a per-character normalization quick check. QuickCheck ()
// returns the members Yes, No and Maybe; the member list itself is not
// part of this extract.
10 internal enum NormalizationCheck {
// Static helpers for Unicode string normalization. The lookup tables are
// reached through raw pointers (see the static pointer fields), which is
// why the class is declared 'unsafe'.
16 internal unsafe class Normalization
// Bit flags that are tested against the value returned by PropValue ().
// QuickCheck () answers No when the No* bit of the form being checked is
// set, and Maybe when the Maybe* bit is set.
18 public const int NoNfd = 1;
19 public const int NoNfkd = 2;
20 public const int NoNfc = 4;
21 public const int MaybeNfc = 8;
22 public const int NoNfkc = 16;
23 public const int MaybeNfkc = 32;
// Tested by GetPrimaryCompositeMapIndex () before it looks for a composite.
24 public const int FullCompositionExclusion = 64;
// Tested by CanBePrimaryComposite () for code points outside 0x3400..0x9FBB.
25 public const int IsUnsafe = 128;
// The four flags below are disabled and not referenced by the visible code.
26 // public const int ExpandOnNfd = 256;
27 // public const int ExpandOnNfc = 512;
28 // public const int ExpandOnNfkd = 1024;
29 // public const int ExpandOnNfkc = 2048;
// Reads the property flag bits for code point cp from the props table.
// NUtil.PropIdx translates the code point into the table slot.
static uint PropValue (int cp)
{
	int slot = NUtil.PropIdx (cp);
	return props [slot];
}
// Returns the charMapIndex entry for code point cp. GetCanonical () uses
// this value as the position in mappedChars where the mapping of cp begins.
static int CharMapIdx (int cp)
{
	int slot = NUtil.MapIdx (cp);
	return charMapIndex [slot];
}
// Measures the zero-terminated run in mappedChars that belongs to ch: the
// run begins at the charMapIndex entry of ch and the loop scans until it
// meets a 0 element.
// NOTE(review): the declaration of 'i', the loop body and the return
// statement are not part of this extract; presumably 'i' begins at 'start'
// and the method returns i - start -- confirm against the full file.
41 static int GetNormalizedStringLength (int ch)
43 int start = charMapIndex [NUtil.MapIdx (ch)];
45 while (mappedChars [i] != 0)
// Returns the combining class stored for code point c. Callers treat a
// value of 0 as "starter" (see GetPrimaryCompositeMapIndex).
static byte GetCombiningClass (int c)
{
	int slot = NUtil.Combining.ToIndex (c);
	return combiningClass [slot];
}
// Translates src, an index into mappedChars identifying a matched
// composition entry, into the composite character recorded for it in
// mapIdxToComposite. Combine () treats a result of 0 as an internal error.
static int GetPrimaryCompositeFromMapIndex (int src)
{
	int slot = NUtil.Composite.ToIndex (src);
	return mapIdxToComposite [slot];
}
// Returns the helperIndex entry for code point cp. The value is used as
// the position in mappedChars of the first composition entry that begins
// with cp; CanBePrimaryComposite () treats 0 as "no entry".
static int GetPrimaryCompositeHelperIndex (int cp)
{
	int slot = NUtil.Helper.ToIndex (cp);
	return helperIndex [slot];
}
// Looks for a composition entry in mappedChars that matches the text
// beginning at chars [start].
//   chars - the text, either a string or a StringBuilder (both are probed
//           with 'as' below).
//   start - index of the first character to match.
// Returns 0 when nothing matches. The value returned on a match is not part
// of this extract; callers use the result as an index into mappedChars.
// NOTE(review): many lines of this method (declarations of prevCB / curCh,
// several returns and braces) are missing here, so the notes below only
// describe what the visible lines show.
65 static int GetPrimaryCompositeCharIndex (object chars, int start)
67 string s = chars as string;
68 StringBuilder sb = chars as StringBuilder;
// Exactly one of s / sb is expected to be non-null. Any other argument type
// leaves both null and the next two lines dereference null.
69 char startCh = s != null ? s [start] : sb [start];
70 int charsLength = sb != null ? sb.Length : s.Length;
// Position in mappedChars of the first candidate entry for startCh.
72 int idx = GetPrimaryCompositeHelperIndex ((int) startCh);
// Keep trying entries for as long as they begin with startCh.
75 while (mappedChars [idx] == startCh) {
// Local variable; it shadows the static combiningClass table pointer.
77 int combiningClass = 0;
// i is the offset inside the map entry; j is advanced separately further
// down (++j), so presumably it is the offset inside the input -- confirm.
78 for (int i = 1, j = 1; ; i++, j++) {
// Remember the combining class seen on the previous pass.
79 prevCB = combiningClass;
// A 0 element ends the map entry.
81 if (mappedChars [idx + i] == 0)
// The input ended before the map entry did.
84 if (start + i >= charsLength)
85 return 0; // didn't match
87 // handle blocked characters here.
94 combiningClass = GetCombiningClass (curCh);
95 if (mappedChars [idx + i] == curCh) {
99 if (combiningClass < prevCB) // blocked. Give up this map entry.
// Stop skipping when the input is exhausted or a class-0 character is met.
101 if (++j + start >= charsLength || combiningClass == 0)
106 continue; // check next character in the current map entry string.
107 if (prevCB < combiningClass) {
109 if (mappedChars [idx + i] == curCh)
111 //if (mappedChars [idx + i] > curCh)
112 // return 0; // no match
114 // otherwise move idx to next item
// NOTE(review): this indexes mappedChars with 'i' alone, whereas every
// other access in this method uses 'idx' or 'idx + i' -- confirm against
// the full file that this is intended.
115 while (mappedChars [i] != 0)
121 // reached to end of entries
// Produces the composed form of 'source': the text is decomposed first and
// then recombined. Returns 'source' itself when no StringBuilder was ever
// created.
// NOTE(review): the condition that chooses between the two Combine () calls
// is not part of this extract; presumably the string overload runs when
// Decompose () left sb null and the StringBuilder overload otherwise.
125 private static string Compose (string source, int checkType)
127 StringBuilder sb = null;
// Decompose () allocates sb only when it has to change something.
128 Decompose (source, ref sb, checkType);
130 sb = Combine (source, 0, checkType);
132 Combine (sb, 0, checkType);
134 return sb != null ? sb.ToString () : source;
// String overload of Combine (): scans 'source' with QuickCheck () and, for
// a character that needs work, copies the text into a new StringBuilder and
// hands it to the StringBuilder overload.
// NOTE(review): the statements executed for a Yes result, the filling of
// sb and the return statements are not part of this extract. The 'start'
// parameter is not used by any visible line (the scan begins at 0).
137 private static StringBuilder Combine (string source, int start, int checkType)
139 for (int i = 0; i < source.Length; i++) {
140 if (QuickCheck (source [i], checkType) == NormalizationCheck.Yes)
// Capacity leaves roughly 10% headroom over the source length.
142 StringBuilder sb = new StringBuilder (source.Length + source.Length / 10);
// Continue in the builder from the position reached in the scan.
144 Combine (sb, i, checkType);
// Tells whether code point i may take part in a primary composite.
// Code points in 0x3400..0x9FBB are decided by whether they have a helper
// index entry; all others by the IsUnsafe property flag.
private static bool CanBePrimaryComposite (int i)
{
	bool inHelperRange = 0x3400 <= i && i <= 0x9FBB;
	if (!inHelperRange)
		return (PropValue (i) & IsUnsafe) != 0;
	return GetPrimaryCompositeHelperIndex (i) != 0;
}
// StringBuilder overload of Combine (): composes characters of sb in place,
// beginning the scan at 'start'.
// Throws SystemException when a matched map entry has no composite or a
// zero-length mapping (treated as an internal error).
// NOTE(review): several lines (declarations of 'cur' and 'removed', the
// statements under the visible conditions, closing braces) are not part of
// this extract; the notes below describe only the visible lines.
157 private static void Combine (StringBuilder sb, int start, int checkType)
159 for (int i = start; i < sb.Length; i++) {
160 if (QuickCheck (sb [i], checkType) == NormalizationCheck.Yes)
164 // FIXME: It should check "blocked" too
// Walk backwards looking for a character whose combining class is 0.
165 for (;i > 0; i--) // this loop does not check sb[0], but regardless of the condition below it should not go under 0.
166 if (GetCombiningClass ((int) sb [i]) == 0)
169 int idx = 0; // index to mappedChars
// Try each position up to 'cur' as the start of a composition entry.
170 for (; i < cur; i++) {
171 idx = GetPrimaryCompositeMapIndex (sb, (int) sb [i], i);
// Resolve the matched entry to its composite and to the length reported
// for that composite by GetNormalizedStringLength ().
180 int prim = GetPrimaryCompositeFromMapIndex (idx);
181 int len = GetNormalizedStringLength (prim);
182 if (prim == 0 || len == 0)
183 throw new SystemException ("Internal error: should not happen. Input: " + sb);
// Insert the composite; the post-increment leaves i on the first character
// that followed the insertion point.
185 sb.Insert (i++, (char) prim); // always single character
187 // handle blocked characters here.
// Compare following characters with the map entry until 'len' of them
// have been accounted for.
188 while (removed < len) {
189 if (sb [i] == mappedChars [idx + removed]) {
192 // otherwise, skip it.
// Returns the mappedChars index of the composition entry that matches the
// text at o [bufferPos], or 0 when 'cur' cannot start a composite.
//   o         - the text (string or StringBuilder), passed through to
//               GetPrimaryCompositeCharIndex ().
//   cur       - the character found at bufferPos.
//   bufferPos - position of 'cur' inside o.
201 static int GetPrimaryCompositeMapIndex (object o, int cur, int bufferPos)
// NOTE(review): the statement guarded by this test is not part of this
// extract; presumably it returns 0 for excluded characters -- confirm.
203 if ((PropValue (cur) & FullCompositionExclusion) != 0)
205 if (GetCombiningClass (cur) != 0)
206 return 0; // not a starter
207 return GetPrimaryCompositeCharIndex (o, bufferPos);
// Returns the decomposed form of 'source' for the given checkType.
// The worker overload only allocates a StringBuilder when it has to change
// something, so a null builder means 'source' can be returned unchanged.
static string Decompose (string source, int checkType)
{
	StringBuilder buffer = null;
	Decompose (source, ref buffer, checkType);
	if (buffer == null)
		return source;
	return buffer.ToString ();
}
// Worker for decomposition: every character for which QuickCheck () answers
// No is expanded through DecomposeChar (); the untouched tail of 'source'
// is appended afterwards and the result goes through ReorderCanonical ().
//   sb - output builder, passed by reference so that callees can create it.
// NOTE(review): the declarations of 'buf' and 'start', the remaining
// DecomposeChar () arguments and the condition guarding the tail append are
// not part of this extract.
217 static void Decompose (string source,
218 ref StringBuilder sb, int checkType)
222 for (int i = 0; i < source.Length; i++)
223 if (QuickCheck (source [i], checkType) == NormalizationCheck.No)
224 DecomposeChar (ref sb, ref buf, source,
// Copy whatever follows the last decomposed character.
227 sb.Append (source, start, source.Length - start);
// Reordering compares each character with its predecessor, hence start = 1.
228 ReorderCanonical (source, ref sb, 1);
// Reorders characters by combining class, comparing each character with the
// one before it.
//   src   - the original text; only consulted in the first loop.
//   sb    - working builder; the first loop creates it from src when it
//           finds an out-of-order pair and then recurses.
//   start - first index examined in sb. Both visible callers pass a value
//           >= 1, which keeps sb [i - 1] in range.
// NOTE(review): the tests selecting between the two loops, the 'continue'
// conditions on 'level' and the swap itself are not part of this extract.
231 static void ReorderCanonical (string src, ref StringBuilder sb, int start)
234 // check only with src.
235 for (int i = 1; i < src.Length; i++) {
236 int level = GetCombiningClass (src [i]);
// The previous character has a higher class than this one: out of order.
239 if (GetCombiningClass (src [i - 1]) > level) {
// Copy src into a builder and continue there from position i.
240 sb = new StringBuilder (src.Length);
241 sb.Append (src, 0, src.Length);
242 ReorderCanonical (src, ref sb, i);
248 // check only with sb
249 for (int i = start; i < sb.Length; i++) {
250 int level = GetCombiningClass (sb [i]);
253 if (GetCombiningClass (sb [i - 1]) > level) {
// Step back so the moved character is compared with its new predecessor.
257 i--; // apply recursively
// Appends the decomposition of s [i] (as produced by GetCanonical ()) to sb.
//   sb    - output builder; created here on first use.
//   buf   - scratch buffer for the decomposed code points; created here on
//           first use and reused across calls.
//   s     - the source string.
//   i     - index in s of the character to decompose.
//   start - index of the first source character not yet copied to sb.
//           s [start .. i) is copied verbatim before the decomposition is
//           appended, and start is then moved just past s [i].
static void DecomposeChar (ref StringBuilder sb,
	ref int [] buf, string s, int i, ref int start)
{
	if (sb == null)
		sb = new StringBuilder (s.Length + 100);
	sb.Append (s, start, i - start);
	// NOTE(review): capacity 19 = the longest Unicode decomposition
	// (18 code points) plus the 0 terminator GetCanonical () writes --
	// confirm against the mapping table in use.
	if (buf == null)
		buf = new int [19];
	GetCanonical (s [i], buf, 0);
	// GetCanonical () terminates the sequence with a 0 element.
	for (int x = 0; buf [x] != 0; x++) {
		int cp = buf [x];
		if (cp <= char.MaxValue)
			sb.Append ((char) cp);
		else {
			// Supplementary code point: encode as a UTF-16 surrogate
			// pair. The 0x10000 offset is removed first; the top ten
			// bits select the high surrogate and the low ten bits the
			// low surrogate. The parentheses around the shift matter:
			// '+' binds tighter than '>>'.
			cp -= 0x10000;
			sb.Append ((char) ((cp >> 10) + 0xD800));
			sb.Append ((char) ((cp & 0x3FF) + 0xDC00));
		}
	}
	start = i + 1;
}
// Per-character normalization quick check for the form selected by 'type'.
// Returns Yes, No or Maybe depending on the property flags of c.
// NOTE(review): the declaration of 'v' and the switch / case labels that
// route 'type' to each branch are not part of this extract; the form named
// for each branch below is inferred from the flag constant it tests.
284 public static NormalizationCheck QuickCheck (char c, int type)
// Branch using the NFC flags: No when NoNfc is set, otherwise Maybe when
// MaybeNfc is set, otherwise Yes.
289 v = PropValue ((int) c);
290 return (v & NoNfc) == 0 ?
291 (v & MaybeNfc) == 0 ?
292 NormalizationCheck.Yes :
293 NormalizationCheck.Maybe :
294 NormalizationCheck.No;
// Branch using the NFD flag. U+AC00..U+D7A3 (the Hangul Syllables block)
// always answers No without consulting the table.
296 if ('\uAC00' <= c && c <= '\uD7A3')
297 return NormalizationCheck.No;
298 return (PropValue ((int) c) & NoNfd) != 0 ?
299 NormalizationCheck.No : NormalizationCheck.Yes;
// Branch using the NFKC flags: No when NoNfkc is set, otherwise Maybe when
// MaybeNfkc is set, otherwise Yes.
301 v = PropValue ((int) c);
302 return (v & NoNfkc) != 0 ? NormalizationCheck.No :
303 (v & MaybeNfkc) != 0 ?
304 NormalizationCheck.Maybe :
305 NormalizationCheck.Yes;
// Branch using the NFKD flag, with the same U+AC00..U+D7A3 shortcut.
307 if ('\uAC00' <= c && c <= '\uD7A3')
308 return NormalizationCheck.No;
309 return (PropValue ((int) c) & NoNfkd) != 0 ?
310 NormalizationCheck.No : NormalizationCheck.Yes;
314 /* for now we don't use FC_NFKC closure
315 public static bool IsMultiForm (char c)
317 return (PropValue ((int) c) & 0xF0000000) != 0;
320 public static char SingleForm (char c)
322 uint v = PropValue ((int) c);
323 int idx = (int) ((v & 0x7FFF0000) >> 16);
324 return (char) singleNorm [idx];
327 public static void MultiForm (char c, char [] buf, int index)
329 // FIXME: handle surrogate
330 uint v = PropValue ((int) c);
331 int midx = (int) ((v & 0x7FFF0000) >> 16);
332 buf [index] = (char) multiNorm [midx];
333 buf [index + 1] = (char) multiNorm [midx + 1];
334 buf [index + 2] = (char) multiNorm [midx + 2];
335 buf [index + 3] = (char) multiNorm [midx + 3];
336 if (buf [index + 3] != 0)
337 buf [index + 4] = (char) 0; // zero termination
// Copies the mapping of code point c from mappedChars into buf, beginning
// at buf [bufIdx], and writes a 0 terminator after the last element.
// The caller must supply a buffer large enough for the mapping plus the
// terminator; no bounds are checked beyond the array's own.
public static void GetCanonical (int c, int [] buf, int bufIdx)
{
	int src = CharMapIdx (c);
	int dst = bufIdx;
	int mapped;
	while ((mapped = mappedChars [src]) != 0) {
		buf [dst] = mapped;
		dst++;
		src++;
	}
	buf [dst] = 0;
}
// Tells whether 'source' is already in the normalization form selected by
// 'type'. Each character is checked for combining-class order and with
// QuickCheck (); a Maybe answer needs further work (see below).
// NOTE(review): the declarations of 'prevCC' and 'cur', the statements
// under most conditions, the inner switch on 'type' and the final return
// are not part of this extract.
348 public static bool IsNormalized (string source, int type)
351 for (int i = 0; i < source.Length; i++) {
352 int cc = GetCombiningClass (source [i]);
// A non-zero class lower than the previous one means wrong ordering.
353 if (cc != 0 && cc < prevCC)
356 switch (QuickCheck (source [i], type)) {
357 case NormalizationCheck.Yes:
359 case NormalizationCheck.No:
361 case NormalizationCheck.Maybe:
362 // for those forms with composition, it cannot be checked here
// Fall back to normalizing the whole string and comparing by value.
366 return source == Normalize (source, type);
370 // partly copied from Combine()
// Walk backwards looking for a character whose combining class is 0.
372 for (;i > 0; i--) // this loop does not check source[0], but regardless of the condition below it should not go under 0.
373 if (GetCombiningClass ((int) source [i]) == 0)
376 // Now i is the "starter"
// A non-zero index means a composition entry matches at position i.
377 for (; i < cur; i++) {
378 if (GetPrimaryCompositeCharIndex (source, i) != 0)
// Returns 'source' converted to the normalization form selected by 'type',
// by dispatching to Compose () or Decompose ().
// NOTE(review): the switch on 'type' is not part of this extract, so which
// values of 'type' reach each call cannot be read off here.
387 public static string Normalize (string source, int type)
392 return Compose (source, type);
395 return Decompose (source, type);
// Raw pointers to the normalization tables; they are assigned in the static
// constructor. (A 'props' pointer is used by PropValue () as well; its
// declaration is not part of this extract.)
// Zero-terminated runs of mapped code points.
400 static int* mappedChars;
// Per code point: where its mapping begins in mappedChars.
401 static short* charMapIndex;
// Per code point: first composition entry in mappedChars that begins with it.
402 static short* helperIndex;
// Per composition entry: the resulting composite character.
403 static ushort* mapIdxToComposite;
// Per code point: its combining class.
404 static byte* combiningClass;
// Variant in which the tables are managed arrays (propsArr, mappedCharsArr
// and so on).
// NOTE(review): this IsReady field and static constructor coexist in this
// extract with a second IsReady / static constructor further down;
// presumably the two sit in alternative #if branches whose directives are
// not part of the extract.
408 public static readonly bool IsReady = true; // always
// Publishes pointers to the table arrays through the static pointer fields.
// NOTE(review): the pointers are taken inside 'fixed' blocks but kept in
// static fields that are read later; that is only sound if the arrays can
// never move after the block ends -- confirm how the *Arr arrays are
// allocated. Most of the assignments are not part of this extract.
410 static Normalization ()
412 fixed (byte* tmp = propsArr) {
415 fixed (int* tmp = mappedCharsArr) {
418 fixed (short* tmp = charMapIndexArr) {
421 fixed (short* tmp = helperIndexArr) {
424 fixed (ushort* tmp = mapIdxToCompositeArr) {
425 mapIdxToComposite = tmp;
427 fixed (byte* tmp = combiningClassArr) {
428 combiningClass = tmp;
// Lock object; no visible line in this extract uses it.
433 static object forLock = new object ();
// Backing field for IsReady. NOTE(review): no assignment to it is visible
// in this extract.
434 public static readonly bool isReady;
// Reports whether the normalization tables are available.
436 public static bool IsReady {
437 get { return isReady; }
// Implemented inside the runtime (InternalCall). Hands back the addresses
// of the six normalization tables through its out parameters.
440 [MethodImpl (MethodImplOptions.InternalCall)]
441 static extern void load_normalization_resource (
442 out IntPtr props, out IntPtr mappedChars,
443 out IntPtr charMapIndex, out IntPtr helperIndex,
444 out IntPtr mapIdxToComposite, out IntPtr combiningClass);
// Variant in which the tables are supplied by the runtime: fetches the six
// table addresses and casts each one to its typed pointer field.
// NOTE(review): the assignment of 'props' from p1 and any assignment to
// 'isReady' are not part of this extract.
446 static Normalization ()
448 IntPtr p1, p2, p3, p4, p5, p6;
450 load_normalization_resource (out p1, out p2, out p3, out p4, out p5, out p6);
452 mappedChars = (int*) p2;
453 charMapIndex = (short*) p3;
454 helperIndex = (short*) p4;
455 mapIdxToComposite = (ushort*) p5;
456 combiningClass = (byte*) p6;
466 // autogenerated code or icall to fill array runs here