2 using System.Globalization;
4 using System.Runtime.CompilerServices;
6 using NUtil = Mono.Globalization.Unicode.NormalizationTableUtil;
8 namespace Mono.Globalization.Unicode
// Outcome of a normalization quick check. The code in this file compares
// QuickCheck results against the members Yes, No and Maybe.
10 internal enum NormalizationCheck {
// Unicode normalization helpers (QuickCheck, IsNormalized, Normalize and the
// Compose / Decompose passes behind them). The class is unsafe because its
// lookup tables are read through raw static pointers.
16 internal unsafe class Normalization
// Bit flags tested against the value PropValue returns for a code point.
// QuickCheck answers No when the No* flag it tests is set and Maybe when the
// Maybe* flag it tests is set.
18 public const int NoNfd = 1;
19 public const int NoNfkd = 2;
20 public const int NoNfc = 4;
21 public const int MaybeNfc = 8;
22 public const int NoNfkc = 16;
23 public const int MaybeNfkc = 32;
// Tested first by GetPrimaryCompositeMapIndex.
24 public const int FullCompositionExclusion = 64;
// Tested by CanBePrimaryComposite for code points outside 0x3400-0x9FBB.
25 public const int IsUnsafe = 128;
// Disabled flag values.
26 // public const int ExpandOnNfd = 256;
27 // public const int ExpandOnNfc = 512;
28 // public const int ExpandOnNfkd = 1024;
29 // public const int ExpandOnNfkc = 2048;
// Returns the props table entry for code point cp. Callers mask the result
// with the flag constants of this class (NoNfd, IsUnsafe, ...).
static uint PropValue (int cp)
{
	int slot = NUtil.PropIdx (cp);
	return props [slot];
}
// Returns the charMapIndex table entry for code point cp. GetCanonical uses
// it as the offset of the first mappedChars value to copy.
static int CharMapIdx (int cp)
{
	int slot = NUtil.MapIdx (cp);
	return charMapIndex [slot];
}
// Scans mappedChars from the charMapIndex entry for ch up to its 0 terminator.
// Combine (StringBuilder, ...) uses the result to bound how many characters
// it removes after inserting the composite ch.
// NOTE(review): presumably the return value is the number of values before
// the terminator — confirm.
41 static int GetComposedStringLength (int ch)
// Offset of the first mapped value for ch.
43 int start = charMapIndex [NUtil.MapIdx (ch)];
// A 0 value terminates the entry.
45 while (mappedChars [i] != 0)
// Returns the combiningClass table entry for code point c. A result of 0 is
// what the callers in this file treat as "starter".
static byte GetCombiningClass (int c)
{
	int slot = NUtil.Combining.ToIndex (c);
	return combiningClass [slot];
}
// Translates a mappedChars index (src) into the composite character stored
// for it in the mapIdxToComposite table.
static int GetPrimaryCompositeFromMapIndex (int src)
{
	int slot = NUtil.Composite.ToIndex (src);
	return mapIdxToComposite [slot];
}
// Returns the helperIndex table entry for code point cp.
// GetPrimaryCompositeCharIndex uses it as the index of the first candidate
// entry in mappedChars; CanBePrimaryComposite only tests it against 0.
static int GetPrimaryCompositeHelperIndex (int cp)
{
	int slot = NUtil.Helper.ToIndex (cp);
	return helperIndex [slot];
}
// Looks for a mappedChars entry that begins with the character at
// chars [start] and whose following values match the characters after it.
// chars must be a string or a StringBuilder. Returns 0 when nothing matches.
// NOTE(review): on a match the result is presumably the entry's index into
// mappedChars (Combine feeds it to GetPrimaryCompositeFromMapIndex) — confirm.
65 static int GetPrimaryCompositeCharIndex (object chars, int start)
// Exactly one of s / sb is expected to be non-null. If chars is neither a
// string nor a StringBuilder both are null and the sb dereference below
// throws NullReferenceException.
67 string s = chars as string;
68 StringBuilder sb = chars as StringBuilder;
69 char startCh = s != null ? s [start] : sb [start];
70 int charsLength = sb != null ? sb.Length : s.Length;
// Index of the first candidate entry for this starting character.
72 int idx = GetPrimaryCompositeHelperIndex ((int) startCh);
// Keep trying entries for as long as they begin with startCh.
75 while (mappedChars [idx] == startCh) {
// i walks the candidate entry; j is advanced separately by the
// blocked-character loop below.
76 for (int i = 1, j = 1; ; i++, j++) {
// A 0 value terminates an entry.
77 if (mappedChars [idx + i] == 0)
// Input ran out before the entry ended.
80 if (start + i >= charsLength)
81 return 0; // didn't match
83 // handle blocked characters here.
91 combiningClass = GetCombiningClass (curCh);
92 if (++j + start >= charsLength ||
95 nextCB = GetCombiningClass (
99 } while (nextCB > 0 && combiningClass >= nextCB);
101 if (mappedChars [idx + i] == curCh)
// NOTE(review): this early exit presumably relies on the entries being
// sorted by this value — confirm against the table generator.
103 if (mappedChars [idx + i] > curCh)
104 return 0; // no match
105 // otherwise move idx to next item
// NOTE(review): this indexes mappedChars with the entry-relative offset i,
// while every other access in this loop uses idx + i. It looks like it
// should read mappedChars [idx + i] — confirm.
106 while (mappedChars [i] != 0)
112 // reached to end of entries
// Composes source: runs the Decompose pass, then a Combine pass. Returns
// source itself when no StringBuilder ended up being created, otherwise the
// builder's contents.
116 private static string Compose (string source, int checkType)
// Decompose only assigns sb when it creates a buffer.
118 StringBuilder sb = null;
119 Decompose (source, ref sb, checkType);
// String overload: scans source and hands back the builder it creates.
121 sb = Combine (source, 0, checkType);
// StringBuilder overload: works on the existing buffer in place.
123 Combine (sb, 0, checkType);
125 return sb != null ? sb.ToString () : source;
// Combine overload for a plain string. It scans source with QuickCheck,
// creates a StringBuilder and continues with the StringBuilder overload from
// the position it stopped at.
// NOTE(review): the scan begins at index 0 and start does not appear to be
// used — confirm that is intended.
128 private static StringBuilder Combine (string source, int start, int checkType)
130 for (int i = 0; i < source.Length; i++) {
131 if (QuickCheck (source [i], checkType) == NormalizationCheck.Yes)
// Capacity is the source length plus one tenth of it.
133 StringBuilder sb = new StringBuilder (source.Length + source.Length / 10);
135 Combine (sb, i, checkType);
// Decides whether code point i passes the composite pre-check used by
// Combine and IsNormalized. Code points in 0x3400-0x9FBB pass when their
// helper index is non-zero; all others pass when their IsUnsafe flag is set.
private static bool CanBePrimaryComposite (int i)
{
	bool inHelperRange = 0x3400 <= i && i <= 0x9FBB;
	if (!inHelperRange)
		return (PropValue (i) & IsUnsafe) != 0;
	return GetPrimaryCompositeHelperIndex (i) != 0;
}
// In-place composition pass over sb, scanning from index start.
148 private static void Combine (StringBuilder sb, int start, int checkType)
150 for (int i = start; i < sb.Length; i++) {
151 if (QuickCheck (sb [i], checkType) == NormalizationCheck.Yes)
155 // FIXME: It should check "blocked" too
// Walk backwards, testing each character with CanBePrimaryComposite.
156 for (;i > 0; i--) // this loop does not check sb[0], but regardless of the condition below it should not go under 0.
157 if (!CanBePrimaryComposite ((int) sb [i]))
// Try each position from there up to cur as the start of a composite.
161 for (; i < cur; i++) {
162 idx = GetPrimaryCompositeMapIndex (sb, (int) sb [i], i);
// ch is the composite stored for the matched entry; len bounds the removal
// loop below.
170 int ch = GetPrimaryCompositeFromMapIndex (idx);
171 int len = GetComposedStringLength (ch);
// Either value being 0 here is treated as an internal table error.
172 if (ch == 0 || len == 0)
173 throw new SystemException ("Internal error: should not happen.");
// Insert the composite in front of position i, then step past it.
175 sb.Insert (i++, (char) ch); // always single character
177 // handle blocked characters here.
178 while (removed < len) {
179 if (i + 1 < sb.Length) {
// Compare the combining classes of the current and the next character.
180 int cb = GetCombiningClass (sb [i]);
182 int next = GetCombiningClass (sb [i + 1]);
183 if (next != 0 && cb >= next) {
192 i = cur - 1; // apply recursively
// Composite lookup for the character cur located at bufferPos in o (a string
// or a StringBuilder). Returns 0 when cur is not a starter; otherwise
// returns whatever GetPrimaryCompositeCharIndex finds at that position.
196 static int GetPrimaryCompositeMapIndex (object o, int cur, int bufferPos)
// Checked first: code points flagged FullCompositionExclusion.
198 if ((PropValue (cur) & FullCompositionExclusion) != 0)
// A non-zero combining class means cur cannot start a composite.
200 if (GetCombiningClass (cur) != 0)
201 return 0; // not a starter
202 return GetPrimaryCompositeCharIndex (o, bufferPos);
// Returns the decomposed form of source. When the decomposition pass never
// creates a buffer, source itself is returned.
static string Decompose (string source, int checkType)
{
	StringBuilder buffer = null;
	Decompose (source, ref buffer, checkType);
	if (buffer == null)
		return source;
	return buffer.ToString ();
}
// Decomposition pass. Every character whose quick check answers No is
// expanded through DecomposeChar. sb is created on demand by the helpers, so
// callers must be prepared for it to still be null afterwards.
212 static void Decompose (string source,
213 ref StringBuilder sb, int checkType)
217 for (int i = 0; i < source.Length; i++)
218 if (QuickCheck (source [i], checkType) == NormalizationCheck.No)
219 DecomposeChar (ref sb, ref buf, source,
// Copy the unchanged tail that follows the last decomposed character.
222 sb.Append (source, start, source.Length - start);
// Reordering pass, entered with a start index of 1.
223 ReorderCanonical (source, ref sb, 1);
// Reordering pass driven by combining classes. It has two scanning modes:
// one that reads only src and one that reads only sb.
// NOTE(review): presumably the src mode runs while sb is still null —
// confirm.
226 static void ReorderCanonical (string src, ref StringBuilder sb, int start)
229 // check only with src.
230 for (int i = 1; i < src.Length; i++) {
231 int level = GetCombiningClass (src [i]);
// Previous character has a higher combining class than this one: copy src
// into a new builder and re-enter on the builder from this index.
234 if (GetCombiningClass (src [i - 1]) > level) {
235 sb = new StringBuilder (src.Length);
236 sb.Append (src, 0, src.Length);
237 ReorderCanonical (src, ref sb, i);
243 // check only with sb
244 for (int i = start; i < sb.Length; i++) {
245 int level = GetCombiningClass (sb [i]);
// Same out-of-order test as above, this time on the builder.
248 if (GetCombiningClass (sb [i - 1]) > level) {
// Step back so the loop increment revisits this position.
252 i--; // apply recursively
// Appends to sb the pending unchanged run s [start .. i) followed by the
// decomposition of s [i] produced by GetCanonical, then moves start to i + 1
// so the caller's next copy begins after the decomposed character.
//
// sb and buf are created on first use. Decomposed code points above U+FFFF
// are written as a UTF-16 surrogate pair.
static void DecomposeChar (ref StringBuilder sb,
	ref int [] buf, string s, int i, ref int start)
{
	if (sb == null)
		sb = new StringBuilder (s.Length + 100);
	sb.Append (s, start, i - start);

	// The longest decomposition mapping in the Unicode Character Database is
	// 18 code points (U+FDFA); one more slot holds the 0 terminator.
	// NOTE(review): assumes no table entry is longer than that — confirm.
	if (buf == null)
		buf = new int [19];

	GetCanonical (s [i], buf, 0);

	// GetCanonical terminates the copied values with 0.
	for (int x = 0; buf [x] != 0; x++) {
		int cp = buf [x];
		if (cp <= char.MaxValue) {
			sb.Append ((char) cp);
			continue;
		}
		// Supplementary code point: subtract 0x10000, then split the
		// remaining 20 bits into a high and a low surrogate. The shift is
		// parenthesised because >> binds more loosely than +.
		int offset = cp - 0x10000;
		sb.Append ((char) ((offset >> 10) + 0xD800));
		sb.Append ((char) ((offset & 0x3FF) + 0xDC00));
	}
	start = i + 1;
}
// Quick check of a single UTF-16 code unit c for the normalization form
// selected by type. Each branch below tests the flags of one form.
// NOTE(review): which type value selects which branch is decided by case
// labels not documented here — confirm against the callers.
279 public static NormalizationCheck QuickCheck (char c, int type)
// NFC flags: NoNfc gives No, otherwise MaybeNfc gives Maybe, otherwise Yes.
284 v = PropValue ((int) c);
285 return (v & NoNfc) == 0 ?
286 (v & MaybeNfc) == 0 ?
287 NormalizationCheck.Yes :
288 NormalizationCheck.Maybe :
289 NormalizationCheck.No;
// NFD flag. Code units in U+AC00..U+D7A3 (the Hangul Syllables block) always
// answer No.
291 if ('\uAC00' <= c && c <= '\uD7A3')
292 return NormalizationCheck.No;
293 return (PropValue ((int) c) & NoNfd) != 0 ?
294 NormalizationCheck.No : NormalizationCheck.Yes;
// NFKC flags: NoNfkc gives No, otherwise MaybeNfkc gives Maybe, otherwise Yes.
296 v = PropValue ((int) c);
297 return (v & NoNfkc) != 0 ? NormalizationCheck.No :
298 (v & MaybeNfkc) != 0 ?
299 NormalizationCheck.Maybe :
300 NormalizationCheck.Yes;
// NFKD flag, with the same U+AC00..U+D7A3 shortcut as the NFD branch.
302 if ('\uAC00' <= c && c <= '\uD7A3')
303 return NormalizationCheck.No;
304 return (PropValue ((int) c) & NoNfkd) != 0 ?
305 NormalizationCheck.No : NormalizationCheck.Yes;
309 /* The FC_NFKC closure helpers below are not used for now, so they are commented out.
310 public static bool IsMultiForm (char c)
312 return (PropValue ((int) c) & 0xF0000000) != 0;
315 public static char SingleForm (char c)
317 uint v = PropValue ((int) c);
318 int idx = (int) ((v & 0x7FFF0000) >> 16);
319 return (char) singleNorm [idx];
322 public static void MultiForm (char c, char [] buf, int index)
324 // FIXME: handle surrogate
325 uint v = PropValue ((int) c);
326 int midx = (int) ((v & 0x7FFF0000) >> 16);
327 buf [index] = (char) multiNorm [midx];
328 buf [index + 1] = (char) multiNorm [midx + 1];
329 buf [index + 2] = (char) multiNorm [midx + 2];
330 buf [index + 3] = (char) multiNorm [midx + 3];
331 if (buf [index + 3] != 0)
332 buf [index + 4] = (char) 0; // zero termination
// Copies the mappedChars entry for code point c into buf, starting at
// bufIdx, and writes a 0 terminator after the last copied value. buf must
// have room for the whole entry plus the terminator.
public static void GetCanonical (int c, int [] buf, int bufIdx)
{
	int src = CharMapIdx (c);
	while (mappedChars [src] != 0) {
		buf [bufIdx] = mappedChars [src];
		bufIdx++;
		src++;
	}
	buf [bufIdx] = 0;
}
// Tests whether source is already in the normalization form selected by
// type, using combining-class order and QuickCheck per character, with a
// full Normalize-and-compare for the case noted below.
343 public static bool IsNormalized (string source, int type)
346 for (int i = 0; i < source.Length; i++) {
// A non-zero combining class lower than the previous one means the
// combining characters are out of order.
347 int cc = GetCombiningClass (source [i]);
348 if (cc != 0 && cc < prevCC)
351 switch (QuickCheck (source [i], type)) {
352 case NormalizationCheck.Yes:
354 case NormalizationCheck.No:
356 case NormalizationCheck.Maybe:
357 // for those forms with composition, it cannot be checked here
// Decide by normalizing and comparing the string contents.
361 return source == Normalize (source, type);
365 // partly copied from Combine()
367 // FIXME: It should check "blocked" too
369 if (!CanBePrimaryComposite ((int) source [i]))
372 // Now i is the "starter"
373 for (; i < cur; i++) {
// A non-zero index means a composite entry matches at position i.
374 if (GetPrimaryCompositeCharIndex (source, i) != 0)
// Returns source normalized to the form selected by type, by dispatching to
// either Compose or Decompose.
// NOTE(review): presumably the composed forms (NFC / NFKC) take the Compose
// branch and the decomposed forms the Decompose branch — confirm.
383 public static string Normalize (string source, int type)
388 return Compose (source, type);
391 return Decompose (source, type);
// Raw pointers to the lookup tables, assigned by the static constructor.
// Sequences of mapped values, each terminated by 0.
396 static int* mappedChars;
// Per-code-point offsets into mappedChars (see CharMapIdx).
397 static short* charMapIndex;
// Per-code-point index of the first composite candidate in mappedChars
// (see GetPrimaryCompositeHelperIndex).
398 static short* helperIndex;
// Composite character for a given mappedChars index
// (see GetPrimaryCompositeFromMapIndex).
399 static ushort* mapIdxToComposite;
// Per-code-point combining class (see GetCombiningClass).
400 static byte* combiningClass;
// Table setup from managed arrays: each array is pinned with fixed and its
// address is copied into the matching static pointer.
// NOTE(review): a second static constructor and a second IsReady member
// appear further down; presumably the two variants are selected by
// conditional compilation — confirm.
// NOTE(review): the stored pointers stay in use after their fixed blocks
// end, when the arrays are no longer pinned. That is only safe if the
// runtime never relocates these arrays — confirm, or pin them for the
// process lifetime (for example with a pinned GCHandle).
404 public static readonly bool IsReady = true; // always
406 static Normalization ()
408 fixed (byte* tmp = propsArr) {
411 fixed (int* tmp = mappedCharsArr) {
414 fixed (short* tmp = charMapIndexArr) {
417 fixed (short* tmp = helperIndexArr) {
420 fixed (ushort* tmp = mapIdxToCompositeArr) {
421 mapIdxToComposite = tmp;
423 fixed (byte* tmp = combiningClassArr) {
424 combiningClass = tmp;
// NOTE(review): forLock is not referenced by the code around it — confirm
// whether it is still needed.
429 static object forLock = new object ();
// Value reported by IsReady. Being readonly, it can only be assigned by a
// static constructor or initializer.
430 public static readonly bool isReady;
// Reports isReady.
432 public static bool IsReady {
433 get { return isReady; }
// Runtime internal call that hands back the addresses of the six
// normalization tables through its out parameters. The static constructor
// casts them to the typed table pointers.
436 [MethodImpl (MethodImplOptions.InternalCall)]
437 static extern void load_normalization_resource (
438 out IntPtr props, out IntPtr mappedChars,
439 out IntPtr charMapIndex, out IntPtr helperIndex,
440 out IntPtr mapIdxToComposite, out IntPtr combiningClass);
// Table setup through the runtime: fetches the table addresses with
// load_normalization_resource and casts them to the typed static pointers.
442 static Normalization ()
// p1..p6 follow the out-parameter order of load_normalization_resource:
// props, mappedChars, charMapIndex, helperIndex, mapIdxToComposite,
// combiningClass.
444 IntPtr p1, p2, p3, p4, p5, p6;
446 load_normalization_resource (out p1, out p2, out p3, out p4, out p5, out p6);
448 mappedChars = (int*) p2;
449 charMapIndex = (short*) p3;
450 helperIndex = (short*) p4;
451 mapIdxToComposite = (ushort*) p5;
452 combiningClass = (byte*) p6;
462 // autogenerated code or icall to fill array runs here