2 using System.Globalization;
4 using System.Runtime.CompilerServices;
6 using NUtil = Mono.Globalization.Unicode.NormalizationTableUtil;
8 namespace Mono.Globalization.Unicode
// Outcome of a per-character quick check. The members used elsewhere in this
// file are Yes, No and Maybe.
// NOTE(review): the member list itself is not visible in the reviewed excerpt.
10 internal enum NormalizationCheck {
16 internal unsafe class Normalization
// Bit flags that are tested against the per-character value returned by
// PropValue().
// NoNfd / NoNfkd / NoNfc / NoNfkc: QuickCheck() answers No when the flag is set.
18 public const int NoNfd = 1;
19 public const int NoNfkd = 2;
20 public const int NoNfc = 4;
// MaybeNfc / MaybeNfkc: QuickCheck() answers Maybe when the flag is set and
// the matching No* flag is clear.
21 public const int MaybeNfc = 8;
22 public const int NoNfkc = 16;
23 public const int MaybeNfkc = 32;
// Tested by GetPrimaryCompositeMapIndex() before looking for a composite.
24 public const int FullCompositionExclusion = 64;
// Tested by CanBePrimaryComposite() for characters outside 0x3400..0x9FBB.
25 public const int IsUnsafe = 128;
// The following flags are disabled; nothing visible in this file refers to them.
26 // public const int ExpandOnNfd = 256;
27 // public const int ExpandOnNfc = 512;
28 // public const int ExpandOnNfkd = 1024;
29 // public const int ExpandOnNfkc = 2048;
// Fetches the flag set stored for a code point.
// cp: code point to look up.
// Returns the props table entry at the position NUtil.PropIdx() maps cp to.
static uint PropValue (int cp)
{
	// p [e] is by definition *(p + e); written out here as an explicit dereference
	return *(props + NUtil.PropIdx (cp));
}
// Translates a code point into its position in the mappedChars table.
// cp: code point to look up.
// Returns the charMapIndex entry at the position NUtil.MapIdx() maps cp to.
static int CharMapIdx (int cp)
{
	return *(charMapIndex + NUtil.MapIdx (cp));
}
// Works on the mappedChars run that charMapIndex associates with ch: the loop
// below walks that table until it meets a 0 entry.
// NOTE(review): the declaration of i, the loop body and the return statement
// are not visible in the reviewed excerpt - presumably the number of entries
// before the 0 terminator is returned; confirm against the full file.
41 static int GetComposedStringLength (int ch)
43 int start = charMapIndex [NUtil.MapIdx (ch)];
45 while (mappedChars [i] != 0)
// Reads the combining class recorded for a code point.
// c: code point to look up.
// Returns the combiningClass table entry at the position
// NUtil.Combining.ToIndex() maps c to.
static byte GetCombiningClass (int c)
{
	return *(combiningClass + NUtil.Combining.ToIndex (c));
}
// Resolves a map index to the composite stored for it.
// src: map index to resolve.
// Returns the mapIdxToComposite entry at the position
// NUtil.Composite.ToIndex() maps src to.
static int GetPrimaryCompositeFromMapIndex (int src)
{
	return *(mapIdxToComposite + NUtil.Composite.ToIndex (src));
}
// Finds where the composition entries for a code point start.
// cp: code point to look up.
// Returns the helperIndex entry for cp after passing it through
// NUtil.Map.ToIndex().
static int GetPrimaryCompositeHelperIndex (int cp)
{
	// The helper table stores an index at uncompressed state; widen it to int
	// and let NUtil.Map.ToIndex() convert it.
	return NUtil.Map.ToIndex ((int) helperIndex [NUtil.Helper.ToIndex (cp)]);
}
// Starting from the helper index of the character at chars [start], walks
// the mappedChars entries that begin with that character and compares each
// entry against the text that follows.
// chars: the text, either a string or a StringBuilder (both casts are tried).
// start: index of the first character to match.
// Returns 0 on the visible "didn't match" / "no match" paths.
// NOTE(review): several lines of this method (including the success return)
// are not visible in the reviewed excerpt.
66 static int GetPrimaryCompositeCharIndex (object chars, int start)
68 string s = chars as string;
69 StringBuilder sb = chars as StringBuilder;
// If chars is neither a string nor a StringBuilder both locals are null and
// the next line dereferences a null sb.
70 char startCh = s != null ? s [start] : sb [start];
71 int charsLength = sb != null ? sb.Length : s.Length;
73 int idx = GetPrimaryCompositeHelperIndex ((int) startCh);
// Each candidate entry starts with startCh; i walks the entry, j walks the text.
76 while (mappedChars [idx] == startCh) {
77 for (int i = 1, j = 1; ; i++, j++) {
// A 0 entry marks the end of the candidate.
78 if (mappedChars [idx + i] == 0)
81 if (start + i >= charsLength)
82 return 0; // didn't match
84 // handle blocked characters here.
92 combiningClass = GetCombiningClass (curCh);
93 if (++j + start >= charsLength ||
96 nextCB = GetCombiningClass (
100 } while (nextCB > 0 && combiningClass >= nextCB);
102 if (mappedChars [idx + i] == curCh)
// The comparison relies on entries being ordered: once the stored value
// exceeds curCh the search gives up.
104 if (mappedChars [idx + i] > curCh)
105 return 0; // no match
106 // otherwise move idx to next item
// NOTE(review): this indexes mappedChars with i alone, while every other
// access in this loop uses idx + i - looks like it should skip to the 0
// terminator of the current entry (idx + i); confirm.
107 while (mappedChars [i] != 0)
113 // reached to end of entries
// Produces the composed form of source for the normalization selected by
// checkType: the text is decomposed first and the result is then combined.
// Returns source itself when no StringBuilder was produced.
// NOTE(review): the conditions choosing between the two Combine() calls are
// not visible in the reviewed excerpt.
117 private static string Compose (string source, int checkType)
119 StringBuilder sb = null;
// Decompose() leaves sb untouched unless it had to build a new buffer.
120 Decompose (source, ref sb, checkType);
122 sb = Combine (source, 0, checkType);
124 Combine (sb, 0, checkType);
126 return sb != null ? sb.ToString () : source;
// String overload of Combine(): scans source with QuickCheck() and, once a
// buffer is needed, allocates a StringBuilder with about 10% headroom and
// hands it to the StringBuilder overload starting at index i.
// NOTE(review): the start parameter is not referenced on any visible line
// (the scan begins at 0), and the lines that fill sb and return are not
// visible in the reviewed excerpt - confirm against the full file.
129 private static StringBuilder Combine (string source, int start, int checkType)
131 for (int i = 0; i < source.Length; i++) {
132 if (QuickCheck (source [i], checkType) == NormalizationCheck.Yes)
134 StringBuilder sb = new StringBuilder (source.Length + source.Length / 10);
136 Combine (sb, i, checkType);
// Tells whether a code point may take part in a primary composite.
// i: code point to test.
// For 0x3400..0x9FBB (presumably the CJK ideograph blocks - confirm) the
// answer is whether a helper index exists; for every other code point it is
// whether the IsUnsafe flag is set.
private static bool CanBePrimaryComposite (int i)
{
	bool inHelperRange = 0x3400 <= i && i <= 0x9FBB;
	return inHelperRange
		? GetPrimaryCompositeHelperIndex (i) != 0
		: (PropValue (i) & IsUnsafe) != 0;
}
// In-place composition of sb from index start onwards. Each character is
// classified with QuickCheck(); where a composite is found, the composite
// character is inserted into the buffer.
// Throws SystemException when the tables yield no composite (0) or a
// zero-length mapping for an index that matched.
// NOTE(review): many lines of this method (the case bodies, the declarations
// of cur, idx and removed, and the removal of the combined characters) are
// not visible in the reviewed excerpt.
149 private static void Combine (StringBuilder sb, int start, int checkType)
151 for (int i = start; i < sb.Length; i++) {
152 switch (QuickCheck (sb [i], checkType)) {
153 case NormalizationCheck.Yes:
155 case NormalizationCheck.No:
157 case NormalizationCheck.Maybe:
165 // FIXME: It should check "blocked" too
167 if (!CanBePrimaryComposite ((int) sb [i]))
// Try every position up to cur as the first character of a composite.
171 for (; i < cur; i++) {
172 idx = GetPrimaryCompositeMapIndex (sb, (int) sb [i], i);
180 int ch = GetPrimaryCompositeFromMapIndex (idx);
181 int len = GetComposedStringLength (ch);
// NOTE(review): SystemException is a very general type to throw; a more
// specific one would be clearer, but changing it may affect callers.
182 if (ch == 0 || len == 0)
183 throw new SystemException ("Internal error: should not happen.");
185 sb.Insert (i++, (char) ch); // always single character
187 // handle blocked characters here.
188 while (removed < len) {
189 if (i + 1 < sb.Length) {
190 int cb = GetCombiningClass (sb [i]);
192 int next = GetCombiningClass (sb [i + 1]);
193 if (next != 0 && cb >= next) {
// Rewind so that the loop increment re-examines the buffer from cur.
202 i = cur - 1; // apply recursively
// Gate in front of GetPrimaryCompositeCharIndex().
// o:         the text (passed through unchanged to the lookup).
// cur:       the code point at bufferPos.
// bufferPos: position in o where matching starts.
// Returns 0 when cur has a non-zero combining class; otherwise the result
// of the lookup at bufferPos.
// NOTE(review): the statement guarded by the FullCompositionExclusion test
// is not visible in the reviewed excerpt.
206 static int GetPrimaryCompositeMapIndex (object o, int cur, int bufferPos)
208 if ((PropValue (cur) & FullCompositionExclusion) != 0)
210 if (GetCombiningClass (cur) != 0)
211 return 0; // not a starter
212 return GetPrimaryCompositeCharIndex (o, bufferPos);
// Returns the decomposed form of source for the normalization selected by
// checkType.
// source:    text to decompose.
// checkType: normalization form selector handed on to the worker overload.
// When the worker overload never creates a buffer, source is returned as is.
static string Decompose (string source, int checkType)
{
	StringBuilder buffer = null;
	Decompose (source, ref buffer, checkType);
	if (buffer == null)
		return source;
	return buffer.ToString ();
}
// Worker overload: every character for which QuickCheck() answers No is
// expanded through DecomposeChar(), the remaining tail of source is appended
// to sb, and ReorderCanonical() is then applied.
// sb is passed by reference; DecomposeChar() is the one that creates it.
// NOTE(review): the declarations of buf and start and any guard around the
// tail append are not visible in the reviewed excerpt.
222 static void Decompose (string source,
223 ref StringBuilder sb, int checkType)
227 for (int i = 0; i < source.Length; i++)
228 if (QuickCheck (source [i], checkType) == NormalizationCheck.No)
229 DecomposeChar (ref sb, ref buf, source,
// Copy whatever follows the last decomposed character.
232 sb.Append (source, start, source.Length - start);
233 ReorderCanonical (source, ref sb, 1);
// Looks for adjacent characters whose combining classes are out of order
// (the earlier one greater than the later one).
// src:   original text; scanned in the first loop.
// sb:    working buffer; created here as a copy of src when the first loop
//        finds an out-of-order pair, and scanned in the second loop.
// start: index at which the scan of sb begins.
// NOTE(review): the conditions choosing between the two loops and the
// statements that perform the swap are not visible in the reviewed excerpt.
236 static void ReorderCanonical (string src, ref StringBuilder sb, int start)
239 // check only with src.
240 for (int i = 1; i < src.Length; i++) {
241 int level = GetCombiningClass (src [i]);
244 if (GetCombiningClass (src [i - 1]) > level) {
245 sb = new StringBuilder (src.Length);
246 sb.Append (src, 0, src.Length);
// Continue on the freshly created buffer from the offending position.
247 ReorderCanonical (src, ref sb, i);
253 // check only with sb
254 for (int i = start; i < sb.Length; i++) {
255 int level = GetCombiningClass (sb [i]);
258 if (GetCombiningClass (sb [i - 1]) > level) {
262 i--; // apply recursively
// Appends the decomposition of s [i] to sb, preceded by the not yet copied
// run s [start .. i - 1].
// sb:    output buffer; created on first use with room for s plus 100 chars.
// buf:   scratch array that GetCanonical() fills with zero-terminated code
//        points; allocated on first use.
// s:     text being decomposed.
// i:     index in s of the character to decompose.
// start: index of the first character of s not yet copied into sb; set to
//        i + 1 on return.
//
// Fixes over the previous version:
//  * the high surrogate was computed as buf [x] >> (10 + 0xD800) because '+'
//    binds tighter than '>>';
//  * 0x10000 was never subtracted and the low surrogate was masked with
//    0x0FFF instead of the 10-bit mask 0x03FF;
//  * U+FFFF was treated as a supplementary code point ('<' instead of '<=').
//
// NOTE(review): the null guards, the loop terminator and the final update of
// start were not visible in the reviewed excerpt and are reconstructed from
// how the ref parameters are used - confirm against the full file.
static void DecomposeChar (ref StringBuilder sb,
	ref int [] buf, string s, int i, ref int start)
{
	if (sb == null)
		sb = new StringBuilder (s.Length + 100);
	// flush the untouched run that precedes s [i]
	sb.Append (s, start, i - start);
	if (buf == null)
		// presumably large enough for the longest mapping plus the 0
		// terminator (the longest Unicode decomposition is 18 code points)
		buf = new int [19];
	GetCanonical (s [i], buf, 0);
	for (int x = 0; buf [x] != 0; x++) {
		int cp = buf [x];
		if (cp <= char.MaxValue) {
			sb.Append ((char) cp);
		} else {
			// UTF-16: split the 20-bit offset above U+FFFF into two 10-bit halves
			int offset = cp - 0x10000;
			sb.Append ((char) ((offset >> 10) + 0xD800));
			sb.Append ((char) ((offset & 0x03FF) + 0xDC00));
		}
	}
	start = i + 1;
}
// Classifies a single character against one normalization form using the
// flags returned by PropValue().
// c:    character to classify.
// type: selects which of the four branches below is used.
// NOTE(review): the switch and its case labels are not visible in the
// reviewed excerpt, so the mapping from type values to branches must be
// confirmed against the full file.
289 public static NormalizationCheck QuickCheck (char c, int type)
// Branch using the NFC flags: No if NoNfc is set, else Maybe if MaybeNfc is
// set, else Yes.
294 v = PropValue ((int) c);
295 return (v & NoNfc) == 0 ?
296 (v & MaybeNfc) == 0 ?
297 NormalizationCheck.Yes :
298 NormalizationCheck.Maybe :
299 NormalizationCheck.No;
// Branch using the NFD flag: U+AC00..U+D7A3 (the Hangul Syllables block) is
// always No, without consulting the table.
301 if ('\uAC00' <= c && c <= '\uD7A3')
302 return NormalizationCheck.No;
303 return (PropValue ((int) c) & NoNfd) != 0 ?
304 NormalizationCheck.No : NormalizationCheck.Yes;
// Branch using the NFKC flags: same three-way answer as the NFC branch.
306 v = PropValue ((int) c);
307 return (v & NoNfkc) != 0 ? NormalizationCheck.No :
308 (v & MaybeNfkc) != 0 ?
309 NormalizationCheck.Maybe :
310 NormalizationCheck.Yes;
// Branch using the NFKD flag: same shape as the NFD branch.
312 if ('\uAC00' <= c && c <= '\uD7A3')
313 return NormalizationCheck.No;
314 return (PropValue ((int) c) & NoNfkd) != 0 ?
315 NormalizationCheck.No : NormalizationCheck.Yes;
319 /* for now we don't use FC_NFKC closure
320 public static bool IsMultiForm (char c)
322 return (PropValue ((int) c) & 0xF0000000) != 0;
325 public static char SingleForm (char c)
327 uint v = PropValue ((int) c);
328 int idx = (int) ((v & 0x7FFF0000) >> 16);
329 return (char) singleNorm [idx];
332 public static void MultiForm (char c, char [] buf, int index)
334 // FIXME: handle surrogate
335 uint v = PropValue ((int) c);
336 int midx = (int) ((v & 0x7FFF0000) >> 16);
337 buf [index] = (char) multiNorm [midx];
338 buf [index + 1] = (char) multiNorm [midx + 1];
339 buf [index + 2] = (char) multiNorm [midx + 2];
340 buf [index + 3] = (char) multiNorm [midx + 3];
341 if (buf [index + 3] != 0)
342 buf [index + 4] = (char) 0; // zero termination
// Copies the mapping stored for a code point into a caller-supplied array.
// c:      code point whose mapping is wanted.
// buf:    destination; receives the mapped values followed by a 0 terminator.
// bufIdx: position in buf at which writing starts.
// No bounds check is made: buf must have room for the mapping plus the
// terminator.
public static void GetCanonical (int c, int [] buf, int bufIdx)
{
	int mapPos = CharMapIdx (c);
	// the stored mapping is terminated by a 0 entry
	while (mappedChars [mapPos] != 0) {
		buf [bufIdx] = mappedChars [mapPos];
		bufIdx++;
		mapPos++;
	}
	buf [bufIdx] = 0;
}
// Tests whether source is already in the normalization form selected by type.
// Each character is checked for combining-class order against the previous
// one and then classified with QuickCheck().
// NOTE(review): the case bodies, the declarations of prevCC and cur, and the
// final return are not visible in the reviewed excerpt.
353 public static bool IsNormalized (string source, int type)
356 for (int i = 0; i < source.Length; i++) {
357 int cc = GetCombiningClass (source [i]);
// A non-zero combining class lower than the previous one is out of order.
358 if (cc != 0 && cc < prevCC)
361 switch (QuickCheck (source [i], type)) {
362 case NormalizationCheck.Yes:
364 case NormalizationCheck.No:
366 case NormalizationCheck.Maybe:
367 // for those forms with composition, it cannot be checked here
// Falls back to normalizing the whole string and comparing the result.
371 return source == Normalize (source, type);
375 // partly copied from Combine()
377 // FIXME: It should check "blocked" too
379 if (!CanBePrimaryComposite ((int) source [i]))
382 // Now i is the "starter"
383 for (; i < cur; i++) {
// A non-zero index means a composite exists starting at position i.
384 if (GetPrimaryCompositeCharIndex (source, i) != 0)
// Normalizes source to the form selected by type, delegating to Compose()
// or Decompose().
// NOTE(review): the switch that maps type values to the two calls is not
// visible in the reviewed excerpt.
393 public static string Normalize (string source, int type)
398 return Compose (source, type);
400 return Decompose (source, type);
// Raw pointers to the lookup tables; they are assigned in the static
// constructors of this class.
// mappedChars: runs of values terminated by 0 (see GetCanonical()).
405 static int* mappedChars;
// charMapIndex: positions into mappedChars (see CharMapIdx()).
406 static short* charMapIndex;
// helperIndex: read by GetPrimaryCompositeHelperIndex().
407 static short* helperIndex;
// mapIdxToComposite: read by GetPrimaryCompositeFromMapIndex().
408 static ushort* mapIdxToComposite;
// combiningClass: read by GetCombiningClass().
409 static byte* combiningClass;
// NOTE(review): this file contains two IsReady members and two static
// constructors; presumably they sit in alternative conditional-compilation
// branches whose directives are not visible in the reviewed excerpt.
413 public static readonly bool IsReady = true; // always
// Static constructor for the variant that takes the tables from managed
// arrays (propsArr, mappedCharsArr, ...) and stores pointers to them.
// NOTE(review): a pointer obtained in a fixed statement is only guaranteed
// to stay valid while that statement executes, yet the pointers are kept in
// static fields - confirm that the arrays cannot be moved by the GC.
415 static Normalization ()
417 fixed (byte* tmp = propsArr) {
420 fixed (int* tmp = mappedCharsArr) {
423 fixed (short* tmp = charMapIndexArr) {
426 fixed (short* tmp = helperIndexArr) {
429 fixed (ushort* tmp = mapIdxToCompositeArr) {
430 mapIdxToComposite = tmp;
432 fixed (byte* tmp = combiningClassArr) {
433 combiningClass = tmp;
// Lock object. NOTE(review): presumably taken while the tables are loaded;
// its use is not visible in the reviewed excerpt.
438 static object forLock = new object ();
// Backing field for IsReady. NOTE(review): no visible line assigns it.
439 public static readonly bool isReady;
// Exposes the isReady field.
441 public static bool IsReady {
442 get { return isReady; }
// Runtime-implemented call (InternalCall) that hands back one pointer per
// table. The static constructor casts mappedChars, charMapIndex, helperIndex,
// mapIdxToComposite and combiningClass to the fields of the same names.
445 [MethodImpl (MethodImplOptions.InternalCall)]
446 static extern void load_normalization_resource (
447 out IntPtr props, out IntPtr mappedChars,
448 out IntPtr charMapIndex, out IntPtr helperIndex,
449 out IntPtr mapIdxToComposite, out IntPtr combiningClass);
// Static constructor for the variant that obtains the tables from the
// runtime through load_normalization_resource() and casts each returned
// IntPtr to the element type of the corresponding table field.
// NOTE(review): the handling of p1 (props) and any surrounding lock are not
// visible in the reviewed excerpt.
451 static Normalization ()
453 IntPtr p1, p2, p3, p4, p5, p6;
455 load_normalization_resource (out p1, out p2, out p3, out p4, out p5, out p6);
457 mappedChars = (int*) p2;
458 charMapIndex = (short*) p3;
459 helperIndex = (short*) p4;
460 mapIdxToComposite = (ushort*) p5;
461 combiningClass = (byte*) p6;
471 // autogenerated code or icall to fill array runs here