2 using System.Globalization;
4 using System.Runtime.CompilerServices;
6 using NUtil = Mono.Globalization.Unicode.NormalizationTableUtil;
8 namespace Mono.Globalization.Unicode
// Result of a per-character quick check (see QuickCheck): Yes, No or Maybe.
10 internal enum NormalizationCheck {
16 internal unsafe class Normalization
// Bit flags tested against the per-character property value returned by
// PropValue. Every flag owns a single bit, so they are combined and tested
// with the bitwise operators.
public const int NoNfd = 1 << 0;
public const int NoNfkd = 1 << 1;
public const int NoNfc = 1 << 2;
public const int MaybeNfc = 1 << 3;
public const int NoNfkc = 1 << 4;
public const int MaybeNfkc = 1 << 5;
public const int FullCompositionExclusion = 1 << 6;
public const int IsUnsafe = 1 << 7;
// Currently disabled flags, kept for reference:
// public const int ExpandOnNfd = 256;
// public const int ExpandOnNfc = 512;
// public const int ExpandOnNfkd = 1024;
// public const int ExpandOnNfkc = 2048;
// Returns the entry of the props table that belongs to code point `cp`.
// The table slot is computed by NUtil.PropIdx.
static uint PropValue (int cp)
{
	int slot = NUtil.PropIdx (cp);
	return props [slot];
}
// Returns the charMapIndex entry for code point `cp`; callers use the
// result as a position inside mappedChars.
static int CharMapIdx (int cp)
{
	int slot = NUtil.MapIdx (cp);
	return charMapIndex [slot];
}
// Looks up where the mapping for `ch` begins in mappedChars (through
// charMapIndex) and walks mappedChars until a 0 entry is found.
// NOTE(review): judging by the name, the result is presumably the number of
// entries in front of that terminator -- confirm against the return statement.
41 static int GetComposedStringLength (int ch)
// Position of the first mapped value for `ch`.
43 int start = charMapIndex [NUtil.MapIdx (ch)];
// A 0 entry marks the end of the mapping.
45 while (mappedChars [i] != 0)
// Returns the value stored in the combiningClass table for code point `c`.
// The table slot is computed by NUtil.Combining.ToIndex.
static byte GetCombiningClass (int c)
{
	int slot = NUtil.Combining.ToIndex (c);
	return combiningClass [slot];
}
// Translates a map index (`src`) into the composite value recorded for it
// in the mapIdxToComposite table.
static int GetPrimaryCompositeFromMapIndex (int src)
{
	int slot = NUtil.Composite.ToIndex (src);
	return mapIdxToComposite [slot];
}
// Returns the helperIndex entry for code point `cp`. Callers in this file
// treat a result of 0 as "no entry".
static short GetPrimaryCompositeHelperIndex (int cp)
{
	int slot = NUtil.Helper.ToIndex (cp);
	return helperIndex [slot];
}
// Tries to match the text that begins at chars[start] against the candidate
// sequences stored in mappedChars, starting from the position that
// GetPrimaryCompositeHelperIndex reports for the first character.
// `chars` may be a string or a StringBuilder. The visible exits return 0 to
// signal "no match".
// NOTE(review): when `chars` is neither type, both `as` casts yield null and
// the `sb [start]` read below throws NullReferenceException.
65 static int GetPrimaryCompositeCharIndex (object chars, int start)
67 string s = chars as string;
68 StringBuilder sb = chars as StringBuilder;
// Read the first character and the total length from whichever view is set.
69 char startCh = s != null ? s [start] : sb [start];
70 int charsLength = sb != null ? sb.Length : s.Length;
72 int idx = GetPrimaryCompositeHelperIndex ((int) startCh);
// Each candidate entry starts with the same first character.
75 while (mappedChars [idx] == startCh) {
// i walks the candidate entry, j walks the input text.
76 for (int i = 1, j = 1; ; i++, j++) {
// A 0 in mappedChars terminates the candidate entry.
77 if (mappedChars [idx + i] == 0)
// The input ran out before the candidate entry ended.
80 if (start + i >= charsLength)
81 return 0; // didn't match
83 // handle blocked characters here.
91 combiningClass = GetCombiningClass (curCh);
92 if (++j + start >= charsLength ||
95 nextCB = GetCombiningClass (
// Keep skipping while the next character has a non-zero combining class
// that does not exceed the current one.
99 } while (nextCB > 0 && combiningClass >= nextCB);
101 if (mappedChars [idx + i] == curCh)
// The comparison below assumes the entries are sorted in ascending order
// -- TODO confirm against the table generator.
103 if (mappedChars [idx + i] > curCh)
104 return 0; // no match
105 // otherwise move idx to next item
// NOTE(review): this read uses `i` alone, while the other visible lookups in
// this loop use `idx + i`; check whether the `idx` offset was dropped here.
106 while (mappedChars [i] != 0)
112 // reached to end of entries
// Produces the composed form of `source`: the text is first run through
// Decompose and then through Combine.
// Returns `source` itself when no StringBuilder was created along the way.
116 private static string Compose (string source, int checkType)
118 StringBuilder sb = null;
// Decompose receives sb by reference and may replace the null with a builder.
119 Decompose (source, ref sb, checkType);
// NOTE(review): which of the two Combine calls below runs is decided by
// conditions that sit between these statements -- presumably a null check on sb.
121 sb = Combine (source, 0, checkType);
123 Combine (sb, 0, checkType);
125 return sb != null ? sb.ToString () : source;
// String overload of Combine: walks `source`, testing each character with
// QuickCheck, and hands over to the StringBuilder overload of Combine with
// a freshly created builder.
// NOTE(review): the `start` parameter is not read by any statement shown
// here; the scan begins at index 0.
128 private static StringBuilder Combine (string source, int start, int checkType)
130 for (int i = 0; i < source.Length; i++) {
// Characters whose quick check is Yes are presumably skipped -- confirm
// against the statement guarded by this test.
131 if (QuickCheck (source [i], checkType) == NormalizationCheck.Yes)
// The builder's capacity is the source length plus one tenth of it.
133 StringBuilder sb = new StringBuilder (source.Length + source.Length / 10);
// Continue in the builder from position i, where the scan stopped.
135 Combine (sb, i, checkType);
// Tells whether code point `i` is a candidate for composition.
// Code points from 0x3400 through 0x9FBB are answered from the helper index
// (a non-zero entry means yes); every other code point is answered from the
// IsUnsafe bit of its property value.
private static bool CanBePrimaryComposite (int i)
{
	bool useHelperTable = i >= 0x3400 && i <= 0x9FBB;
	return useHelperTable
		? GetPrimaryCompositeHelperIndex (i) != 0
		: (PropValue (i) & IsUnsafe) != 0;
}
// StringBuilder overload of Combine: processes `sb` in place from index
// `start`, replacing matched character sequences with their composite
// character.
148 private static void Combine (StringBuilder sb, int start, int checkType)
150 for (int i = start; i < sb.Length; i++) {
// The quick-check result of the current character selects the handling.
151 switch (QuickCheck (sb [i], checkType)) {
152 case NormalizationCheck.Yes:
154 case NormalizationCheck.No:
156 case NormalizationCheck.Maybe:
164 // FIXME: It should check "blocked" too
166 if (!CanBePrimaryComposite ((int) sb [i]))
// Try every position up to `cur` as the start of a composable sequence.
170 for (; i < cur; i++) {
171 idx = GetPrimaryCompositeMapIndex (sb, (int) sb [i], i);
// Resolve the match to the composite character and the length of the
// sequence it stands for.
179 int ch = GetPrimaryCompositeFromMapIndex (idx);
180 int len = GetComposedStringLength (ch);
// NOTE(review): SystemException is very general; a more specific type such
// as InvalidOperationException (itself a SystemException) would be clearer.
181 if (ch == 0 || len == 0)
182 throw new SystemException ("Internal error: should not happen.");
// Put the composite in front of the sequence and step past it.
184 sb.Insert (i++, (char) ch); // always single character
186 // handle blocked characters here.
// Loop until `len` characters have been accounted for in `removed`.
187 while (removed < len) {
188 if (i + 1 < sb.Length) {
189 int cb = GetCombiningClass (sb [i]);
191 int next = GetCombiningClass (sb [i + 1]);
192 if (next != 0 && cb >= next) {
// Step back so the enclosing loop's i++ resumes at `cur`.
201 i = cur - 1; // apply recursively
// Filters `cur` before delegating to GetPrimaryCompositeCharIndex for the
// text `o` at position `bufferPos`. `o` is passed through unchanged, so it
// has to be a type that GetPrimaryCompositeCharIndex accepts.
205 static int GetPrimaryCompositeMapIndex (object o, int cur, int bufferPos)
// Characters carrying the FullCompositionExclusion flag are handled
// separately; the guarded statement presumably returns 0 -- TODO confirm.
207 if ((PropValue (cur) & FullCompositionExclusion) != 0)
// A non-zero combining class rules the character out as a starter.
209 if (GetCombiningClass (cur) != 0)
210 return 0; // not a starter
211 return GetPrimaryCompositeCharIndex (o, bufferPos);
// Returns the decomposed form of `source`.
// The work is done by the ref-StringBuilder overload; when that overload
// leaves the builder null, `source` itself is returned unchanged.
static string Decompose (string source, int checkType)
{
	StringBuilder decomposed = null;
	Decompose (source, ref decomposed, checkType);
	if (decomposed == null)
		return source;
	return decomposed.ToString ();
}
// Decomposes `source` into `sb`. The builder is passed by reference;
// DecomposeChar creates it on demand, so it presumably stays null when no
// character needs decomposing -- TODO confirm.
221 static void Decompose (string source,
222 ref StringBuilder sb, int checkType)
// Only characters whose quick check says No are expanded.
226 for (int i = 0; i < source.Length; i++)
227 if (QuickCheck (source [i], checkType) == NormalizationCheck.No)
228 DecomposeChar (ref sb, ref buf, source,
// Copy the part of `source` that follows the last expanded character.
231 sb.Append (source, start, source.Length - start);
// Finally put the characters into canonical order, starting from index 1.
232 ReorderCanonical (source, ref sb, 1);
// Reorders characters by combining class. Two paths are visible: one that
// scans `src` and one that works on `sb`; which one runs is presumably
// decided by whether `sb` is null -- TODO confirm.
// `start` must be at least 1 because the builder loop reads sb [i - 1].
235 static void ReorderCanonical (string src, ref StringBuilder sb, int start)
238 // check only with src.
239 for (int i = 1; i < src.Length; i++) {
240 int level = GetCombiningClass (src [i]);
// The previous character has a higher combining class than this one:
// copy `src` into a new builder and continue on the builder from i.
243 if (GetCombiningClass (src [i - 1]) > level) {
244 sb = new StringBuilder (src.Length);
245 sb.Append (src, 0, src.Length);
246 ReorderCanonical (src, ref sb, i);
252 // check only with sb
253 for (int i = start; i < sb.Length; i++) {
254 int level = GetCombiningClass (sb [i]);
// Same out-of-order test as above, this time on the builder's content.
257 if (GetCombiningClass (sb [i - 1]) > level) {
// Step back so the preceding pair is examined again.
261 i--; // apply recursively
// Appends the canonical mapping of s [i] to `sb`, after first copying the
// untouched characters s [start .. i). `sb` is created on demand and `buf`
// is the scratch buffer that GetCanonical fills.
266 static void DecomposeChar (ref StringBuilder sb,
267 ref int [] buf, string s, int i, ref int start)
// The new builder gets 100 characters of headroom beyond the source length.
270 sb = new StringBuilder (s.Length + 100);
271 sb.Append (s, start, i - start);
// GetCanonical writes the mapping into buf and ends it with a 0 entry.
274 GetCanonical (s [i], buf, 0);
275 for (int x = 0; ; x++) {
// NOTE(review): a value equal to char.MaxValue (0xFFFF) fails this test and
// falls through to the two-character path below; `<=` may have been intended.
278 if (buf [x] < char.MaxValue)
279 sb.Append ((char) buf [x]);
// NOTE(review): probable bug. In C# `+` binds tighter than `>>`, so the
// expression below is evaluated as buf [x] >> (10 + 0xD800) instead of
// (buf [x] >> 10) + 0xD800. Standard UTF-16 encoding also subtracts 0x10000
// from the code point first and masks the low half with 0x3FF, not 0x0FFF.
281 sb.Append ((char) (buf [x] >> 10 + 0xD800));
282 sb.Append ((char) ((buf [x] & 0x0FFF) + 0xDC00));
// Classifies a single character for one normalization form, using the flag
// bits of its property value. Four branches are visible below; the value of
// `type` that selects each one is set by case labels -- TODO confirm the
// mapping against them.
288 public static NormalizationCheck QuickCheck (char c, int type)
// Branch driven by NoNfc / MaybeNfc: No wins over Maybe, Maybe over Yes.
293 v = PropValue ((int) c);
294 return (v & NoNfc) == 0 ?
295 (v & MaybeNfc) == 0 ?
296 NormalizationCheck.Yes :
297 NormalizationCheck.Maybe :
298 NormalizationCheck.No;
// Branch driven by NoNfd. U+AC00..U+D7A3 is always answered with No
// (presumably the precomposed Hangul syllable range -- TODO confirm).
300 if ('\uAC00' <= c && c <= '\uD7A3')
301 return NormalizationCheck.No;
302 return (PropValue ((int) c) & NoNfd) != 0 ?
303 NormalizationCheck.No : NormalizationCheck.Yes;
// Branch driven by NoNfkc / MaybeNfkc, with the same precedence as above.
305 v = PropValue ((int) c);
306 return (v & NoNfkc) != 0 ? NormalizationCheck.No :
307 (v & MaybeNfkc) != 0 ?
308 NormalizationCheck.Maybe :
309 NormalizationCheck.Yes;
// Branch driven by NoNfkd, with the same U+AC00..U+D7A3 special case.
311 if ('\uAC00' <= c && c <= '\uD7A3')
312 return NormalizationCheck.No;
313 return (PropValue ((int) c) & NoNfkd) != 0 ?
314 NormalizationCheck.No : NormalizationCheck.Yes;
318 /* for now we don't use FC_NFKC closure
319 public static bool IsMultiForm (char c)
321 return (PropValue ((int) c) & 0xF0000000) != 0;
324 public static char SingleForm (char c)
326 uint v = PropValue ((int) c);
327 int idx = (int) ((v & 0x7FFF0000) >> 16);
328 return (char) singleNorm [idx];
331 public static void MultiForm (char c, char [] buf, int index)
333 // FIXME: handle surrogate
334 uint v = PropValue ((int) c);
335 int midx = (int) ((v & 0x7FFF0000) >> 16);
336 buf [index] = (char) multiNorm [midx];
337 buf [index + 1] = (char) multiNorm [midx + 1];
338 buf [index + 2] = (char) multiNorm [midx + 2];
339 buf [index + 3] = (char) multiNorm [midx + 3];
340 if (buf [index + 3] != 0)
341 buf [index + 4] = (char) 0; // zero termination
// Copies the mapping of code point `c` from mappedChars into `buf`,
// beginning at buf [bufIdx], and writes a 0 entry after the last value.
// `buf` needs room for the whole mapping plus that terminator; no size
// check is made here.
public static void GetCanonical (int c, int [] buf, int bufIdx)
{
	int readPos = CharMapIdx (c);
	int writePos = bufIdx;
	while (mappedChars [readPos] != 0) {
		buf [writePos] = mappedChars [readPos];
		writePos++;
		readPos++;
	}
	buf [writePos] = 0;
}
// Checks `source` character by character, using the combining class and
// the QuickCheck result for form `type`. The statements that produce the
// boolean result follow the tests shown here.
352 public static bool IsNormalized (string source, int type)
355 for (int i = 0; i < source.Length; i++) {
356 int cc = GetCombiningClass (source [i]);
// A non-zero combining class below the previous one (prevCC); presumably
// this makes the method answer false -- TODO confirm.
357 if (cc != 0 && cc < prevCC)
360 switch (QuickCheck (source [i], type)) {
361 case NormalizationCheck.Yes:
363 case NormalizationCheck.No:
365 case NormalizationCheck.Maybe:
366 // partly copied from Combine()
368 // FIXME: It should check "blocked" too
370 if (!CanBePrimaryComposite ((int) source [i]))
373 // Now i is the "starter"
374 for (; i < cur; i++) {
// A non-zero result means a composable sequence begins at position i.
375 if (GetPrimaryCompositeCharIndex (source, i) != 0)
// Entry point: returns `source` normalized to the form selected by `type`,
// by calling either Compose or Decompose. The mapping from `type` values to
// the two calls is set by case labels -- TODO confirm against them.
384 public static string Normalize (string source, int type)
389 return Compose (source, type);
391 return Decompose (source, type);
// Raw pointers to the lookup tables; they are assigned in the static
// constructor.
// Mapped character sequences; each sequence ends with a 0 entry.
396 static int* mappedChars;
// Indexed through NUtil.MapIdx; yields positions inside mappedChars.
397 static short* charMapIndex;
// Indexed through NUtil.Helper.ToIndex; read by GetPrimaryCompositeHelperIndex.
398 static short* helperIndex;
// Indexed through NUtil.Composite.ToIndex; read by GetPrimaryCompositeFromMapIndex.
399 static ushort* mapIdxToComposite;
// Indexed through NUtil.Combining.ToIndex; read by GetCombiningClass.
400 static byte* combiningClass;
// Variant of the initialization in which the tables come from the *Arr
// members of this class, so IsReady is a constant true.
// NOTE(review): another IsReady member and another static constructor are
// declared further down; the two variants are presumably selected by
// conditional compilation -- confirm.
404 public static readonly bool IsReady = true; // always
// Takes the address of each table and stores it in the matching static
// pointer field.
// NOTE(review): a pointer obtained through `fixed` is only guaranteed to stay
// valid inside the fixed statement. Keeping it in a static field relies on
// the table never being moved afterwards -- confirm this holds for the
// target runtime.
406 static Normalization ()
408 fixed (byte* tmp = propsArr) {
411 fixed (int* tmp = mappedCharsArr) {
414 fixed (short* tmp = charMapIndexArr) {
417 fixed (short* tmp = helperIndexArr) {
420 fixed (ushort* tmp = mapIdxToCompositeArr) {
421 mapIdxToComposite = tmp;
423 fixed (byte* tmp = combiningClassArr) {
424 combiningClass = tmp;
// Object reserved for use as a lock target. It is readonly so that the
// reference can never be swapped for another object after initialization.
static readonly object forLock = new object ();

// Backing flag for IsReady. Being readonly, it can only be assigned while
// the type is being initialized.
public static readonly bool isReady;

// Reports the value of isReady.
public static bool IsReady {
	get { return isReady; }
}
// Runtime-internal call (MethodImplOptions.InternalCall): the implementation
// is supplied by the runtime, not by managed code. It hands back one address
// per table through the six out parameters, in the order props,
// mappedChars, charMapIndex, helperIndex, mapIdxToComposite, combiningClass.
436 [MethodImpl (MethodImplOptions.InternalCall)]
437 static extern void load_normalization_resource (
438 out IntPtr props, out IntPtr mappedChars,
439 out IntPtr charMapIndex, out IntPtr helperIndex,
440 out IntPtr mapIdxToComposite, out IntPtr combiningClass);
// Variant of the initialization that asks the runtime for the table
// addresses and stores them, cast to the element type of each table, in the
// static pointer fields.
442 static Normalization ()
// One address per table, in the parameter order of
// load_normalization_resource.
444 IntPtr p1, p2, p3, p4, p5, p6;
446 load_normalization_resource (out p1, out p2, out p3, out p4, out p5, out p6);
// p1 (the props table) is not used in the assignments shown here;
// presumably it is assigned to `props` next to them -- TODO confirm.
448 mappedChars = (int*) p2;
449 charMapIndex = (short*) p3;
450 helperIndex = (short*) p4;
451 mapIdxToComposite = (ushort*) p5;
452 combiningClass = (byte*) p6;
462 // autogenerated code or icall to fill array runs here