* roottypes.cs: Rename from tree.cs.
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / Normalization.cs
1 using System;
2 using System.Globalization;
3 using System.Text;
4 using System.Runtime.CompilerServices;
5
6 using NUtil = Mono.Globalization.Unicode.NormalizationTableUtil;
7
8 namespace Mono.Globalization.Unicode
9 {
           // Outcome of the per-character quick check performed by
           // Normalization.QuickCheck ().
10         internal enum NormalizationCheck {
                   // Neither the "No" nor the "Maybe" flag of the requested
                   // form is set for the character.
11                 Yes,
                   // The "No" flag of the requested form is set (QuickCheck also
                   // answers No for U+AC00..U+D7A3 in the decomposed forms).
12                 No,
                   // The "Maybe" flag is set; QuickCheck only produces this
                   // value for the composed forms (NFC / NFKC).
13                 Maybe
14         }
15
16         internal unsafe class Normalization
17         {
                   // Bit flags that are masked against the value returned by
                   // PropValue () for a code point.
                   // NoNfd / NoNfkd / NoNfc / NoNfkc: QuickCheck () answers "No"
                   // for the corresponding form when the bit is set.
                   // MaybeNfc / MaybeNfkc: QuickCheck () answers "Maybe".
18                 public const int NoNfd = 1;
19                 public const int NoNfkd = 2;
20                 public const int NoNfc = 4;
21                 public const int MaybeNfc = 8;
22                 public const int NoNfkc = 16;
23                 public const int MaybeNfkc = 32;
                   // Tested by GetPrimaryCompositeMapIndex (): a character with
                   // this bit is never used as the start of a composition.
24                 public const int FullCompositionExclusion = 64;
                   // Tested by CanBePrimaryComposite () for code points outside
                   // 0x3400..0x9FBB.
25                 public const int IsUnsafe = 128;
                   // The values below are disabled and not referenced in this class.
26 //              public const int ExpandOnNfd = 256;
27 //              public const int ExpandOnNfc = 512;
28 //              public const int ExpandOnNfkd = 1024;
29 //              public const int ExpandOnNfkc = 2048;
30
31                 static uint PropValue (int cp)
32                 {
33                         return props [NUtil.PropIdx (cp)];
34                 }
35
36                 static int CharMapIdx (int cp)
37                 {
38                         return charMapIndex [NUtil.MapIdx (cp)];
39                 }
40
41                 static int GetComposedStringLength (int ch)
42                 {
43                         int start = charMapIndex [NUtil.MapIdx (ch)];
44                         int i = start;
45                         while (mappedChars [i] != 0)
46                                 i++;
47                         return i - start;
48                 }
49
50                 static byte GetCombiningClass (int c)
51                 {
52                         return combiningClass [NUtil.Combining.ToIndex (c)];
53                 }
54
55                 static int GetPrimaryCompositeFromMapIndex (int src)
56                 {
57                         return mapIdxToComposite [NUtil.Composite.ToIndex (src)];
58                 }
59
60                 static short GetPrimaryCompositeHelperIndex (int cp)
61                 {
62                         return helperIndex [NUtil.Helper.ToIndex (cp)];
63                 }
64
65                 static int GetPrimaryCompositeCharIndex (object chars, int start)
66                 {
67                         string s = chars as string;
68                         StringBuilder sb = chars as StringBuilder;
69                         char startCh = s != null ? s [start] : sb [start];
70                         int charsLength = sb != null ? sb.Length : s.Length;
71
72                         int idx = GetPrimaryCompositeHelperIndex ((int) startCh);
73                         if (idx == 0)
74                                 return 0;
75                         while (mappedChars [idx] == startCh) {
76                                 for (int i = 1, j = 1; ; i++, j++) {
77                                         if (mappedChars [idx + i] == 0)
78                                                 // matched
79                                                 return idx;
80                                         if (start + i >= charsLength)
81                                                 return 0; // didn't match
82
83                                         // handle blocked characters here.
84                                         char curCh;
85                                         int combiningClass;
86                                         int nextCB = 0;
87                                         do {
88                                                 curCh = s != null ?
89                                                         s [start + j] :
90                                                         sb [start + j];
91                                                 combiningClass = GetCombiningClass (curCh);
92                                                 if (++j + start >= charsLength ||
93                                                         combiningClass == 0)
94                                                         break;
95                                                 nextCB = GetCombiningClass (
96                                                         s != null ?
97                                                         s [start + j] :
98                                                         sb [start + j]);
99                                         } while (nextCB > 0 && combiningClass >= nextCB);
100                                         j--;
101                                         if (mappedChars [idx + i] == curCh)
102                                                 continue;
103                                         if (mappedChars [idx + i] > curCh)
104                                                 return 0; // no match
105                                         // otherwise move idx to next item
106                                         while (mappedChars [i] != 0)
107                                                 i++;
108                                         idx += i + 1;
109                                         break;
110                                 }
111                         }
112                         // reached to end of entries
113                         return 0;
114                 }
115
116                 private static string Compose (string source, int checkType)
117                 {
118                         StringBuilder sb = null;
119                         Decompose (source, ref sb, checkType);
120                         if (sb == null)
121                                 sb = Combine (source, 0, checkType);
122                         else
123                                 Combine (sb, 0, checkType);
124
125                         return sb != null ? sb.ToString () : source;
126                 }
127
128                 private static StringBuilder Combine (string source, int start, int checkType)
129                 {
130                         for (int i = 0; i < source.Length; i++) {
131                                 if (QuickCheck (source [i], checkType) == NormalizationCheck.Yes)
132                                         continue;
133                                 StringBuilder sb = new StringBuilder (source.Length + source.Length / 10);
134                                 sb.Append (source);
135                                 Combine (sb, i, checkType);
136                                 return sb;
137                         }
138                         return null;
139                 }
140
141                 private static bool CanBePrimaryComposite (int i)
142                 {
143                         if (i >= 0x3400 && i <= 0x9FBB)
144                                 return GetPrimaryCompositeHelperIndex (i) != 0;
145                         return (PropValue (i) & IsUnsafe) != 0;
146                 }
147
                    // Composes characters of sb in place, scanning from index start.
                    // For every character whose quick check is not Yes (a Maybe at
                    // index 0 is skipped as well) the method backs up over the
                    // preceding characters accepted by CanBePrimaryComposite (),
                    // searches those positions for a primary composite mapping and,
                    // when one is found, inserts the composite character and removes
                    // as many following characters as the mapped sequence is long.
                    // Throws SystemException when the tables yield a zero composite
                    // or a zero-length sequence for a found mapping.
148                 private static void Combine (StringBuilder sb, int start, int checkType)
149                 {
150                         for (int i = start; i < sb.Length; i++) {
151                                 switch (QuickCheck (sb [i], checkType)) {
152                                 case NormalizationCheck.Yes:
153                                         continue;
154                                 case NormalizationCheck.No:
155                                         break;
156                                 case NormalizationCheck.Maybe:
157                                         if (i == 0)
158                                                 continue;
159                                         else
160                                                 break;
161                                 }
162
                                    // cur remembers the position that triggered the search;
                                    // i is moved back to the first position after the nearest
                                    // preceding character that cannot be a primary composite.
163                                 int cur = i;
164                                 // FIXME: It should check "blocked" too
165                                 for (;i >= 0; i--)
166                                         if (!CanBePrimaryComposite ((int) sb [i]))
167                                                 break;
168                                 i++;
                                    // Try every position in [i, cur) as the start of a mapping;
                                    // idx stays 0 when none of them matches.
169                                 int idx = 0;
170                                 for (; i < cur; i++) {
171                                         idx = GetPrimaryCompositeMapIndex (sb, (int) sb [i], i);
172                                         if (idx > 0)
173                                                 break;
174                                 }
175                                 if (idx == 0) {
                                            // Nothing to compose: resume the outer scan after cur.
176                                         i = cur;
177                                         continue;
178                                 }
179                                 int ch = GetPrimaryCompositeFromMapIndex (idx);
180                                 int len = GetComposedStringLength (ch);
181                                 if (ch == 0 || len == 0)
182                                         throw new SystemException ("Internal error: should not happen.");
183                                 int removed = 0;
184                                 sb.Insert (i++, (char) ch); // always single character
185
186                                 // handle blocked characters here.
                                    // Removes len characters after the inserted composite. A
                                    // character is stepped over (kept) when its combining class
                                    // is non-zero and not lower than the non-zero class of the
                                    // character that follows it.
187                                 while (removed < len) {
188                                         if (i + 1 < sb.Length) {
189                                                 int cb = GetCombiningClass (sb [i]);
190                                                 if (cb > 0) {
191                                                         int next = GetCombiningClass (sb [i + 1]);
192                                                         if (next != 0 && cb >= next) {
193                                                                 i++;
194                                                                 continue;
195                                                         }
196                                                 }
197                                         }
198                                         sb.Remove (i, 1);
199                                         removed++;
200                                 }
                                    // The loop increment brings i back to cur, so the same
                                    // position is examined again after the edit.
201                                 i = cur - 1; // apply recursively
202                         }
203                 }
204
205                 static int GetPrimaryCompositeMapIndex (object o, int cur, int bufferPos)
206                 {
207                         if ((PropValue (cur) & FullCompositionExclusion) != 0)
208                                 return 0;
209                         if (GetCombiningClass (cur) != 0)
210                                 return 0; // not a starter
211                         return GetPrimaryCompositeCharIndex (o, bufferPos);
212                 }
213
214                 static string Decompose (string source, int checkType)
215                 {
216                         StringBuilder sb = null;
217                         Decompose (source, ref sb, checkType);
218                         return sb != null ? sb.ToString () : source;
219                 }
220
221                 static void Decompose (string source,
222                         ref StringBuilder sb, int checkType)
223                 {
224                         int [] buf = null;
225                         int start = 0;
226                         for (int i = 0; i < source.Length; i++)
227                                 if (QuickCheck (source [i], checkType) == NormalizationCheck.No)
228                                         DecomposeChar (ref sb, ref buf, source,
229                                                 i, ref start);
230                         if (sb != null)
231                                 sb.Append (source, start, source.Length - start);
232                         ReorderCanonical (source, ref sb, 1);
233                 }
234
235                 static void ReorderCanonical (string src, ref StringBuilder sb, int start)
236                 {
237                         if (sb == null) {
238                                 // check only with src.
239                                 for (int i = 1; i < src.Length; i++) {
240                                         int level = GetCombiningClass (src [i]);
241                                         if (level == 0)
242                                                 continue;
243                                         if (GetCombiningClass (src [i - 1]) > level) {
244                                                 sb = new StringBuilder (src.Length);
245                                                 sb.Append (src, 0, src.Length);
246                                                 ReorderCanonical (src, ref sb, i);
247                                                 return;
248                                         }
249                                 }
250                                 return;
251                         }
252                         // check only with sb
253                         for (int i = start; i < sb.Length; i++) {
254                                 int level = GetCombiningClass (sb [i]);
255                                 if (level == 0)
256                                         continue;
257                                 if (GetCombiningClass (sb [i - 1]) > level) {
258                                         char c = sb [i - 1];
259                                         sb [i - 1] = sb [i];
260                                         sb [i] = c;
261                                         i--; // apply recursively
262                                 }
263                         }
264                 }
265
266                 static void DecomposeChar (ref StringBuilder sb,
267                         ref int [] buf, string s, int i, ref int start)
268                 {
269                         if (sb == null)
270                                 sb = new StringBuilder (s.Length + 100);
271                         sb.Append (s, start, i - start);
272                         if (buf == null)
273                                 buf = new int [19];
274                         GetCanonical (s [i], buf, 0);
275                         for (int x = 0; ; x++) {
276                                 if (buf [x] == 0)
277                                         break;
278                                 if (buf [x] < char.MaxValue)
279                                         sb.Append ((char) buf [x]);
280                                 else { // surrogate
281                                         sb.Append ((char) (buf [x] >> 10 + 0xD800));
282                                         sb.Append ((char) ((buf [x] & 0x0FFF) + 0xDC00));
283                                 }
284                         }
285                         start = i + 1;
286                 }
287
288                 public static NormalizationCheck QuickCheck (char c, int type)
289                 {
290                         uint v;
291                         switch (type) {
292                         default: // NFC
293                                 v = PropValue ((int) c);
294                                 return (v & NoNfc) == 0 ?
295                                         (v & MaybeNfc) == 0 ?
296                                         NormalizationCheck.Yes :
297                                         NormalizationCheck.Maybe :
298                                         NormalizationCheck.No;
299                         case 1: // NFD
300                                 if ('\uAC00' <= c && c <= '\uD7A3')
301                                         return NormalizationCheck.No;
302                                 return (PropValue ((int) c) & NoNfd) != 0 ?
303                                         NormalizationCheck.No : NormalizationCheck.Yes;
304                         case 2: // NFKC
305                                 v = PropValue ((int) c);
306                                 return (v & NoNfkc) != 0 ? NormalizationCheck.No :
307                                         (v & MaybeNfkc) != 0 ?
308                                         NormalizationCheck.Maybe :
309                                         NormalizationCheck.Yes;
310                         case 3: // NFKD
311                                 if ('\uAC00' <= c && c <= '\uD7A3')
312                                         return NormalizationCheck.No;
313                                 return (PropValue ((int) c) & NoNfkd) != 0 ?
314                                         NormalizationCheck.No : NormalizationCheck.Yes;
315                         }
316                 }
317
318                 /* for now we don't use FC_NFKC closure
319                 public static bool IsMultiForm (char c)
320                 {
321                         return (PropValue ((int) c) & 0xF0000000) != 0;
322                 }
323
324                 public static char SingleForm (char c)
325                 {
326                         uint v = PropValue ((int) c);
327                         int idx = (int) ((v & 0x7FFF0000) >> 16);
328                         return (char) singleNorm [idx];
329                 }
330
331                 public static void MultiForm (char c, char [] buf, int index)
332                 {
333                         // FIXME: handle surrogate
334                         uint v = PropValue ((int) c);
335                         int midx = (int) ((v & 0x7FFF0000) >> 16);
336                         buf [index] = (char) multiNorm [midx];
337                         buf [index + 1] = (char) multiNorm [midx + 1];
338                         buf [index + 2] = (char) multiNorm [midx + 2];
339                         buf [index + 3] = (char) multiNorm [midx + 3];
340                         if (buf [index + 3] != 0)
341                                 buf [index + 4] = (char) 0; // zero termination
342                 }
343                 */
344
345                 public static void GetCanonical (int c, int [] buf, int bufIdx)
346                 {
347                         for (int i = CharMapIdx (c); mappedChars [i] != 0; i++)
348                                 buf [bufIdx++] = mappedChars [i];
349                         buf [bufIdx] = (char) 0;
350                 }
351
                    // Reports whether source already satisfies the form selected by
                    // type (same numbering as QuickCheck ()).
                    // Returns false as soon as a character with a non-zero combining
                    // class follows one with a higher class, a quick check answers
                    // No, or a "Maybe" character is preceded by text for which a
                    // primary composite entry exists. Returns true otherwise.
352                 public static bool IsNormalized (string source, int type)
353                 {
                            // Combining class of the previous character (-1 before the first).
354                         int prevCC = -1;
355                         for (int i = 0; i < source.Length; i++) {
356                                 int cc = GetCombiningClass (source [i]);
357                                 if (cc != 0 && cc < prevCC)
358                                         return false;
359                                 prevCC = cc;
360                                 switch (QuickCheck (source [i], type)) {
361                                 case NormalizationCheck.Yes:
362                                         break;
363                                 case NormalizationCheck.No:
364                                         return false;
365                                 case NormalizationCheck.Maybe:
366                                         // partly copied from Combine()
                                            // The loop index itself is moved backwards here and
                                            // ends at cur again when the inner loop finishes.
367                                         int cur = i;
368                                         // FIXME: It should check "blocked" too
369                                         for (;i >= 0; i--)
370                                                 if (!CanBePrimaryComposite ((int) source [i]))
371                                                         break;
372                                         i++;
373                                         // Now i is the "starter"
374                                         for (; i < cur; i++) {
375                                                 if (GetPrimaryCompositeCharIndex (source, i) != 0)
376                                                         return false;
377                                         }
378                                         break;
379                                 }
380                         }
381                         return true;
382                 }
383
384                 public static string Normalize (string source, int type)
385                 {
386                         switch (type) {
387                         default:
388                         case 2:
389                                 return Compose (source, type);
390                         case 1:
391                                 return Decompose (source, type);
392                         }
393                 }
394
                    // Raw pointers to the normalization tables; they are assigned
                    // once by the static constructor.
                    // props: per-code-point flag bytes read by PropValue ().
395                 static byte* props;
                    // mappedChars: 0-terminated runs of code points, addressed
                    // through charMapIndex and helperIndex.
396                 static int* mappedChars;
                    // charMapIndex: positions in mappedChars, read by CharMapIdx ().
397                 static short* charMapIndex;
                    // helperIndex: positions in mappedChars, read by
                    // GetPrimaryCompositeHelperIndex (); 0 means "no entry".
398                 static short* helperIndex;
                    // mapIdxToComposite: read by GetPrimaryCompositeFromMapIndex ().
399                 static ushort* mapIdxToComposite;
                    // combiningClass: read by GetCombiningClass ().
400                 static byte* combiningClass;
401
402 #if GENERATE_TABLE
403
                    // With GENERATE_TABLE the tables come from arrays compiled into
                    // this class, so they are available unconditionally.
404                 public static readonly bool IsReady = true; // always
405
                    // Takes the address of each table array and stores it in the
                    // matching static pointer field.
                    // NOTE(review): every pointer is kept after its fixed block has
                    // ended, while C# only guarantees the address inside the fixed
                    // statement - confirm that these arrays can never be relocated.
406                 static Normalization ()
407                 {
408                         fixed (byte* tmp = propsArr) {
409                                 props = tmp;
410                         }
411                         fixed (int* tmp = mappedCharsArr) {
412                                 mappedChars = tmp;
413                         }
414                         fixed (short* tmp = charMapIndexArr) {
415                                 charMapIndex = tmp;
416                         }
417                         fixed (short* tmp = helperIndexArr) {
418                                 helperIndex = tmp;
419                         }
420                         fixed (ushort* tmp = mapIdxToCompositeArr) {
421                                 mapIdxToComposite = tmp;
422                         }
423                         fixed (byte* tmp = combiningClassArr) {
424                                 combiningClass = tmp;
425                         }
426                 }
427 #else
428
                    // Lock object taken by the static constructor while the table
                    // pointers are being assigned.
429                 static object forLock = new object ();
                    // Set to true at the end of the static constructor.
430                 public static readonly bool isReady;
431
                    // Read-only accessor for isReady.
432                 public static bool IsReady {
433                         get { return isReady; }
434                 }
435
                    // Internal call implemented outside this file. It hands back one
                    // address per table; the static constructor casts them to the
                    // typed pointer fields of the same names.
436                 [MethodImpl (MethodImplOptions.InternalCall)]
437                 static extern void load_normalization_resource (
438                         out IntPtr props, out IntPtr mappedChars,
439                         out IntPtr charMapIndex, out IntPtr helperIndex,
440                         out IntPtr mapIdxToComposite, out IntPtr combiningClass);
441
442                 static Normalization ()
443                 {
444                         IntPtr p1, p2, p3, p4, p5, p6;
445                         lock (forLock) {
446                                 load_normalization_resource (out p1, out p2, out p3, out p4, out p5, out p6);
447                                 props = (byte*) p1;
448                                 mappedChars = (int*) p2;
449                                 charMapIndex = (short*) p3;
450                                 helperIndex = (short*) p4;
451                                 mapIdxToComposite = (ushort*) p5;
452                                 combiningClass = (byte*) p6;
453                         }
454
455                         isReady = true;
456                 }
457         }
458 }
459 #endif
460
461                 //
462                 // autogenerated code or icall to fill array runs here
463                 //
464