2005-07-28 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / Normalization.template
1 using System;
2 using System.Globalization;
3 using System.Text;
4
5 using Util = Mono.Globalization.Unicode.NormalizationTableUtil;
6
7 namespace Mono.Globalization.Unicode
8 {
9         internal enum NormalizationCheck {
10                 Yes,
11                 No,
12                 Maybe
13         }
14
15         internal /*static*/ class Normalization
16         {
17                 public const int NoNfd = 1;
18                 public const int NoNfkd = 2;
19                 public const int NoNfc = 4;
20                 public const int MaybeNfc = 8;
21                 public const int NoNfkc = 16;
22                 public const int MaybeNfkc = 32;
23                 public const int ExpandOnNfd = 64;
24                 public const int ExpandOnNfc = 128;
25                 public const int ExpandOnNfkd = 256;
26                 public const int ExpandOnNfkc = 512;
27                 public const int FullCompositionExclusion = 1024;
28
29                 private delegate NormalizationCheck Checker (char c);
30
31                 private static Checker noNfd = new Checker (IsNfd);
32                 private static Checker noNfc = new Checker (IsNfc);
33                 private static Checker noNfkd = new Checker (IsNfkd);
34                 private static Checker noNfkc = new Checker (IsNfkc);
35
36                 static int PropIdx (int cp)
37                 {
38                         return propIdx [Util.PropIdx (cp)];
39                 }
40
41                 static int MapIdx (int cp)
42                 {
43                         return mapIndex [Util.MapIdx (cp)];
44                 }
45
46                 static int GetComposedStringLength (int mapIdx)
47                 {
48                         int i = mapIdx;
49                         while (mappedChars [i] != 0)
50                                 i++;
51                         return i - mapIdx;
52                 }
53
54                 static byte GetCombiningClass (int c)
55                 {
56                         return combiningClass [Util.ToIdx (c)];
57                 }
58
59                 static int GetPrimaryCompositeCharIndex (object chars, int start, int charsLength)
60                 {
61                         string s = chars as string;
62                         StringBuilder sb = chars as StringBuilder;
63                         char startCh = s != null ? s [start] : sb [start];
64
65                         int idx = GetPrimaryCompositeHelperIndex ((int) startCh);
66                         if (idx == 0)
67                                 return 0;
68                         while (idx < mappedChars.Length &&
69                                 mappedChars [idx] == startCh) {
70                                 for (int i = 1; ; i++) {
71                                         if (mappedChars [idx + i] == 0)
72                                                 // match
73                                                 return idx;
74                                         if (start + i < charsLength)
75                                                 return 0; // no match
76                                         char curCh = s != null ?
77                                                 s [start + i] : sb [start + i];
78                                         if (mappedChars [idx + i] == curCh)
79                                                 continue;
80                                         if (mappedChars [idx + i] > curCh)
81                                                 return 0; // no match
82                                         // otherwise move idx to next item
83                                         while (mappedChars [i] != 0)
84                                                 i++;
85                                         idx = i + 1;
86                                         break;
87                                 }
88                         }
89                         // reached to end of entries
90                         return 0;
91                 }
92
93                 private static string Compose (string source, Checker checker)
94                 {
95                         StringBuilder sb = null;
96                         Decompose (source, ref sb, checker);
97                         if (sb == null)
98                                 sb = Combine (source, 0, checker);
99                         else
100                                 Combine (sb, 0, checker);
101
102                         return sb != null ? sb.ToString () : source;
103                 }
104
105                 private static StringBuilder Combine (string source, int start, Checker checker)
106                 {
107                         for (int i = 0; i < source.Length; i++) {
108                                 if (checker (source [i]) == NormalizationCheck.Yes)
109                                         continue;
110                                 StringBuilder sb = new StringBuilder (source.Length);
111                                 sb.Append (source);
112                                 Combine (sb, 0, checker);
113                                 return sb;
114                         }
115                         return null;
116                 }
117                 
118                 private static void Combine (StringBuilder sb, int start, Checker checker)
119                 {
120                         for (int i = start; i < sb.Length; i++) {
121                                 switch (checker (sb [i])) {
122                                 case NormalizationCheck.Yes:
123                                         continue;
124                                 case NormalizationCheck.No:
125                                         break;
126                                 case NormalizationCheck.Maybe:
127                                         break;
128                                 }
129
130                                 // x is starter, or sb[i] is blocked
131                                 int x = i - 1;
132
133                                 int ch = 0;
134                                 int idx = GetPrimaryComposite (sb, (int) sb [i], sb.Length, x, ref ch);
135                                 if (idx == 0)
136                                         continue;
137                                 sb.Remove (x, GetComposedStringLength (idx));
138                                 sb.Insert (x, (char) ch);
139                                 i--; // apply recursively
140                         }
141                 }
142
143                 static int GetPrimaryComposite (object o, int cur, int length, int bufferPos, ref int ch)
144                 {
145                         if ((propValue [PropIdx (cur)] & FullCompositionExclusion) != 0)
146                                 return 0;
147                         if (GetCombiningClass (cur) != 0)
148                                 return 0; // not a starter
149                         int idx = GetPrimaryCompositeCharIndex (o, bufferPos, length);
150                         if (idx == 0)
151                                 return 0;
152                         return GetPrimaryCompositeFromMapIndex (idx);
153                 }
154
155                 static bool IsNormalized (string source,
156                         Checker checker)
157                 {
158                         int prevCC = -1;
159                         for (int i = 0; i < source.Length; i++) {
160                                 int cc = GetCombiningClass (source [i]);
161                                 if (cc != 0 && cc < prevCC)
162                                         return false;
163                                 prevCC = cc;
164                                 switch (checker (source [i])) {
165                                 case NormalizationCheck.Yes:
166                                         break;
167                                 case NormalizationCheck.No:
168                                         return false;
169                                 case NormalizationCheck.Maybe:
170                                         int ch = 0;
171                                         if (GetPrimaryComposite (source,
172                                                 source [i], source.Length,
173                                                 i, ref ch) != 0)
174                                                 return false;
175                                         break;
176                                 }
177                         }
178                         return true;
179                 }
180
181                 static string Decompose (string source, Checker checker)
182                 {
183                         StringBuilder sb = null;
184                         Decompose (source, ref sb, checker);
185                         return sb != null ? sb.ToString () : source;
186                 }
187
188                 static void Decompose (string source,
189                         ref StringBuilder sb, Checker checker)
190                 {
191                         int [] buf = null;
192                         int start = 0;
193                         for (int i = 0; i < source.Length; i++)
194                                 if (checker (source [i]) == NormalizationCheck.No)
195                                         DecomposeChar (ref sb, ref buf, source,
196                                                 i, ref start);
197                         if (sb != null)
198                                 sb.Append (source, start, source.Length - start);
199                         ReorderCanonical (source, ref sb, 1);
200                 }
201
202                 static void ReorderCanonical (string src, ref StringBuilder sb, int start)
203                 {
204                         if (sb == null) {
205                                 // check only with src.
206                                 for (int i = 1; i < src.Length; i++) {
207                                         int level = GetCombiningClass (src [i]);
208                                         if (level == 0)
209                                                 continue;
210                                         if (GetCombiningClass (src [i - 1]) > level) {
211                                                 sb = new StringBuilder (src.Length);
212                                                 sb.Append (src, 0, i - 1);
213                                                 ReorderCanonical (src, ref sb, i);
214                                                 return;
215                                         }
216                                 }
217                                 return;
218                         }
219                         // check only with sb
220                         for (int i = start; i < sb.Length; i++) {
221                                 int level = GetCombiningClass (sb [i]);
222                                 if (level == 0)
223                                         continue;
224                                 if (GetCombiningClass (sb [i - 1]) > level) {
225                                         char c = sb [i - 1];
226                                         sb [i - 1] = sb [i];
227                                         sb [i] = c;
228                                         i--; // apply recursively
229                                 }
230                         }
231                 }
232
233                 static void DecomposeChar (ref StringBuilder sb,
234                         ref int [] buf, string s, int i, ref int start)
235                 {
236                         if (sb == null)
237                                 sb = new StringBuilder (s.Length + 100);
238                         sb.Append (s, start, i - start);
239                         if (buf == null)
240                                 buf = new int [5];
241                         GetCanonical (s [i], buf, 0);
242                         for (int x = 0; ; x++) {
243                                 if (buf [x] == 0)
244                                         break;
245                                 if (buf [x] < char.MaxValue)
246                                         sb.Append ((char) buf [x]);
247                                 else { // surrogate
248                                         sb.Append ((char) (buf [x] >> 10 + 0xD800));
249                                         sb.Append ((char) ((buf [x] & 0x0FFF) + 0xDC00));
250                                 }
251                         }
252                         start = i + 1;
253                 }
254
255                 public static NormalizationCheck IsNfd (char c)
256                 {
257                         return (propValue [PropIdx ((int) c)] & NoNfd) == 0 ?
258                                 NormalizationCheck.Yes : NormalizationCheck.No;
259                 }
260
261                 public static NormalizationCheck IsNfc (char c)
262                 {
263                         uint v = propValue [PropIdx ((int) c)];
264                         return (v & NoNfc) == 0 ?
265                                 (v & MaybeNfc) == 0 ?
266                                 NormalizationCheck.Yes :
267                                 NormalizationCheck.Maybe :
268                                 NormalizationCheck.No;
269                 }
270
271                 public static NormalizationCheck IsNfkd (char c)
272                 {
273                         return (propValue [PropIdx ((int) c)] & NoNfkd) == 0 ?
274                                 NormalizationCheck.Yes : NormalizationCheck.No;
275                 }
276
277                 public static NormalizationCheck IsNfkc (char c)
278                 {
279                         uint v = propValue [PropIdx ((int) c)];
280                         return (v & NoNfkc) == 0 ?
281                                 (v & MaybeNfkc) == 0 ?
282                                 NormalizationCheck.Yes :
283                                 NormalizationCheck.Maybe :
284                                 NormalizationCheck.No;
285                 }
286
287                 /* for now we don't use FC_NFKC closure
288                 public static bool IsMultiForm (char c)
289                 {
290                         return (propValue [PropIdx ((int) c)] & 0xF0000000) != 0;
291                 }
292
293                 public static char SingleForm (char c)
294                 {
295                         uint v = propValue [PropIdx ((int) c)];
296                         int idx = (int) ((v & 0x7FFF0000) >> 16);
297                         return (char) singleNorm [idx];
298                 }
299
300                 public static void MultiForm (char c, char [] buf, int index)
301                 {
302                         // FIXME: handle surrogate
303                         uint v = propValue [PropIdx ((int) c)];
304                         int midx = (int) ((v & 0x7FFF0000) >> 16);
305                         buf [index] = (char) multiNorm [midx];
306                         buf [index + 1] = (char) multiNorm [midx + 1];
307                         buf [index + 2] = (char) multiNorm [midx + 2];
308                         buf [index + 3] = (char) multiNorm [midx + 3];
309                         if (buf [index + 3] != 0)
310                                 buf [index + 4] = (char) 0; // zero termination
311                 }
312                 */
313
314                 public static void GetCanonical (int c, int [] buf, int bufIdx)
315                 {
316                         for (int i = MapIdx (c); mappedChars [i] != 0; i++)
317                                 buf [bufIdx++] = mappedChars [i];
318                         buf [bufIdx] = (char) 0;
319                 }
320
321                 public static bool IsNormalized (string source, int type)
322                 {
323                         switch (type) {
324                         default:
325                                 return IsNormalized (source, noNfc);
326                         case 1:
327                                 return IsNormalized (source, noNfd);
328                         case 2:
329                                 return IsNormalized (source, noNfkc);
330                         case 3:
331                                 return IsNormalized (source, noNfkd);
332                         }
333                 }
334
335                 public static string Normalize (string source, int type)
336                 {
337                         switch (type) {
338                         default:
339                                 return Compose (source, noNfc);
340                         case 1:
341                                 return Decompose (source, noNfd);
342                         case 2:
343                                 return Compose (source, noNfkc);
344                         case 3:
345                                 return Decompose (source, noNfkd);
346                         }
347                 }
348
        // The code below is autogenerated.
350