2005-07-27 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / Normalization.template
1 using System;
2 using System.Globalization;
3 using System.Text;
4
5 namespace Mono.Globalization.Unicode
6 {
7         internal enum NormalizationCheck {
8                 Yes,
9                 No,
10                 Maybe
11         }
12
13         internal /*static*/ class Normalization
14         {
15                 public const int NoNfd = 1;
16                 public const int NoNfkd = 2;
17                 public const int NoNfc = 4;
18                 public const int MaybeNfc = 8;
19                 public const int NoNfkc = 16;
20                 public const int MaybeNfkc = 32;
21                 public const int ExpandOnNfd = 64;
22                 public const int ExpandOnNfc = 128;
23                 public const int ExpandOnNfkd = 256;
24                 public const int ExpandOnNfkc = 512;
25                 public const int FullCompositionExclusion = 1024;
26
27                 private delegate NormalizationCheck Checker (char c);
28
29                 private static Checker noNfd = new Checker (IsNfd);
30                 private static Checker noNfc = new Checker (IsNfc);
31                 private static Checker noNfkd = new Checker (IsNfkd);
32                 private static Checker noNfkc = new Checker (IsNfkc);
33
34                 static int PropIdx (int cp)
35                 {
36                         return propIdx [NormalizationTableUtil.PropIdx (cp)];
37                 }
38
39                 static int MapIdx (int cp)
40                 {
41                         return mapIndex [NormalizationTableUtil.MapIdx (cp)];
42                 }
43
44                 static int GetComposedStringLength (int mapIdx)
45                 {
46                         int i = mapIdx;
47                         while (mappedChars [i] != 0)
48                                 i++;
49                         return i - mapIdx;
50                 }
51
52                 static int GetPrimaryCompositeCharIndex (object chars, int start, int charsLength)
53                 {
54                         string s = chars as string;
55                         StringBuilder sb = chars as StringBuilder;
56                         char startCh = s != null ? s [start] : sb [start];
57
58                         int idx = GetPrimaryCompositeHelperIndex ((int) startCh);
59                         if (idx == 0)
60                                 return 0;
61                         while (idx < mappedChars.Length &&
62                                 mappedChars [idx] == startCh) {
63                                 for (int i = 1; ; i++) {
64                                         if (mappedChars [idx + i] == 0)
65                                                 // match
66                                                 return idx;
67                                         if (start + i < charsLength)
68                                                 return 0; // no match
69                                         char curCh = s != null ?
70                                                 s [start + i] : sb [start + i];
71                                         if (mappedChars [idx + i] == curCh)
72                                                 continue;
73                                         if (mappedChars [idx + i] > curCh)
74                                                 return 0; // no match
75                                         // otherwise move idx to next item
76                                         while (mappedChars [i] != 0)
77                                                 i++;
78                                         idx = i + 1;
79                                         break;
80                                 }
81                         }
82                         // reached to end of entries
83                         return 0;
84                 }
85
86                 private static string Compose (string source, Checker checker)
87                 {
88                         StringBuilder sb = null;
89                         Decompose (source, ref sb, checker);
90                         if (sb == null)
91                                 sb = Combine (source, 0, checker);
92                         else
93                                 Combine (sb, 0, checker);
94
95                         return sb != null ? sb.ToString () : source;
96                 }
97
98                 private static StringBuilder Combine (string source, int start, Checker checker)
99                 {
100                         for (int i = 0; i < source.Length; i++) {
101                                 if (checker (source [i]) == NormalizationCheck.Yes)
102                                         continue;
103                                 StringBuilder sb = new StringBuilder (source.Length);
104                                 sb.Append (source);
105                                 Combine (sb, 0, checker);
106                                 return sb;
107                         }
108                         return null;
109                 }
110                 
111                 private static void Combine (StringBuilder sb, int start, Checker checker)
112                 {
113                         for (int i = start; i < sb.Length; i++) {
114                                 switch (checker (sb [i])) {
115                                 case NormalizationCheck.Yes:
116                                         continue;
117                                 case NormalizationCheck.No:
118                                         break;
119                                 case NormalizationCheck.Maybe:
120                                         break;
121                                 }
122
123                                 // x is starter, or sb[i] is blocked
124                                 int x = i - 1;
125
126                                 int ch = 0;
127                                 int idx = GetPrimaryComposite (sb, (int) sb [i], sb.Length, x, ref ch);
128                                 if (idx == 0)
129                                         continue;
130                                 sb.Remove (x, GetComposedStringLength (idx));
131                                 sb.Insert (x, (char) ch);
132                                 i--; // apply recursively
133                         }
134                 }
135
136                 static int GetPrimaryComposite (object o, int cur, int length, int bufferPos, ref int ch)
137                 {
138                         if ((propValue [PropIdx (cur)] & FullCompositionExclusion) != 0)
139                                 return 0;
140                         if (GetCombiningClass (cur) != 0)
141                                 return 0; // not a starter
142                         int idx = GetPrimaryCompositeCharIndex (o, bufferPos, length);
143                         if (idx == 0)
144                                 return 0;
145                         return GetPrimaryCompositeFromMapIndex (idx);
146                 }
147
148                 static bool IsNormalized (string source,
149                         Checker checker)
150                 {
151                         int prevCC = -1;
152                         for (int i = 0; i < source.Length; i++) {
153                                 int cc = GetCombiningClass (source [i]);
154                                 if (cc != 0 && cc < prevCC)
155                                         return false;
156                                 prevCC = cc;
157                                 switch (checker (source [i])) {
158                                 case NormalizationCheck.Yes:
159                                         break;
160                                 case NormalizationCheck.No:
161                                         return false;
162                                 case NormalizationCheck.Maybe:
163                                         int ch = 0;
164                                         if (GetPrimaryComposite (source,
165                                                 source [i], source.Length,
166                                                 i, ref ch) != 0)
167                                                 return false;
168                                         break;
169                                 }
170                         }
171                         return true;
172                 }
173
174                 static string Decompose (string source, Checker checker)
175                 {
176                         StringBuilder sb = null;
177                         Decompose (source, ref sb, checker);
178                         return sb != null ? sb.ToString () : source;
179                 }
180
181                 static void Decompose (string source,
182                         ref StringBuilder sb, Checker checker)
183                 {
184                         int [] buf = null;
185                         int start = 0;
186                         for (int i = 0; i < source.Length; i++)
187                                 if (checker (source [i]) == NormalizationCheck.No)
188                                         DecomposeChar (ref sb, ref buf, source,
189                                                 i, ref start);
190                         if (sb != null)
191                                 sb.Append (source, start, source.Length - start);
192                         ReorderCanonical (source, ref sb, 1);
193                 }
194
195                 static void ReorderCanonical (string src, ref StringBuilder sb, int start)
196                 {
197                         if (sb == null) {
198                                 // check only with src.
199                                 for (int i = 1; i < src.Length; i++) {
200                                         int level = GetCombiningClass (src [i]);
201                                         if (level == 0)
202                                                 continue;
203                                         if (GetCombiningClass (src [i - 1]) > level) {
204                                                 sb = new StringBuilder (src.Length);
205                                                 sb.Append (src, 0, i - 1);
206                                                 ReorderCanonical (src, ref sb, i);
207                                                 return;
208                                         }
209                                 }
210                                 return;
211                         }
212                         // check only with sb
213                         for (int i = start; i < sb.Length; i++) {
214                                 int level = GetCombiningClass (sb [i]);
215                                 if (level == 0)
216                                         continue;
217                                 if (GetCombiningClass (sb [i - 1]) > level) {
218                                         char c = sb [i - 1];
219                                         sb [i - 1] = sb [i];
220                                         sb [i] = c;
221                                         i--; // apply recursively
222                                 }
223                         }
224                 }
225
226                 static void DecomposeChar (ref StringBuilder sb,
227                         ref int [] buf, string s, int i, ref int start)
228                 {
229                         if (sb == null)
230                                 sb = new StringBuilder (s.Length + 100);
231                         sb.Append (s, start, i - start);
232                         if (buf == null)
233                                 buf = new int [5];
234                         GetCanonical (s [i], buf, 0);
235                         for (int x = 0; ; x++) {
236                                 if (buf [x] == 0)
237                                         break;
238                                 if (buf [x] < char.MaxValue)
239                                         sb.Append ((char) buf [x]);
240                                 else { // surrogate
241                                         sb.Append ((char) (buf [x] >> 10 + 0xD800));
242                                         sb.Append ((char) ((buf [x] & 0x0FFF) + 0xDC00));
243                                 }
244                         }
245                         start = i + 1;
246                 }
247
248                 public static NormalizationCheck IsNfd (char c)
249                 {
250                         return (propValue [PropIdx ((int) c)] & NoNfd) == 0 ?
251                                 NormalizationCheck.Yes : NormalizationCheck.No;
252                 }
253
254                 public static NormalizationCheck IsNfc (char c)
255                 {
256                         uint v = propValue [PropIdx ((int) c)];
257                         return (v & NoNfc) == 0 ?
258                                 (v & MaybeNfc) == 0 ?
259                                 NormalizationCheck.Yes :
260                                 NormalizationCheck.Maybe :
261                                 NormalizationCheck.No;
262                 }
263
264                 public static NormalizationCheck IsNfkd (char c)
265                 {
266                         return (propValue [PropIdx ((int) c)] & NoNfkd) == 0 ?
267                                 NormalizationCheck.Yes : NormalizationCheck.No;
268                 }
269
270                 public static NormalizationCheck IsNfkc (char c)
271                 {
272                         uint v = propValue [PropIdx ((int) c)];
273                         return (v & NoNfkc) == 0 ?
274                                 (v & MaybeNfkc) == 0 ?
275                                 NormalizationCheck.Yes :
276                                 NormalizationCheck.Maybe :
277                                 NormalizationCheck.No;
278                 }
279
280                 /* for now we don't use FC_NFKC closure
281                 public static bool IsMultiForm (char c)
282                 {
283                         return (propValue [PropIdx ((int) c)] & 0xF0000000) != 0;
284                 }
285
286                 public static char SingleForm (char c)
287                 {
288                         uint v = propValue [PropIdx ((int) c)];
289                         int idx = (int) ((v & 0x7FFF0000) >> 16);
290                         return (char) singleNorm [idx];
291                 }
292
293                 public static void MultiForm (char c, char [] buf, int index)
294                 {
295                         // FIXME: handle surrogate
296                         uint v = propValue [PropIdx ((int) c)];
297                         int midx = (int) ((v & 0x7FFF0000) >> 16);
298                         buf [index] = (char) multiNorm [midx];
299                         buf [index + 1] = (char) multiNorm [midx + 1];
300                         buf [index + 2] = (char) multiNorm [midx + 2];
301                         buf [index + 3] = (char) multiNorm [midx + 3];
302                         if (buf [index + 3] != 0)
303                                 buf [index + 4] = (char) 0; // zero termination
304                 }
305                 */
306
307                 public static void GetCanonical (int c, int [] buf, int bufIdx)
308                 {
309                         for (int i = MapIdx (c); mappedChars [i] != 0; i++)
310                                 buf [bufIdx++] = mappedChars [i];
311                         buf [bufIdx] = (char) 0;
312                 }
313
314                 public static bool IsNormalized (string source, int type)
315                 {
316                         switch (type) {
317                         default:
318                                 return IsNormalized (source, noNfc);
319                         case 1:
320                                 return IsNormalized (source, noNfd);
321                         case 2:
322                                 return IsNormalized (source, noNfkc);
323                         case 3:
324                                 return IsNormalized (source, noNfkd);
325                         }
326                 }
327
328                 public static string Normalize (string source, int type)
329                 {
330                         switch (type) {
331                         default:
332                                 return Compose (source, noNfc);
333                         case 1:
334                                 return Decompose (source, noNfd);
335                         case 2:
336                                 return Compose (source, noNfkc);
337                         case 3:
338                                 return Decompose (source, noNfkd);
339                         }
340                 }
341
342         // The code below is autogenerated.
343