2 using System.Globalization;
5 namespace Mono.Globalization.Unicode
// Result type of the per-character quick-check predicates in this file.
// The members referenced by the visible code are Yes, No and Maybe; the enum
// body itself is not part of this excerpt.
7 internal enum NormalizationCheck {
13 internal /*static*/ class Normalization
// Bit masks applied to propValue entries. Each flag occupies its own bit, so
// several flags can be set on the same entry.

// Quick-check flags: tested by IsNfd / IsNfkd / IsNfc / IsNfkc.
public const int NoNfd = 1 << 0;
public const int NoNfkd = 1 << 1;
public const int NoNfc = 1 << 2;
public const int MaybeNfc = 1 << 3;
public const int NoNfkc = 1 << 4;
public const int MaybeNfkc = 1 << 5;

// Expansion flags, one per normalization form.
public const int ExpandOnNfd = 1 << 6;
public const int ExpandOnNfc = 1 << 7;
public const int ExpandOnNfkd = 1 << 8;
public const int ExpandOnNfkc = 1 << 9;

// Tested by GetPrimaryComposite before any composite lookup is attempted.
public const int FullCompositionExclusion = 1 << 10;
// Shape of a per-character quick check: takes one UTF-16 code unit and
// classifies it as NormalizationCheck.Yes / No / Maybe.
private delegate NormalizationCheck Checker (char c);

// One shared delegate instance per normalization form, created once and then
// passed to Compose / Decompose / IsNormalized.
private static Checker noNfd = IsNfd;
private static Checker noNfc = IsNfc;
private static Checker noNfkd = IsNfkd;
private static Checker noNfkc = IsNfkc;
/// <summary>
/// Resolves the property-table index for a code point: the code point is
/// first translated by NormalizationTableUtil.PropIdx and the result is then
/// used to index the propIdx table.
/// </summary>
/// <param name="cp">Code point to look up.</param>
/// <returns>The propIdx entry for <paramref name="cp"/>.</returns>
static int PropIdx (int cp)
{
    int slot = NormalizationTableUtil.PropIdx (cp);
    return propIdx [slot];
}
/// <summary>
/// Resolves the mapping-table index for a code point: the code point is
/// first translated by NormalizationTableUtil.MapIdx and the result is then
/// used to index the mapIndex table.
/// </summary>
/// <param name="cp">Code point to look up.</param>
/// <returns>The mapIndex entry for <paramref name="cp"/>.</returns>
static int MapIdx (int cp)
{
    int slot = NormalizationTableUtil.MapIdx (cp);
    return mapIndex [slot];
}
// Takes an index into mappedChars and returns an int.
// NOTE(review): only the scan condition survives in this excerpt; presumably
// the method counts the entries from mapIdx up to the zero terminator and
// returns that count - confirm against the full file.
44 static int GetComposedStringLength (int mapIdx)
// Keep scanning while the current mappedChars entry is non-zero.
47 while (mappedChars [i] != 0)
// Searches mappedChars for an entry whose elements match the characters of
// `chars` beginning at position `start`. `chars` is handled as either a string
// or a StringBuilder; `charsLength` is the caller-supplied length bound.
// NOTE(review): the return statements are not visible in this excerpt; judging
// from the surviving comments a match presumably yields idx and a miss yields
// 0 - confirm against the full file.
52 static int GetPrimaryCompositeCharIndex (object chars, int start, int charsLength)
// At most one of these two casts is non-null, depending on the runtime type.
54 string s = chars as string;
55 StringBuilder sb = chars as StringBuilder;
// NOTE(review): if chars is neither type, sb is null here and sb [start] throws.
56 char startCh = s != null ? s [start] : sb [start];
// Helper defined elsewhere; gives the first mappedChars position to examine for startCh.
58 int idx = GetPrimaryCompositeHelperIndex ((int) startCh);
// Visit entries for as long as their first element is still startCh.
61 while (idx < mappedChars.Length &&
62 mappedChars [idx] == startCh) {
// i is the offset inside the current entry; offset 0 was matched by the while condition.
63 for (int i = 1; ; i++) {
// A zero element marks the end of the current entry.
64 if (mappedChars [idx + i] == 0)
// NOTE(review): the statement guarded by this test is not visible. Verify the
// comparison direction: the next visible statement reads position start + i,
// which is only in range when start + i < charsLength.
67 if (start + i < charsLength)
69 char curCh = s != null ?
70 s [start + i] : sb [start + i];
71 if (mappedChars [idx + i] == curCh)
73 if (mappedChars [idx + i] > curCh)
75 // otherwise move idx to next item
// NOTE(review): this indexes mappedChars with the bare offset i, whereas every
// other access in this loop uses idx + i - looks like a missing "idx +"; confirm.
76 while (mappedChars [i] != 0)
82 // reached to end of entries
// Builds the composed form of `source` using the given quick-check delegate:
// Decompose runs first, then one of the Combine overloads.
// Returns `source` itself when no StringBuilder was produced along the way.
86 private static string Compose (string source, Checker checker)
88 StringBuilder sb = null;
// sb is passed by ref, so Decompose may replace the null with a real buffer.
89 Decompose (source, ref sb, checker);
// NOTE(review): the conditions selecting between the next two calls are not
// visible in this excerpt; presumably the string overload runs while sb is
// still null and the in-place overload otherwise - confirm.
91 sb = Combine (source, 0, checker);
93 Combine (sb, 0, checker);
// A null sb means nothing was rewritten, so the input is returned unchanged.
95 return sb != null ? sb.ToString () : source;
// String-input overload of Combine; returns a StringBuilder per its signature.
// NOTE(review): the return statements and the action taken on a Yes result
// are not visible in this excerpt.
// NOTE(review): the `start` parameter is not used on any visible line - the
// scan begins at 0 and 0 is forwarded to the StringBuilder overload; confirm
// whether that is intended.
98 private static StringBuilder Combine (string source, int start, Checker checker)
// Classify each character of source with the supplied quick check.
100 for (int i = 0; i < source.Length; i++) {
101 if (checker (source [i]) == NormalizationCheck.Yes)
// Working buffer pre-sized to the input length.
103 StringBuilder sb = new StringBuilder (source.Length);
// Hand over to the in-place overload, starting from index 0.
105 Combine (sb, 0, checker);
// In-place overload of Combine: walks `sb` from `start`, classifies each
// character with `checker`, and rewrites the buffer where a composite is found.
// NOTE(review): the bodies of the switch cases and the declarations of `x`
// and `ch` are not visible in this excerpt.
111 private static void Combine (StringBuilder sb, int start, Checker checker)
// sb.Length is re-read every iteration, so it tracks the edits made below.
113 for (int i = start; i < sb.Length; i++) {
114 switch (checker (sb [i])) {
115 case NormalizationCheck.Yes:
117 case NormalizationCheck.No:
119 case NormalizationCheck.Maybe:
123 // x is starter, or sb[i] is blocked
// Composite lookup for the sequence at buffer position x; the composed code
// point comes back through the ref argument ch.
127 int idx = GetPrimaryComposite (sb, (int) sb [i], sb.Length, x, ref ch);
// Replace the matched sequence at x (its length is derived from idx) with the
// single character ch.
130 sb.Remove (x, GetComposedStringLength (idx));
131 sb.Insert (x, (char) ch);
132 i--; // apply recursively
// Looks up a primary composite for the sequence at `bufferPos` in `o`
// (forwarded unchanged to GetPrimaryCompositeCharIndex, which accepts a string
// or a StringBuilder). `cur` is the code point whose properties are checked.
// NOTE(review): the statements guarded by the first and third tests, and any
// write to `ch`, are not visible in this excerpt.
136 static int GetPrimaryComposite (object o, int cur, int length, int bufferPos, ref int ch)
// Test whether cur carries the FullCompositionExclusion flag.
138 if ((propValue [PropIdx (cur)] & FullCompositionExclusion) != 0)
// A non-zero combining class means cur is not a starter.
140 if (GetCombiningClass (cur) != 0)
141 return 0; // not a starter
142 int idx = GetPrimaryCompositeCharIndex (o, bufferPos, length);
// Translate the mappedChars index into the method's return value.
145 return GetPrimaryCompositeFromMapIndex (idx);
// Checks `source` character by character, looking at combining-class order
// and at the quick-check result for each character.
// NOTE(review): the rest of the parameter list, the declaration of prevCC,
// and the statements inside each branch are not visible in this excerpt.
148 static bool IsNormalized (string source,
152 for (int i = 0; i < source.Length; i++) {
153 int cc = GetCombiningClass (source [i]);
// A non-zero class lower than the previous one indicates out-of-order marks.
154 if (cc != 0 && cc < prevCC)
157 switch (checker (source [i])) {
158 case NormalizationCheck.Yes:
160 case NormalizationCheck.No:
162 case NormalizationCheck.Maybe:
// For Maybe, the outcome depends on a composite lookup at this position.
164 if (GetPrimaryComposite (source,
165 source [i], source.Length,
/// <summary>
/// Returns the decomposed form of <paramref name="source"/>. The work is
/// delegated to the ref-StringBuilder overload; when that overload leaves the
/// buffer unallocated, the original string instance is returned unchanged.
/// </summary>
/// <param name="source">Text to decompose.</param>
/// <param name="checker">Per-character quick check for the target form.</param>
/// <returns>The decomposed text, or <paramref name="source"/> itself.</returns>
static string Decompose (string source, Checker checker)
{
    StringBuilder buffer = null;
    Decompose (source, ref buffer, checker);

    // No buffer means nothing had to be rewritten.
    if (buffer == null)
        return source;
    return buffer.ToString ();
}
// Worker overload of Decompose: expands every character that `checker`
// classifies as No, then applies canonical reordering. Results are written
// into `sb`, which is passed by ref.
// NOTE(review): the declarations of `buf` and `start` and the conditions
// guarding the last two statements are not visible in this excerpt.
181 static void Decompose (string source,
182 ref StringBuilder sb, Checker checker)
186 for (int i = 0; i < source.Length; i++)
187 if (checker (source [i]) == NormalizationCheck.No)
// DecomposeChar receives sb, buf and start by ref.
188 DecomposeChar (ref sb, ref buf, source,
// Copy the unprocessed tail of source, from start to the end.
191 sb.Append (source, start, source.Length - start);
192 ReorderCanonical (source, ref sb, 1);
// Looks for adjacent characters whose combining classes are out of order.
// Has two paths: one reads `src`, the other reads `sb`.
// NOTE(review): the condition choosing between the two paths, the tests on
// `level`, and the swap performed in the sb path are not visible here.
195 static void ReorderCanonical (string src, ref StringBuilder sb, int start)
198 // check only with src.
// NOTE(review): this scan always begins at 1 and does not use `start`.
199 for (int i = 1; i < src.Length; i++) {
200 int level = GetCombiningClass (src [i]);
// The previous character has a higher combining class than the current one.
203 if (GetCombiningClass (src [i - 1]) > level) {
// Allocate the buffer, copy the first i - 1 characters of src, then recurse
// with i as the new start.
204 sb = new StringBuilder (src.Length);
205 sb.Append (src, 0, i - 1);
206 ReorderCanonical (src, ref sb, i);
212 // check only with sb
213 for (int i = start; i < sb.Length; i++) {
214 int level = GetCombiningClass (sb [i]);
// NOTE(review): sb [i - 1] needs i >= 1, so start must never be 0 on this path.
217 if (GetCombiningClass (sb [i - 1]) > level) {
221 i--; // apply recursively
// Expands the character s [i] into its mapping and appends the result to sb.
// The text of s between `start` and i is copied into sb first.
// NOTE(review): the null checks around the allocation, the loop termination
// test, the else branch header and any update of `start` are not visible in
// this excerpt.
226 static void DecomposeChar (ref StringBuilder sb,
227 ref int [] buf, string s, int i, ref int start)
// Buffer sized with 100 characters of headroom over the input length.
230 sb = new StringBuilder (s.Length + 100);
// Copy the run of s from start up to (not including) position i.
231 sb.Append (s, start, i - start);
// Fill buf, from index 0, with the zero-terminated mapping of s [i].
234 GetCanonical (s [i], buf, 0);
235 for (int x = 0; ; x++) {
// NOTE(review): strict '<' sends the value 0xFFFF itself down the other
// path; confirm whether '<=' was intended.
238 if (buf [x] < char.MaxValue)
239 sb.Append ((char) buf [x]);
// NOTE(review): in C# '+' binds tighter than '>>', so this evaluates
// buf [x] >> (10 + 0xD800); the intent looks like (buf [x] >> 10) + 0xD800.
// Also confirm whether 0x10000 must be subtracted before splitting and
// whether the low mask should be 0x3FF (10 bits) instead of 0x0FFF.
241 sb.Append ((char) (buf [x] >> 10 + 0xD800));
242 sb.Append ((char) ((buf [x] & 0x0FFF) + 0xDC00));
/// <summary>
/// Quick check for the NFD form: inspects the NoNfd bit of the character's
/// propValue entry.
/// </summary>
/// <param name="c">UTF-16 code unit to classify.</param>
/// <returns>No when the NoNfd bit is set; otherwise Yes.</returns>
public static NormalizationCheck IsNfd (char c)
{
    if ((propValue [PropIdx ((int) c)] & NoNfd) != 0)
        return NormalizationCheck.No;
    return NormalizationCheck.Yes;
}
/// <summary>
/// Quick check for the NFC form: inspects the NoNfc and MaybeNfc bits of the
/// character's propValue entry. NoNfc takes precedence over MaybeNfc.
/// </summary>
/// <param name="c">UTF-16 code unit to classify.</param>
/// <returns>
/// No when NoNfc is set; otherwise Maybe when MaybeNfc is set; otherwise Yes.
/// </returns>
public static NormalizationCheck IsNfc (char c)
{
    uint flags = propValue [PropIdx ((int) c)];

    if ((flags & NoNfc) != 0)
        return NormalizationCheck.No;
    if ((flags & MaybeNfc) != 0)
        return NormalizationCheck.Maybe;
    return NormalizationCheck.Yes;
}
/// <summary>
/// Quick check for the NFKD form: inspects the NoNfkd bit of the character's
/// propValue entry.
/// </summary>
/// <param name="c">UTF-16 code unit to classify.</param>
/// <returns>No when the NoNfkd bit is set; otherwise Yes.</returns>
public static NormalizationCheck IsNfkd (char c)
{
    if ((propValue [PropIdx ((int) c)] & NoNfkd) != 0)
        return NormalizationCheck.No;
    return NormalizationCheck.Yes;
}
/// <summary>
/// Quick check for the NFKC form: inspects the NoNfkc and MaybeNfkc bits of
/// the character's propValue entry. NoNfkc takes precedence over MaybeNfkc.
/// </summary>
/// <param name="c">UTF-16 code unit to classify.</param>
/// <returns>
/// No when NoNfkc is set; otherwise Maybe when MaybeNfkc is set; otherwise Yes.
/// </returns>
public static NormalizationCheck IsNfkc (char c)
{
    uint flags = propValue [PropIdx ((int) c)];

    if ((flags & NoNfkc) != 0)
        return NormalizationCheck.No;
    if ((flags & MaybeNfkc) != 0)
        return NormalizationCheck.Maybe;
    return NormalizationCheck.Yes;
}
280 /* for now we don't use FC_NFKC closure
281 public static bool IsMultiForm (char c)
283 return (propValue [PropIdx ((int) c)] & 0xF0000000) != 0;
286 public static char SingleForm (char c)
288 uint v = propValue [PropIdx ((int) c)];
289 int idx = (int) ((v & 0x7FFF0000) >> 16);
290 return (char) singleNorm [idx];
293 public static void MultiForm (char c, char [] buf, int index)
295 // FIXME: handle surrogate
296 uint v = propValue [PropIdx ((int) c)];
297 int midx = (int) ((v & 0x7FFF0000) >> 16);
298 buf [index] = (char) multiNorm [midx];
299 buf [index + 1] = (char) multiNorm [midx + 1];
300 buf [index + 2] = (char) multiNorm [midx + 2];
301 buf [index + 3] = (char) multiNorm [midx + 3];
302 if (buf [index + 3] != 0)
303 buf [index + 4] = (char) 0; // zero termination
/// <summary>
/// Copies the zero-terminated mappedChars sequence for <paramref name="c"/>
/// into <paramref name="buf"/>, beginning at <paramref name="bufIdx"/>, and
/// writes a terminating zero after the last copied element.
/// </summary>
/// <param name="c">Code point whose mapping is requested.</param>
/// <param name="buf">Destination array; no capacity check is performed here.</param>
/// <param name="bufIdx">First destination index to write.</param>
public static void GetCanonical (int c, int [] buf, int bufIdx)
{
    int src = MapIdx (c);
    while (mappedChars [src] != 0) {
        buf [bufIdx] = mappedChars [src];
        bufIdx++;
        src++;
    }
    // Terminate the output so callers can find the end of the sequence.
    buf [bufIdx] = 0;
}
// Public entry point: dispatches on `type` to the Checker-based IsNormalized
// overload with the matching cached delegate.
// NOTE(review): the selector lines (and any fallback for an unrecognised
// `type`) are not visible in this excerpt; the visible targets, in order, are
// NFC, NFD, NFKC and NFKD.
314 public static bool IsNormalized (string source, int type)
318 return IsNormalized (source, noNfc);
320 return IsNormalized (source, noNfd);
322 return IsNormalized (source, noNfkc);
324 return IsNormalized (source, noNfkd);
// Public entry point: dispatches on `type` to Compose (for the NFC / NFKC
// delegates) or Decompose (for the NFD / NFKD delegates).
// NOTE(review): the selector lines (and any fallback for an unrecognised
// `type`) are not visible in this excerpt.
328 public static string Normalize (string source, int type)
332 return Compose (source, noNfc);
334 return Decompose (source, noNfd);
336 return Compose (source, noNfkc);
338 return Decompose (source, noNfkd);
342 // below are autogenerated code.