2 using System.Globalization;
5 using Util = Mono.Globalization.Unicode.NormalizationTableUtil;
7 namespace Mono.Globalization.Unicode
// Result type returned by the per-character Checker delegates in this file.
// The members referenced by the code below are Yes, No and Maybe.
9 internal enum NormalizationCheck {
15 internal /*static*/ class Normalization
// Bit flags that are masked against entries of propValue. Each constant is a
// distinct power of two, so they can be combined in a single value.
// NoNfd / NoNfkd are tested by IsNfd / IsNfkd.
17 public const int NoNfd = 1;
18 public const int NoNfkd = 2;
// NoNfc / MaybeNfc are tested by IsNfc.
19 public const int NoNfc = 4;
20 public const int MaybeNfc = 8;
// NoNfkc / MaybeNfkc are tested by IsNfkc.
21 public const int NoNfkc = 16;
22 public const int MaybeNfkc = 32;
// NOTE(review): the ExpandOn* flags are not referenced by the methods shown
// in this part of the file; presumably consumed elsewhere - confirm.
23 public const int ExpandOnNfd = 64;
24 public const int ExpandOnNfc = 128;
25 public const int ExpandOnNfkd = 256;
26 public const int ExpandOnNfkc = 512;
// Tested by GetPrimaryComposite before it attempts a composition lookup.
27 public const int FullCompositionExclusion = 1024;
// Per-character check: classifies a single UTF-16 code unit for one
// normalization form and reports the result as a NormalizationCheck.
29 private delegate NormalizationCheck Checker (char c);
// Cached delegate instances. Note the naming: each "noXxx" field wraps the
// corresponding IsXxx method (noNfd -> IsNfd, noNfc -> IsNfc, and so on).
31 private static Checker noNfd = new Checker (IsNfd);
32 private static Checker noNfc = new Checker (IsNfc);
33 private static Checker noNfkd = new Checker (IsNfkd);
34 private static Checker noNfkc = new Checker (IsNfkc);
// Returns the propIdx table entry for code point cp. The table position is
// computed by Util.PropIdx; callers use the result to index propValue.
36 static int PropIdx (int cp)
38 return propIdx [Util.PropIdx (cp)];
// Returns the mapIndex table entry for code point cp. The table position is
// computed by Util.MapIdx; GetCanonical uses the result as its starting
// offset into mappedChars.
41 static int MapIdx (int cp)
43 return mapIndex [Util.MapIdx (cp)];
// Walks mappedChars until a 0 terminator is reached.
// NOTE(review): presumably starts at mapIdx and returns the number of entries
// before the terminator - confirm. Combine (StringBuilder, ...) passes the
// result to StringBuilder.Remove as the number of characters to delete.
46 static int GetComposedStringLength (int mapIdx)
49 while (mappedChars [i] != 0)
// Returns the combiningClass table entry for c; the table position is
// computed by Util.ToIdx. Callers in this file treat a result of 0 as
// "starter" (see GetPrimaryComposite).
54 static byte GetCombiningClass (int c)
56 return combiningClass [Util.ToIdx (c)];
// Searches mappedChars for an entry whose first element equals the character
// at chars [start] and whose following elements are compared, one by one,
// with the characters that follow start.
//   chars       - a string or a StringBuilder; both casts are attempted and
//                 whichever is non-null is read.
//   start       - index of the first character to match.
//   charsLength - length bound used in the "start + i" check below.
59 static int GetPrimaryCompositeCharIndex (object chars, int start, int charsLength)
61 string s = chars as string;
62 StringBuilder sb = chars as StringBuilder;
// Read from the string when chars is a string, otherwise from the builder.
63 char startCh = s != null ? s [start] : sb [start];
// Helper supplies the first candidate offset into mappedChars for startCh.
65 int idx = GetPrimaryCompositeHelperIndex ((int) startCh);
// Visit consecutive candidate entries while they still begin with startCh.
68 while (idx < mappedChars.Length &&
69 mappedChars [idx] == startCh) {
// i is the offset inside the current candidate entry.
70 for (int i = 1; ; i++) {
// A 0 element terminates the candidate entry.
71 if (mappedChars [idx + i] == 0)
// NOTE(review): the action taken for this condition is not shown next to it;
// if it rejects the candidate, the test looks inverted (one would expect
// "start + i >= charsLength" to mean the input ran out) - confirm.
74 if (start + i < charsLength)
76 char curCh = s != null ?
77 s [start + i] : sb [start + i];
78 if (mappedChars [idx + i] == curCh)
79 // (see the comparison above: equal elements keep the candidate alive)
80 if (mappedChars [idx + i] > curCh)
82 // otherwise move idx to next item
// NOTE(review): this reads mappedChars [i], while every other access in the
// loop reads mappedChars [idx + i] - looks like a missing "idx +"; confirm.
83 while (mappedChars [i] != 0)
89 // reached the end of the entries
// Produces the composed form of source for the form selected by checker.
// The text is decomposed first (Decompose fills sb only when it has to),
// then combined. When no StringBuilder was ever created, the original
// source instance is returned unchanged.
93 private static string Compose (string source, Checker checker)
95 StringBuilder sb = null;
96 Decompose (source, ref sb, checker);
// Two combine paths exist: one starting from the string, one operating on
// an existing builder in place.
98 sb = Combine (source, 0, checker);
100 Combine (sb, 0, checker);
102 return sb != null ? sb.ToString () : source;
// String entry point for combining: scans source with checker and, when
// work is needed, creates a StringBuilder sized to source.Length and hands
// it to the StringBuilder overload.
// NOTE(review): the loop starts at 0 and does not use the start parameter in
// the lines shown - confirm this is intended.
105 private static StringBuilder Combine (string source, int start, Checker checker)
107 for (int i = 0; i < source.Length; i++) {
108 if (checker (source [i]) == NormalizationCheck.Yes)
110 StringBuilder sb = new StringBuilder (source.Length);
112 Combine (sb, 0, checker);
// Combines characters of sb in place, beginning at index start. Each
// character is classified with checker and handled per result.
118 private static void Combine (StringBuilder sb, int start, Checker checker)
120 for (int i = start; i < sb.Length; i++) {
121 switch (checker (sb [i])) {
122 case NormalizationCheck.Yes:
124 case NormalizationCheck.No:
126 case NormalizationCheck.Maybe:
130 // x is starter, or sb[i] is blocked
// Look up a composite for the sequence at position x; ch receives the
// composed character through the ref parameter.
134 int idx = GetPrimaryComposite (sb, (int) sb [i], sb.Length, x, ref ch);
// Replace the matched sequence (its length comes from the mapping entry)
// with the single composed character.
137 sb.Remove (x, GetComposedStringLength (idx));
138 sb.Insert (x, (char) ch);
139 i--; // apply recursively
// Looks up a primary composite for the sequence found in o at bufferPos.
//   o         - string or StringBuilder holding the text (forwarded to
//               GetPrimaryCompositeCharIndex).
//   cur       - code point whose properties are checked first.
//   length    - length bound forwarded to the index search.
//   bufferPos - position in o at which matching starts.
//   ch        - ref parameter for the composed character.
// The visible "not a starter" path returns 0.
143 static int GetPrimaryComposite (object o, int cur, int length, int bufferPos, ref int ch)
// Characters flagged FullCompositionExclusion are tested up front.
145 if ((propValue [PropIdx (cur)] & FullCompositionExclusion) != 0)
147 if (GetCombiningClass (cur) != 0)
148 return 0; // not a starter
149 int idx = GetPrimaryCompositeCharIndex (o, bufferPos, length);
152 return GetPrimaryCompositeFromMapIndex (idx);
// Checks source character by character against one normalization form.
// For every character it reads the combining class (comparing it with the
// previous one, prevCC) and then classifies the character with checker.
155 static bool IsNormalized (string source,
159 for (int i = 0; i < source.Length; i++) {
160 int cc = GetCombiningClass (source [i]);
// A non-zero class lower than the preceding class means the combining marks
// are out of order.
161 if (cc != 0 && cc < prevCC)
164 switch (checker (source [i])) {
165 case NormalizationCheck.Yes:
167 case NormalizationCheck.No:
169 case NormalizationCheck.Maybe:
// "Maybe" is resolved by asking whether a composite exists at this position.
171 if (GetPrimaryComposite (source,
172 source [i], source.Length,
// Returns the decomposed form of source for the form selected by checker.
// The ref overload only creates sb when it needs one; if sb is still null
// afterwards, the original source instance is returned as-is.
181 static string Decompose (string source, Checker checker)
183 StringBuilder sb = null;
184 Decompose (source, ref sb, checker);
185 return sb != null ? sb.ToString () : source;
// Worker for decomposition. Characters that checker reports as
// NormalizationCheck.No are expanded through DecomposeChar; the remaining
// tail of source is appended to sb, and the result is then passed to
// ReorderCanonical.
//   sb - output builder, passed by ref so it can be created on demand.
188 static void Decompose (string source,
189 ref StringBuilder sb, Checker checker)
193 for (int i = 0; i < source.Length; i++)
194 if (checker (source [i]) == NormalizationCheck.No)
195 DecomposeChar (ref sb, ref buf, source,
// Copy whatever follows the last decomposed character.
198 sb.Append (source, start, source.Length - start);
199 ReorderCanonical (source, ref sb, 1);
// Reorders adjacent characters whose combining classes are out of order.
// Two modes are visible: one that inspects src only (creating sb once a
// misordered pair is found and then re-entering on the builder), and one
// that works directly on sb from index start.
202 static void ReorderCanonical (string src, ref StringBuilder sb, int start)
205 // check only with src.
206 for (int i = 1; i < src.Length; i++) {
207 int level = GetCombiningClass (src [i]);
// Previous character has a higher combining class than the current one.
210 if (GetCombiningClass (src [i - 1]) > level) {
211 sb = new StringBuilder (src.Length);
// Copies the first i - 1 characters of src, i.e. everything before the
// misordered pair at (i - 1, i).
212 sb.Append (src, 0, i - 1);
213 ReorderCanonical (src, ref sb, i);
219 // check only with sb
220 for (int i = start; i < sb.Length; i++) {
221 int level = GetCombiningClass (sb [i]);
224 if (GetCombiningClass (sb [i - 1]) > level) {
228 i--; // apply recursively
// Appends the pending literal run of s (from start up to, but excluding, i)
// to sb, followed by the mapping of s [i] that GetCanonical writes into buf.
//   sb    - output builder; a new one is created with capacity s.Length + 100.
//   buf   - scratch buffer of code points; GetCanonical terminates it with 0.
//   s, i  - source string and index of the character being decomposed.
//   start - start of the not-yet-copied run in s.
233 static void DecomposeChar (ref StringBuilder sb,
234 ref int [] buf, string s, int i, ref int start)
237 sb = new StringBuilder (s.Length + 100);
238 sb.Append (s, start, i - start);
241 GetCanonical (s [i], buf, 0);
242 for (int x = 0; ; x++) {
// Code points up to and including U+FFFF fit in one UTF-16 code unit.
245 if (buf [x] <= char.MaxValue)
246 sb.Append ((char) buf [x]);
// Supplementary code point: emit a UTF-16 surrogate pair. The 0x10000
// offset is removed first; the high surrogate carries the top 10 bits and
// the low surrogate the bottom 10 bits. The shift is parenthesized because
// "+" binds tighter than ">>" in C#.
248 sb.Append ((char) (((buf [x] - 0x10000) >> 10) + 0xD800));
249 sb.Append ((char) (((buf [x] - 0x10000) & 0x03FF) + 0xDC00));
// Classifies c using the NoNfd flag: Yes when the flag is clear in the
// character's propValue entry, No when it is set. Never returns Maybe.
255 public static NormalizationCheck IsNfd (char c)
257 return (propValue [PropIdx ((int) c)] & NoNfd) == 0 ?
258 NormalizationCheck.Yes : NormalizationCheck.No;
// Classifies c using the NoNfc and MaybeNfc flags of its propValue entry:
//   NoNfc set                 -> No
//   NoNfc clear, MaybeNfc set -> Maybe
//   both clear                -> Yes
261 public static NormalizationCheck IsNfc (char c)
263 uint v = propValue [PropIdx ((int) c)];
264 return (v & NoNfc) == 0 ?
265 (v & MaybeNfc) == 0 ?
266 NormalizationCheck.Yes :
267 NormalizationCheck.Maybe :
268 NormalizationCheck.No;
// Classifies c using the NoNfkd flag: Yes when the flag is clear in the
// character's propValue entry, No when it is set. Never returns Maybe.
271 public static NormalizationCheck IsNfkd (char c)
273 return (propValue [PropIdx ((int) c)] & NoNfkd) == 0 ?
274 NormalizationCheck.Yes : NormalizationCheck.No;
// Classifies c using the NoNfkc and MaybeNfkc flags of its propValue entry:
//   NoNfkc set                  -> No
//   NoNfkc clear, MaybeNfkc set -> Maybe
//   both clear                  -> Yes
277 public static NormalizationCheck IsNfkc (char c)
279 uint v = propValue [PropIdx ((int) c)];
280 return (v & NoNfkc) == 0 ?
281 (v & MaybeNfkc) == 0 ?
282 NormalizationCheck.Yes :
283 NormalizationCheck.Maybe :
284 NormalizationCheck.No;
287 /* for now we don't use FC_NFKC closure
288 public static bool IsMultiForm (char c)
290 return (propValue [PropIdx ((int) c)] & 0xF0000000) != 0;
293 public static char SingleForm (char c)
295 uint v = propValue [PropIdx ((int) c)];
296 int idx = (int) ((v & 0x7FFF0000) >> 16);
297 return (char) singleNorm [idx];
300 public static void MultiForm (char c, char [] buf, int index)
302 // FIXME: handle surrogate
303 uint v = propValue [PropIdx ((int) c)];
304 int midx = (int) ((v & 0x7FFF0000) >> 16);
305 buf [index] = (char) multiNorm [midx];
306 buf [index + 1] = (char) multiNorm [midx + 1];
307 buf [index + 2] = (char) multiNorm [midx + 2];
308 buf [index + 3] = (char) multiNorm [midx + 3];
309 if (buf [index + 3] != 0)
310 buf [index + 4] = (char) 0; // zero termination
// Copies the mappedChars entry for code point c into buf, starting at
// bufIdx. The entry starts at MapIdx (c) and runs up to (not including) the
// first 0 element; a 0 terminator is then written after the copied values.
// The caller must supply a buffer large enough for the entry plus terminator;
// no bounds check is performed here.
314 public static void GetCanonical (int c, int [] buf, int bufIdx)
316 for (int i = MapIdx (c); mappedChars [i] != 0; i++)
317 buf [bufIdx++] = mappedChars [i];
318 buf [bufIdx] = (char) 0;
// Public entry point: selects one of the four cached checkers (noNfc, noNfd,
// noNfkc, noNfkd) according to type and delegates to the Checker-based
// IsNormalized overload.
321 public static bool IsNormalized (string source, int type)
325 return IsNormalized (source, noNfc);
327 return IsNormalized (source, noNfd);
329 return IsNormalized (source, noNfkc);
331 return IsNormalized (source, noNfkd);
// Public entry point: normalizes source to the form selected by type.
// The composed forms go through Compose (with noNfc or noNfkc); the
// decomposed forms go through Decompose (with noNfd or noNfkd).
335 public static string Normalize (string source, int type)
339 return Compose (source, noNfc);
341 return Decompose (source, noNfd);
343 return Compose (source, noNfkc);
345 return Decompose (source, noNfkd);
349 // The code below is autogenerated.