2 // System.Globalization.CharUnicodeInfo.cs
5 // Atsushi Enomoto <atsushi@ximian.com>
11 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
13 // Permission is hereby granted, free of charge, to any person obtaining
14 // a copy of this software and associated documentation files (the
15 // "Software"), to deal in the Software without restriction, including
16 // without limitation the rights to use, copy, modify, merge, publish,
17 // distribute, sublicense, and/or sell copies of the Software, and to
18 // permit persons to whom the Software is furnished to do so, subject to
19 // the following conditions:
21 // The above copyright notice and this permission notice shall be
22 // included in all copies or substantial portions of the Software.
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
28 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
29 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
30 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 using System.Diagnostics.Contracts;
34 using System.Runtime.CompilerServices;
36 namespace System.Globalization
38 public static class CharUnicodeInfo
// Type initializer: asks the runtime, through the InternalCall
// GetDataTablePointers, for the data tables matching CategoryDataVersion and
// stores the returned pointers in the static fields of this class.
40 static CharUnicodeInfo ()
43 GetDataTablePointers (CategoryDataVersion,
44 out category_data, out category_astral_index, out numeric_data,
45 out numeric_data_values, out to_lower_data_low, out to_lower_data_high,
46 out to_upper_data_low, out to_upper_data_high);
// When an astral index table was returned, the Surrogate category becomes the
// marker that triggers the pair lookup in InternalGetUnicodeCategory (string, int).
47 category_check_pair = category_astral_index != null
48 ? (byte)UnicodeCategory.Surrogate
// NOTE(review): the ':' branch of this conditional is not visible in this
// excerpt; the comment on category_check_pair says 0xff is used when astral
// categories are unavailable - confirm against the full file.
// Pointers to the Unicode data tables. Each one is assigned once, in the
// static constructor, through the out parameters of GetDataTablePointers.

// One category byte per entry; read by the InternalGetUnicodeCategory overloads.
53 private readonly unsafe static byte *category_data;
// Per-256-code-point index into category_data for code points >= 0x10000.
// Compared against null in the static constructor, so it may be absent.
54 private readonly unsafe static ushort *category_astral_index;
55 private readonly unsafe static byte *numeric_data; // unused
56 private readonly unsafe static double *numeric_data_values; // unused
// Indexed directly by chars up to 0x24cf in ToLowerInvariant.
57 private readonly unsafe static ushort *to_lower_data_low;
// Indexed by (c - 0xff21) for chars >= 0xff21 in ToLowerInvariant.
58 private readonly unsafe static ushort *to_lower_data_high;
// Indexed directly by chars up to 0x24e9 in ToUpperInvariant.
59 private readonly unsafe static ushort *to_upper_data_low;
// Indexed by (c - 0xff21) for chars >= 0xff21 in ToUpperInvariant.
60 private readonly unsafe static ushort *to_upper_data_high;
62 // UnicodeCategory.Surrogate if astral plane
63 // categories are available, 0xff otherwise.
64 private readonly static byte category_check_pair;
// Passed to GetDataTablePointers as its category_data_version argument.
66 private const int CategoryDataVersion = 4;
/// <summary>
/// Runtime-implemented (InternalCall) accessor that hands back the pointers
/// to the Unicode data tables used by this class.
/// </summary>
/// <param name="category_data_version">Version of the category table layout the caller expects.</param>
/// <param name="category_data">Receives the category table.</param>
/// <param name="category_astral_index">Receives the astral-plane index table; the static constructor checks it for null.</param>
/// <param name="numeric_data">Receives the numeric data table (marked unused by the fields that store it).</param>
/// <param name="numeric_data_values">Receives the numeric values table (marked unused by the fields that store it).</param>
/// <param name="to_lower_data_low">Receives the low-range lower-casing table.</param>
/// <param name="to_lower_data_high">Receives the high-range lower-casing table.</param>
/// <param name="to_upper_data_low">Receives the low-range upper-casing table.</param>
/// <param name="to_upper_data_high">Receives the high-range upper-casing table.</param>
[MethodImpl (MethodImplOptions.InternalCall)]
private unsafe static extern void GetDataTablePointers (
	int category_data_version,
	out byte *category_data,
	out ushort *category_astral_index,
	out byte *numeric_data,
	out double *numeric_data_values,
	out ushort *to_lower_data_low,
	out ushort *to_lower_data_high,
	out ushort *to_upper_data_low,
	out ushort *to_upper_data_high);
// Maps ch to its decimal digit value (per the method name and int return type).
// NOTE(review): only fragments of this method are visible in this excerpt - the
// declaration of i (presumably the code point of ch) and every return
// statement are missing, so the comments below cover the visible tests only.
74 public static int GetDecimalDigitValue (char ch)
78 // They are not decimal digits but are regarded as if they were.
89 // They are not decimal digits but are regarded as if they were.
// 8308..8313 is U+2074..U+2079 (superscript four to nine).
90 if (8308 <= i && i < 8314)
// 8320..8329 is U+2080..U+2089 (subscript zero to nine).
92 if (8320 <= i && i < 8330)
// Anything not special-cased above is screened with Char.IsDigit.
95 if (!Char.IsDigit (ch))
/// <summary>
/// Returns the decimal digit value of the character at position
/// <paramref name="index"/> of <paramref name="s"/>.
/// </summary>
/// <param name="s">The string containing the character to examine.</param>
/// <param name="index">Zero-based position of the character within <paramref name="s"/>.</param>
/// <returns>The result of <c>GetDecimalDigitValue (char)</c> for <c>s [index]</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="index"/> is negative or not less than the length of <paramref name="s"/>.
/// </exception>
public static int GetDecimalDigitValue (string s, int index)
{
	if (s == null)
		throw new ArgumentNullException ("s");
	// A single unsigned comparison rejects both negative and too-large
	// indexes, the same validation GetNumericValue (string, int) performs.
	// Without it a bad index escaped as IndexOutOfRangeException.
	if (((uint)index) >= ((uint)s.Length))
		throw new ArgumentOutOfRangeException ("index");
	return GetDecimalDigitValue (s [index]);
}
// Maps ch to a digit value, starting from GetDecimalDigitValue and then
// testing several ranges of enclosed/decorated digits.
// NOTE(review): the lines between the first call and the range tests, and all
// return statements, are not visible in this excerpt. i starts as the result
// of GetDecimalDigitValue but is compared against code points below, so it is
// presumably reassigned to the code point of ch in between - confirm.
155 public static int GetDigitValue (char ch)
157 int i = GetDecimalDigitValue (ch);
166 // Char.IsDigit() is false for these, but they still yield a digit value.
// 9312..9320 is U+2460..U+2468 (circled digits one to nine).
167 if (i >= 9312 && i < 9321)
// 9332..9340 is U+2474..U+247C (parenthesized digits one to nine).
169 if (i >= 9332 && i < 9341)
// 9352..9360 is U+2488..U+2490 (digits one to nine followed by full stop).
171 if (i >= 9352 && i < 9361)
// 9461..9469 is U+24F5..U+24FD (double circled digits one to nine).
173 if (i >= 9461 && i < 9470)
// 10102..10110 is U+2776..U+277E (dingbat negative circled digits one to nine).
175 if (i >= 10102 && i < 10111)
// 10112..10120 is U+2780..U+2788 (dingbat circled sans-serif digits one to nine).
177 if (i >= 10112 && i < 10121)
// 10122..10130 is U+278A..U+2792 (dingbat negative circled sans-serif digits one to nine).
179 if (i >= 10122 && i < 10131)
/// <summary>
/// Returns the digit value of the character at position
/// <paramref name="index"/> of <paramref name="s"/>.
/// </summary>
/// <param name="s">The string containing the character to examine.</param>
/// <param name="index">Zero-based position of the character within <paramref name="s"/>.</param>
/// <returns>The result of <c>GetDigitValue (char)</c> for <c>s [index]</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="index"/> is negative or not less than the length of <paramref name="s"/>.
/// </exception>
public static int GetDigitValue (string s, int index)
{
	if (s == null)
		throw new ArgumentNullException ("s");
	// A single unsigned comparison rejects both negative and too-large
	// indexes, the same validation GetNumericValue (string, int) performs.
	// Without it a bad index escaped as IndexOutOfRangeException.
	if (((uint)index) >= ((uint)s.Length))
		throw new ArgumentOutOfRangeException ("index");
	return GetDigitValue (s [index]);
}
// Maps ch to a numeric value (double), starting from GetDigitValue and then
// handling individual code points and ranges that need special treatment.
// NOTE(review): most of this method (the switch header, its other cases, the
// conditions guarding the three arithmetic returns, and the values returned
// for the ranges) is not visible in this excerpt. i is presumably reassigned
// to the code point of ch after the first call - confirm.
192 public static double GetNumericValue (char ch)
194 int i = GetDigitValue (ch);
// 9470 is U+24FE.
271 case 9470: // IsNumber(c) is False BTW.
286 // IsNumber() is not true for these, but they are regarded as if it were.
// 9451..9460 is U+24EB..U+24F4.
287 if (9451 <= i && i < 9461)
// 12321..12329 is U+3021..U+3029.
289 if (12321 <= i && i < 12330)
// 12881..12895 is U+3251..U+325F.
291 if (12881 <= i && i < 12896)
// 12977..12991 is U+32B1..U+32BF.
293 if (12977 <= i && i < 12992)
// Everything below is screened with char.IsNumber first.
296 if (!char.IsNumber (ch))
// Offset from 3882 (U+0F2A) plus one half.
300 return 0.5 + i - 3882;
// Multiples of ten counted from 4977 (U+1371).
302 return (i - 4977) * 10;
// Multiples of one fifth counted from 8532 (U+2154).
304 return 0.2 * (i - 8532);
/// <summary>
/// Returns the numeric value of the character at position
/// <paramref name="index"/> of <paramref name="s"/>.
/// </summary>
/// <param name="s">The string containing the character to examine.</param>
/// <param name="index">Zero-based position of the character within <paramref name="s"/>.</param>
/// <returns>The result of <c>GetNumericValue (char)</c> for <c>s [index]</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="index"/> is negative or not less than the length of <paramref name="s"/>.
/// </exception>
public static double GetNumericValue (string s, int index)
{
	if (s == null) {
		throw new ArgumentNullException ("s");
	}

	// Same acceptance set as the original unsigned comparison:
	// 0 <= index < s.Length.
	bool inRange = index >= 0 && index < s.Length;
	if (!inRange) {
		throw new ArgumentOutOfRangeException("index");
	}

	char target = s [index];
	return GetNumericValue (target);
}
/// <summary>
/// Returns the Unicode category recorded for <paramref name="ch"/>.
/// </summary>
/// <param name="ch">The UTF-16 code unit to classify.</param>
/// <returns>The category looked up by <c>InternalGetUnicodeCategory (int)</c>.</returns>
public static UnicodeCategory GetUnicodeCategory (char ch)
{
	// char widens implicitly to the int parameter of the internal lookup.
	UnicodeCategory category = InternalGetUnicodeCategory (ch);
	return category;
}
/// <summary>
/// Returns the Unicode category of the character at position
/// <paramref name="index"/> of <paramref name="s"/>.
/// </summary>
/// <param name="s">The string containing the character to classify.</param>
/// <param name="index">Zero-based position of the character within <paramref name="s"/>.</param>
/// <returns>The result of <c>InternalGetUnicodeCategory (string, int)</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="index"/> is negative or not less than the length of <paramref name="s"/>.
/// </exception>
public static UnicodeCategory GetUnicodeCategory (string s, int index)
{
	// The argument checks stay as plain if-then-throw statements so they
	// still form the block that EndContractBlock closes.
	if (s == null) {
		throw new ArgumentNullException("s");
	}
	if (index < 0 || index >= s.Length) {
		throw new ArgumentOutOfRangeException("index");
	}
	Contract.EndContractBlock();

	UnicodeCategory category = InternalGetUnicodeCategory(s, index);
	return category;
}
// Lower-cases c through the runtime-provided tables: chars up to U+24CF index
// to_lower_data_low directly, chars from U+FF21 upwards index
// to_lower_data_high relative to U+FF21.
// NOTE(review): what happens to chars between those two ranges is not visible
// in this excerpt - presumably c is returned unchanged; confirm.
350 internal static char ToLowerInvariant (char c)
353 if (c <= ((char)0x24cf))
354 return (char) to_lower_data_low [c];
355 if (c >= ((char)0xff21))
// Rebase so that U+FF21 maps to entry 0 of the high table.
356 return (char) to_lower_data_high[c - 0xff21];
// Upper-cases c through the runtime-provided tables: chars up to U+24E9 index
// to_upper_data_low directly, chars from U+FF21 upwards index
// to_upper_data_high relative to U+FF21.
// NOTE(review): what happens to chars between those two ranges is not visible
// in this excerpt - presumably c is returned unchanged; confirm.
// NOTE(review): this method is public while ToLowerInvariant is internal -
// confirm the difference in visibility is intended.
361 public static char ToUpperInvariant (char c)
364 if (c <= ((char)0x24e9))
365 return (char) to_upper_data_low [c];
366 if (c >= ((char)0xff21))
// Rebase so that U+FF21 maps to entry 0 of the high table.
367 return (char) to_upper_data_high [c - 0xff21];
/// <summary>
/// Reads the category byte stored for <paramref name="ch"/> in the category table.
/// </summary>
/// <param name="ch">
/// Index into <c>category_data</c>. No range check is performed here, so
/// callers must pass a value the table covers.
/// </param>
/// <returns>The stored byte reinterpreted as a <see cref="UnicodeCategory"/>.</returns>
internal unsafe static UnicodeCategory InternalGetUnicodeCategory (int ch)
{
	byte raw = category_data [ch];
	return (UnicodeCategory) raw;
}
// Looks up the category of the character at value[index], resolving a
// surrogate pair to the category of the full code point when astral-plane
// data is available. Arguments are only asserted, not validated; callers
// are expected to have checked them.
// NOTE(review): the end of this method (closing of the unsafe region and the
// final return of c) is not visible in this excerpt.
377 internal static UnicodeCategory InternalGetUnicodeCategory (string value, int index) {
378 Contract.Assert(value != null, "value can not be null");
379 Contract.Assert(index < value.Length, "index < value.Length");
// Start with the category of the single UTF-16 code unit.
381 UnicodeCategory c = GetUnicodeCategory (value [index]);
// category_check_pair equals Surrogate only when astral data exists (0xff
// otherwise, per its field comment), so this test fails fast without it.
382 if ((byte)c == category_check_pair &&
383 Char.IsSurrogatePair (value, index)) {
384 int u = Char.ConvertToUtf32 (value [index], value [index + 1]);
386 // ConvertToUtf32 guarantees 0x10000 <= u <= 0x10ffff
// Two-level lookup: the index table selects a 256-entry block for the upper
// bits of (u - 0x10000); the low 8 bits of u select the entry inside it.
387 int x = (category_astral_index [(u - 0x10000) >> 8] << 8) + (u & 0xff);
389 c = (UnicodeCategory)category_data [x];
// Inclusive bounds of the UTF-16 high-surrogate (U+D800..U+DBFF) and
// low-surrogate (U+DC00..U+DFFF) code unit ranges.
396 internal const char HIGH_SURROGATE_START = '\ud800';
397 internal const char HIGH_SURROGATE_END = '\udbff';
398 internal const char LOW_SURROGATE_START = '\udc00';
399 internal const char LOW_SURROGATE_END = '\udfff';
// Tests whether the character at s[index] is white space, based on its
// Unicode category as reported by GetUnicodeCategory (string, int).
// Arguments are only asserted here; the public GetUnicodeCategory overload
// performs the real validation.
// NOTE(review): the switch header and the return statements are not visible
// in this excerpt; presumably the three separator categories below yield
// true and everything else false - confirm.
401 internal static bool IsWhiteSpace(String s, int index)
403 Contract.Assert(s != null, "s!=null");
404 Contract.Assert(index >= 0 && index < s.Length, "index >= 0 && index < s.Length");
406 UnicodeCategory uc = GetUnicodeCategory(s, index);
407 // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator".
408 // And U+2029 is the only character which is under the category "ParagraphSeparator".
410 case (UnicodeCategory.SpaceSeparator):
411 case (UnicodeCategory.LineSeparator):
412 case (UnicodeCategory.ParagraphSeparator):
// Tests whether c is white space, based on its Unicode category as reported
// by GetUnicodeCategory (char).
// NOTE(review): the switch header and the return statements are not visible
// in this excerpt; presumably the three separator categories below yield
// true and everything else false - confirm.
419 internal static bool IsWhiteSpace(char c)
421 UnicodeCategory uc = GetUnicodeCategory(c);
422 // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator".
423 // And U+2029 is the only character which is under the category "ParagraphSeparator".
425 case (UnicodeCategory.SpaceSeparator):
426 case (UnicodeCategory.LineSeparator):
427 case (UnicodeCategory.ParagraphSeparator):