Fix to UriTemplate.Match to properly handle query parameters without a value. No...
[mono.git] / mcs / class / corlib / System.Globalization / CharUnicodeInfo.cs
1 //
2 // System.Globalization.CharUnicodeInfo.cs
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // (C)2005 Novell Inc,
8 //
9
10 //
11 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining
14 // a copy of this software and associated documentation files (the
15 // "Software"), to deal in the Software without restriction, including
16 // without limitation the rights to use, copy, modify, merge, publish,
17 // distribute, sublicense, and/or sell copies of the Software, and to
18 // permit persons to whom the Software is furnished to do so, subject to
19 // the following conditions:
20 // 
21 // The above copyright notice and this permission notice shall be
22 // included in all copies or substantial portions of the Software.
23 // 
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
28 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
29 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
30 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //
32 using System;
33 using System.Diagnostics.Contracts;
34 using System.Runtime.CompilerServices;
35
36 namespace System.Globalization
37 {
38         public static class CharUnicodeInfo
39         {
// Type initializer. Fetches the native Unicode data tables through the
// GetDataTablePointers internal call and stores them in the static
// pointer fields, then records whether astral plane category data
// was supplied.
static CharUnicodeInfo ()
{
        unsafe {
                GetDataTablePointers (
                        CategoryDataVersion,
                        out category_data,
                        out category_astral_index,
                        out numeric_data,
                        out numeric_data_values,
                        out to_lower_data_low,
                        out to_lower_data_high,
                        out to_upper_data_low,
                        out to_upper_data_high);

                // UnicodeCategory.Surrogate when the astral index table
                // is available, 0xff when it is not.
                if (category_astral_index == null)
                        category_check_pair = (byte) 0xff;
                else
                        category_check_pair = (byte) UnicodeCategory.Surrogate;
        }
}
52
// Native table pointers. All of them are assigned exactly once, by the
// static constructor, through GetDataTablePointers.

// Category byte per entry; read by InternalGetUnicodeCategory.
private static readonly unsafe byte* category_data;
// Page index used to locate astral plane entries inside category_data.
private static readonly unsafe ushort* category_astral_index;
private static readonly unsafe byte* numeric_data; // unused
private static readonly unsafe double* numeric_data_values; // unused
// Lower/upper case mapping tables; the "low" tables are indexed by the
// character itself, the "high" tables by (character - 0xff21).
private static readonly unsafe ushort* to_lower_data_low;
private static readonly unsafe ushort* to_lower_data_high;
private static readonly unsafe ushort* to_upper_data_low;
private static readonly unsafe ushort* to_upper_data_high;

// UnicodeCategory.Surrogate if astral plane
// categories are available, 0xff otherwise.
private static readonly byte category_check_pair;

// Table format version passed to GetDataTablePointers.
private const int CategoryDataVersion = 4;
67
// Implemented inside the runtime (InternalCall). Receives the table
// format version the managed side expects and hands back one pointer
// per native data table.
[MethodImpl (MethodImplOptions.InternalCall)]
private static extern unsafe void GetDataTablePointers (
        int category_data_version,
        out byte* category_data,
        out ushort* category_astral_index,
        out byte* numeric_data,
        out double* numeric_data_values,
        out ushort* to_lower_data_low,
        out ushort* to_lower_data_high,
        out ushort* to_upper_data_low,
        out ushort* to_upper_data_high);
73
// Returns the decimal digit value (0-9) of ch, or -1 when ch has none.
//
// Superscript and subscript digits are mapped to their digit value even
// though Char.IsDigit does not report them. Every other character has to
// satisfy Char.IsDigit and lie inside one of the ten-code-point blocks
// listed in decimal_digit_zeros; a digit outside every known block
// yields -1 instead of a value computed from an unrelated block.
public static int GetDecimalDigitValue (char ch)
{
        int i = (int) ch;

        // They are not decimal digits but are regarded as they were.
        switch (i) {
        case 178: // U+00B2 SUPERSCRIPT TWO
                return 2;
        case 179: // U+00B3 SUPERSCRIPT THREE
                return 3;
        case 185: // U+00B9 SUPERSCRIPT ONE
                return 1;
        case 8304: // U+2070 SUPERSCRIPT ZERO
                return 0;
        }
        if (8308 <= i && i < 8314) // U+2074..U+2079 superscript 4-9
                return i - 8304;
        if (8320 <= i && i < 8330) // U+2080..U+2089 subscript 0-9
                return i - 8320;

        if (!Char.IsDigit (ch))
                return -1;

        // The table is sorted, so the scan can stop at the first block
        // that starts beyond the character.
        for (int n = 0; n < decimal_digit_zeros.Length; n++) {
                int zero = decimal_digit_zeros [n];
                if (i < zero)
                        break;
                if (i < zero + 10)
                        return i - zero;
        }
        return -1;
}

// Code point of digit ZERO of each decimal digit block in the Basic
// Multilingual Plane, in ascending order. Each block spans ten
// consecutive code points.
private static readonly int [] decimal_digit_zeros = {
        0x0030, // ASCII
        0x0660, // Arabic-Indic
        0x06F0, // Extended Arabic-Indic
        0x07C0, // NKo
        0x0966, // Devanagari
        0x09E6, // Bengali
        0x0A66, // Gurmukhi
        0x0AE6, // Gujarati
        0x0B66, // Oriya
        0x0BE6, // Tamil
        0x0C66, // Telugu
        0x0CE6, // Kannada
        0x0D66, // Malayalam
        0x0DE6, // Sinhala Lith
        0x0E50, // Thai
        0x0ED0, // Lao
        0x0F20, // Tibetan
        0x1040, // Myanmar
        0x1090, // Myanmar Shan
        0x1368, // Ethiopic: digits ONE..NINE start at U+1369, there is no zero
        0x17E0, // Khmer
        0x1810, // Mongolian
        0x1946, // Limbu
        0x19D0, // New Tai Lue
        0x1A80, // Tai Tham Hora
        0x1A90, // Tai Tham Tham
        0x1B50, // Balinese
        0x1BB0, // Sundanese
        0x1C40, // Lepcha
        0x1C50, // Ol Chiki
        0xA620, // Vai
        0xA8D0, // Saurashtra
        0xA900, // Kayah Li
        0xA9D0, // Javanese
        0xA9F0, // Myanmar Tai Laing
        0xAA50, // Cham
        0xABF0, // Meetei Mayek
        0xFF10, // Fullwidth
};
147
// Returns the decimal digit value of the character at s [index], or -1
// when that character has none.
//
// Throws ArgumentNullException when s is null and
// ArgumentOutOfRangeException when index is not a valid position in s.
public static int GetDecimalDigitValue (string s, int index)
{
        if (s == null)
                throw new ArgumentNullException ("s");
        // One unsigned comparison rejects negative indexes as well; this is
        // the same validation GetNumericValue (string, int) performs.
        if (((uint) index) >= ((uint) s.Length))
                throw new ArgumentOutOfRangeException ("index");
        return GetDecimalDigitValue (s [index]);
}
154
// Returns the digit value of ch, or -1 when it has none. Decimal digits
// are resolved by GetDecimalDigitValue; on top of those, the enclosed
// "zero" sign at 9450 and seven runs of nine enclosed "one".."nine"
// signs are recognised.
public static int GetDigitValue (char ch)
{
        int decimalValue = GetDecimalDigitValue (ch);
        if (decimalValue >= 0)
                return decimalValue;

        int code = (int) ch;
        if (code == 9450)
                return 0;

        // They are False in Char.IsDigit(), but return a digit.
        // Find the first code point ("one") of the run holding the character.
        int runStart;
        if (9312 <= code && code <= 9320)
                runStart = 9312;
        else if (9332 <= code && code <= 9340)
                runStart = 9332;
        else if (9352 <= code && code <= 9360)
                runStart = 9352;
        else if (9461 <= code && code <= 9469)
                runStart = 9461;
        else if (10102 <= code && code <= 10110)
                runStart = 10102;
        else if (10112 <= code && code <= 10120)
                runStart = 10112;
        else if (10122 <= code && code <= 10130)
                runStart = 10122;
        else
                return -1;

        return code - runStart + 1;
}
184
// Returns the digit value of the character at s [index], or -1 when that
// character has none.
//
// Throws ArgumentNullException when s is null and
// ArgumentOutOfRangeException when index is not a valid position in s.
public static int GetDigitValue (string s, int index)
{
        if (s == null)
                throw new ArgumentNullException ("s");
        // One unsigned comparison rejects negative indexes as well; this is
        // the same validation GetNumericValue (string, int) performs.
        if (((uint) index) >= ((uint) s.Length))
                throw new ArgumentOutOfRangeException ("index");
        return GetDigitValue (s [index]);
}
191
// Returns the numeric value of ch, or -1 when it has none.
//
// Resolution order:
//   1. digit values, through GetDigitValue;
//   2. individually listed code points (fractions, large Roman numerals,
//      and so on);
//   3. runs that char.IsNumber does not report but that are treated as
//      numbers anyway;
//   4. runs that are only honoured when char.IsNumber agrees.
// Every run in step 4 is bounded on both sides, so a number character
// lying between two runs yields -1 rather than a value computed with
// the offset of the following run.
public static double GetNumericValue (char ch)
{
        int i = GetDigitValue (ch);
        if (i >= 0)
                return i;

        i = (int) ch;

        switch (i) {
        case 188:
                return 0.25;
        case 189:
                return 0.5;
        case 190:
                return 0.75;
        case 2548:
                return 1;
        case 2549:
                return 2;
        case 2550:
                return 3;
        case 2551:
                return 4;
        case 2553:
                return 16;
        case 3056:
                return 10;
        case 3057:
                return 100;
        case 3058:
                return 1000;
        case 3891: // U+0F33 TIBETAN DIGIT HALF ZERO
                return -0.5;
        case 4988:
                return 10000;
        case 5870:
                return 17;
        case 5871:
                return 18;
        case 5872:
                return 19;
        case 8531:
                return 1.0 / 3;
        case 8532:
                return 2.0 / 3;
        case 8537:
                return 1.0 / 6;
        case 8538:
                return 5.0 / 6;
        case 8539:
                return 1.0 / 8;
        case 8540:
                return 3.0 / 8;
        case 8541:
                return 5.0 / 8;
        case 8542:
                return 7.0 / 8;
        case 8543:
                return 1;
        case 8556:
                return 50;
        case 8557:
                return 100;
        case 8558:
                return 500;
        case 8559:
                return 1000;
        case 8572:
                return 50;
        case 8573:
                return 100;
        case 8574:
                return 500;
        case 8575:
                return 1000;
        case 8576:
                return 1000;
        case 8577:
                return 5000;
        case 8578:
                return 10000;
        case 9470: // IsNumber(c) is False BTW.
        case 10111:
        case 10121:
        case 10131:
                return 10;
        case 12295:
                return 0;
        case 12344:
                return 10;
        case 12345:
                return 20;
        case 12346:
                return 30;
        }

        // They are not True by IsNumber() but regarded as they were.
        if (9451 <= i && i < 9461)
                return i - 9440;
        if (12321 <= i && i < 12330)
                return i - 12320;
        if (12881 <= i && i < 12896)
                return i - 12860;
        if (12977 <= i && i < 12992)
                return i - 12941;

        if (!char.IsNumber (ch))
                return -1;

        // U+0F2A..U+0F32 Tibetan half digits: 0.5, 1.5, ... 8.5
        if (3882 <= i && i < 3891)
                return 0.5 + i - 3882;
        // U+1369..U+1371 Ethiopic digits one..nine; reached when the
        // category tables do not classify them as decimal digits.
        if (4969 <= i && i < 4978)
                return i - 4968;
        // U+1372..U+137B Ethiopic ten..hundred
        if (4978 <= i && i < 4988)
                return (i - 4977) * 10;
        // U+2155..U+2158 fifths
        if (8533 <= i && i < 8537)
                return 0.2 * (i - 8532);
        // U+2160..U+216B Roman numerals one..twelve
        if (8544 <= i && i < 8556)
                return i - 8543;
        // U+2170..U+217B small Roman numerals one..twelve
        if (8560 <= i && i < 8572)
                return i - 8559;
        // U+2469..U+2473 circled ten..twenty
        if (9321 <= i && i < 9332)
                return i - 9311;
        // U+247D..U+2487 parenthesized ten..twenty
        if (9341 <= i && i < 9352)
                return i - 9331;
        // U+2491..U+249B ten..twenty full stop
        if (9361 <= i && i < 9372)
                return i - 9351;
        // U+3192..U+3195 ideographic annotation one..four
        if (12690 <= i && i < 12694)
                return i - 12689;
        // U+3220..U+3229 parenthesized ideograph one..ten
        if (12832 <= i && i < 12842)
                return i - 12831;
        // U+3280..U+3289 circled ideograph one..ten
        if (12928 <= i && i < 12938)
                return i - 12927;

        return -1;
}
324
// Returns the numeric value of the character at s [index], or -1 when
// that character has none.
//
// Throws ArgumentNullException when s is null and
// ArgumentOutOfRangeException when index is not a valid position in s.
public static double GetNumericValue (string s, int index)
{
        if (s == null)
                throw new ArgumentNullException ("s");

        // Comparing as unsigned folds "index < 0" into the upper bound test.
        bool indexIsValid = (uint) index < (uint) s.Length;
        if (!indexIsValid)
                throw new ArgumentOutOfRangeException ("index");

        char target = s [index];
        return GetNumericValue (target);
}
333
// Returns the Unicode category recorded for ch in the category table.
public static UnicodeCategory GetUnicodeCategory (char ch)
{
        UnicodeCategory category = InternalGetUnicodeCategory (ch);
        return category;
}
338
// Returns the Unicode category of the character at s [index]; the
// lookup is delegated to InternalGetUnicodeCategory (string, int) once
// the arguments have been validated.
//
// Throws ArgumentNullException when s is null and
// ArgumentOutOfRangeException when index is not a valid position in s.
public static UnicodeCategory GetUnicodeCategory (string s, int index)
{
        if (s == null)
                throw new ArgumentNullException ("s");
        // Unsigned comparison: a negative index wraps to a huge value and
        // fails the same test as an index past the end.
        if ((uint) index >= (uint) s.Length)
                throw new ArgumentOutOfRangeException ("index");
        Contract.EndContractBlock ();

        return InternalGetUnicodeCategory (s, index);
}
349
// Maps c through the lower case tables. Characters up to 0x24cf are
// looked up in to_lower_data_low, characters from 0xff21 upwards in
// to_lower_data_high; everything in between is returned unchanged.
internal static char ToLowerInvariant (char c)
{
        int code = (int) c;

        // Not covered by either table.
        if (code > 0x24cf && code < 0xff21)
                return c;

        unsafe {
                ushort mapped = code <= 0x24cf
                        ? to_lower_data_low [code]
                        : to_lower_data_high [code - 0xff21];
                return (char) mapped;
        }
}
360
// Maps c through the upper case tables. Characters up to 0x24e9 are
// looked up in to_upper_data_low, characters from 0xff21 upwards in
// to_upper_data_high; everything in between is returned unchanged.
public static char ToUpperInvariant (char c)
{
        int code = (int) c;

        // Not covered by either table.
        if (code > 0x24e9 && code < 0xff21)
                return c;

        unsafe {
                ushort mapped = code <= 0x24e9
                        ? to_upper_data_low [code]
                        : to_upper_data_high [code - 0xff21];
                return (char) mapped;
        }
}
371
// Reads the category byte stored at position ch of category_data.
// No range check is performed here.
internal static UnicodeCategory InternalGetUnicodeCategory (int ch)
{
        byte raw;
        unsafe {
                raw = category_data [ch];
        }
        return (UnicodeCategory) raw;
}
376
// Returns the category of the character at value [index]. When that
// character's category equals category_check_pair and it starts a
// surrogate pair, the pair is combined and the category of the
// resulting code point is returned instead.
internal static UnicodeCategory InternalGetUnicodeCategory (string value, int index)
{
        Contract.Assert (value != null, "value can not be null");
        Contract.Assert (index < value.Length, "index < value.Length");

        UnicodeCategory category = GetUnicodeCategory (value [index]);

        // Only a category equal to category_check_pair can lead to an
        // astral plane lookup.
        if ((byte) category != category_check_pair)
                return category;
        if (!Char.IsSurrogatePair (value, index))
                return category;

        // ConvertToUtf32 guarantees 0x10000 <= scalar <= 0x10ffff
        int scalar = Char.ConvertToUtf32 (value [index], value [index + 1]);
        unsafe {
                // The index table yields a 256-entry page number for the
                // upper bits; the low byte selects the entry in that page.
                int page = category_astral_index [(scalar - 0x10000) >> 8];
                int slot = (page << 8) + (scalar & 0xff);
                return (UnicodeCategory) category_data [slot];
        }
}
395
// First and last UTF-16 code units of the high (leading) and low
// (trailing) surrogate ranges.
internal const char HIGH_SURROGATE_START = (char) 0xD800;
internal const char HIGH_SURROGATE_END = (char) 0xDBFF;
internal const char LOW_SURROGATE_START = (char) 0xDC00;
internal const char LOW_SURROGATE_END = (char) 0xDFFF;
400
// Tells whether the character at s [index] belongs to one of the three
// separator categories (space, line, paragraph).
internal static bool IsWhiteSpace (String s, int index)
{
        Contract.Assert (s != null, "s!=null");
        Contract.Assert (index >= 0 && index < s.Length, "index >= 0 && index < s.Length");

        // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator",
        // and U+2029 is the only character which is under the category "ParagraphSeparator".
        UnicodeCategory category = GetUnicodeCategory (s, index);
        return category == UnicodeCategory.SpaceSeparator
                || category == UnicodeCategory.LineSeparator
                || category == UnicodeCategory.ParagraphSeparator;
}
417
418
// Tells whether c belongs to one of the three separator categories
// (space, line, paragraph).
internal static bool IsWhiteSpace (char c)
{
        // In Unicode 3.0, U+2028 is the only character which is under the category "LineSeparator",
        // and U+2029 is the only character which is under the category "ParagraphSeparator".
        UnicodeCategory category = GetUnicodeCategory (c);
        return category == UnicodeCategory.SpaceSeparator
                || category == UnicodeCategory.LineSeparator
                || category == UnicodeCategory.ParagraphSeparator;
}
433         }
434 }