Implement MachineKey.Protect and MachineKey.Unprotect
[mono.git] / mcs / class / System / System.Text.RegularExpressions / category.cs
1 //\r
2 // assembly:    System\r
3 // namespace:   System.Text.RegularExpressions\r
4 // file:        category.cs\r
5 //\r
6 // author:      Dan Lewis (dlewis@gmx.co.uk)\r
7 //              (c) 2002\r
8 \r
9 //\r
10 // Permission is hereby granted, free of charge, to any person obtaining\r
11 // a copy of this software and associated documentation files (the\r
12 // "Software"), to deal in the Software without restriction, including\r
13 // without limitation the rights to use, copy, modify, merge, publish,\r
14 // distribute, sublicense, and/or sell copies of the Software, and to\r
15 // permit persons to whom the Software is furnished to do so, subject to\r
16 // the following conditions:\r
17 // \r
18 // The above copyright notice and this permission notice shall be\r
19 // included in all copies or substantial portions of the Software.\r
20 // \r
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE\r
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\r
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION\r
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\r
28 //\r
29 \r
30 using System;\r
31 using System.Globalization;\r
32 \r
33 namespace System.Text.RegularExpressions {\r
34 \r
// Identifies a class of characters that a single input character can be
// tested against (see CategoryUtils.IsCategory).
//
// The numeric value of every member is implied by its position in this
// list, so members must never be reordered or inserted in the middle.
enum Category : ushort {
	None = 0,

	// ---- canonical classes ----

	Any,			// .   any character except newline
	AnySingleline,		// .   any character (s option)
	Word,			// \w  any word character
	Digit,			// \d  any digit character
	WhiteSpace,		// \s  any whitespace character

	// ---- ECMAScript classes ----

	EcmaAny,
	EcmaAnySingleline,
	EcmaWord,		// [a-zA-Z_0-9]
	EcmaDigit,		// [0-9]
	EcmaWhiteSpace,		// [ \f\n\r\t\v]

	// ---- Unicode general categories: major classes ----

	UnicodeL,		// Letter
	UnicodeM,		// Mark
	UnicodeN,		// Number
	UnicodeZ,		// Separator
	UnicodeP,		// Punctuation
	UnicodeS,		// Symbol
	UnicodeC,		// Other

	// ---- Unicode general categories: subclasses ----

	UnicodeLu,		// UppercaseLetter
	UnicodeLl,		// LowercaseLetter
	UnicodeLt,		// TitlecaseLetter
	UnicodeLm,		// ModifierLetter
	UnicodeLo,		// OtherLetter
	UnicodeMn,		// NonspacingMark
	UnicodeMe,		// EnclosingMark
	UnicodeMc,		// SpacingMark
	UnicodeNd,		// DecimalNumber
	UnicodeNl,		// LetterNumber
	UnicodeNo,		// OtherNumber
	UnicodeZs,		// SpaceSeparator
	UnicodeZl,		// LineSeparator
	UnicodeZp,		// ParagraphSeparator
	UnicodePd,		// DashPunctuation
	UnicodePs,		// OpenPunctuation
	UnicodePi,		// InitialPunctuation
	UnicodePe,		// ClosePunctuation
	UnicodePf,		// FinalPunctuation
	UnicodePc,		// ConnectorPunctuation
	UnicodePo,		// OtherPunctuation
	UnicodeSm,		// MathSymbol
	UnicodeSc,		// CurrencySymbol
	UnicodeSk,		// ModifierSymbol
	UnicodeSo,		// OtherSymbol
	UnicodeCc,		// Control
	UnicodeCf,		// Format
	UnicodeCo,		// PrivateUse
	UnicodeCs,		// Surrogate
	UnicodeCn,		// Unassigned

	// ---- Unicode block ranges ----
	//
	// Members tagged "(*)" are valid Unicode block ranges, but the MS
	// parser does not seem to accept them in the \p{...} syntax.
	// The reason is unknown.

	UnicodeBasicLatin,
	UnicodeLatin1Supplement,			// (*)
	UnicodeLatinExtendedA,				// (*)
	UnicodeLatinExtendedB,				// (*)
	UnicodeIPAExtensions,
	UnicodeSpacingModifierLetters,
	UnicodeCombiningDiacriticalMarks,
	UnicodeGreek,
	UnicodeCyrillic,
	UnicodeArmenian,
	UnicodeHebrew,
	UnicodeArabic,
	UnicodeSyriac,
	UnicodeThaana,
	UnicodeDevanagari,
	UnicodeBengali,
	UnicodeGurmukhi,
	UnicodeGujarati,
	UnicodeOriya,
	UnicodeTamil,
	UnicodeTelugu,
	UnicodeKannada,
	UnicodeMalayalam,
	UnicodeSinhala,
	UnicodeThai,
	UnicodeLao,
	UnicodeTibetan,
	UnicodeMyanmar,
	UnicodeGeorgian,
	UnicodeHangulJamo,
	UnicodeEthiopic,
	UnicodeCherokee,
	UnicodeUnifiedCanadianAboriginalSyllabics,
	UnicodeOgham,
	UnicodeRunic,
	UnicodeKhmer,
	UnicodeMongolian,
	UnicodeLatinExtendedAdditional,
	UnicodeGreekExtended,
	UnicodeGeneralPunctuation,
	UnicodeSuperscriptsandSubscripts,
	UnicodeCurrencySymbols,
	UnicodeCombiningMarksforSymbols,
	UnicodeLetterlikeSymbols,
	UnicodeNumberForms,
	UnicodeArrows,
	UnicodeMathematicalOperators,
	UnicodeMiscellaneousTechnical,
	UnicodeControlPictures,
	UnicodeOpticalCharacterRecognition,
	UnicodeEnclosedAlphanumerics,
	UnicodeBoxDrawing,
	UnicodeBlockElements,
	UnicodeGeometricShapes,
	UnicodeMiscellaneousSymbols,
	UnicodeDingbats,
	UnicodeBraillePatterns,
	UnicodeCJKRadicalsSupplement,
	UnicodeKangxiRadicals,
	UnicodeIdeographicDescriptionCharacters,
	UnicodeCJKSymbolsandPunctuation,
	UnicodeHiragana,
	UnicodeKatakana,
	UnicodeBopomofo,
	UnicodeHangulCompatibilityJamo,
	UnicodeKanbun,
	UnicodeBopomofoExtended,
	UnicodeEnclosedCJKLettersandMonths,
	UnicodeCJKCompatibility,
	UnicodeCJKUnifiedIdeographsExtensionA,
	UnicodeCJKUnifiedIdeographs,
	UnicodeYiSyllables,
	UnicodeYiRadicals,
	UnicodeHangulSyllables,
	UnicodeHighSurrogates,
	UnicodeHighPrivateUseSurrogates,
	UnicodeLowSurrogates,
	UnicodePrivateUse,
	UnicodeCJKCompatibilityIdeographs,
	UnicodeAlphabeticPresentationForms,
	UnicodeArabicPresentationFormsA,		// (*)
	UnicodeCombiningHalfMarks,
	UnicodeCJKCompatibilityForms,
	UnicodeSmallFormVariants,
	UnicodeArabicPresentationFormsB,		// (*)
	UnicodeSpecials,
	UnicodeHalfwidthandFullwidthForms,

	// Blocks located above U+FFFF.

	UnicodeOldItalic,
	UnicodeGothic,
	UnicodeDeseret,
	UnicodeByzantineMusicalSymbols,
	UnicodeMusicalSymbols,
	UnicodeMathematicalAlphanumericSymbols,
	UnicodeCJKUnifiedIdeographsExtensionB,
	UnicodeCJKCompatibilityIdeographsSupplement,
	UnicodeTags,

	// Sentinel: must stay the final member so that it always carries
	// the highest value in the enumeration.
	LastValue
}
202 \r
// Helpers for working with Category values: resolving the name written in
// a \p{...} escape to a Category, and testing whether a character belongs
// to a given Category.
class CategoryUtils {
	// Maps a Unicode category or block name (e.g. "Lu", "IsGreek") to the
	// matching Category member.
	//
	// name: the property name; a leading "Is" is stripped before lookup.
	//
	// Returns the "Unicode"-prefixed Category member whose name matches
	// exactly (case-sensitive), or Category.None when there is no such
	// member or when name is null.
	public static Category CategoryFromName (string name) {
		if (name == null)
			return Category.None;

		// remove prefix from block range
		if (name.StartsWith ("Is", StringComparison.Ordinal))
			name = name.Substring (2);

		string member = "Unicode" + name;

		// Enum.Parse also accepts comma-separated lists and ignores
		// surrounding white space, so an invalid name such as "L,Any"
		// would resolve to a real member. Requiring an exact member name
		// first rejects those inputs and avoids using an exception to
		// signal the (common) unknown-name case.
		if (!Enum.IsDefined (typeof (Category), member))
			return Category.None;

		return (Category) Enum.Parse (typeof (Category), member, false);
	}

	// Tests whether character c belongs to category cat.
	//
	// Returns false for Category.None, for every block that lies above
	// U+FFFF (a single char cannot be in one of those), and for any
	// value that is not handled below.
	public static bool IsCategory (Category cat, char c) {
		switch (cat) {
		case Category.None:
			return false;

		// canonical classes

		case Category.Any:
			return c != '\n';

		case Category.AnySingleline:
			return true;

		case Category.Word:
			return
				Char.IsLetterOrDigit (c) ||
				IsCategory (UnicodeCategory.ConnectorPunctuation, c);

		case Category.Digit:
			return Char.IsDigit (c);

		case Category.WhiteSpace:
			return Char.IsWhiteSpace (c);

		// ECMA categories

		case Category.EcmaAny:
			return c != '\n';

		case Category.EcmaAnySingleline:
			return true;

		case Category.EcmaWord:
			return
				InRange (c, 'a', 'z') ||
				InRange (c, 'A', 'Z') ||
				InRange (c, '0', '9') ||
				c == '_';

		case Category.EcmaDigit:
			return InRange (c, '0', '9');

		case Category.EcmaWhiteSpace:
			return
				c == ' '  ||
				c == '\f' ||
				c == '\n' ||
				c == '\r' ||
				c == '\t' ||
				c == '\v';

		// Unicode categories...

		// letter

		case Category.UnicodeLu: return IsCategory (UnicodeCategory.UppercaseLetter, c);
		case Category.UnicodeLl: return IsCategory (UnicodeCategory.LowercaseLetter, c);
		case Category.UnicodeLt: return IsCategory (UnicodeCategory.TitlecaseLetter, c);
		case Category.UnicodeLm: return IsCategory (UnicodeCategory.ModifierLetter, c);
		case Category.UnicodeLo: return IsCategory (UnicodeCategory.OtherLetter, c);

		// mark

		case Category.UnicodeMn: return IsCategory (UnicodeCategory.NonSpacingMark, c);
		case Category.UnicodeMe: return IsCategory (UnicodeCategory.EnclosingMark, c);
		case Category.UnicodeMc: return IsCategory (UnicodeCategory.SpacingCombiningMark, c);

		// number

		case Category.UnicodeNd: return IsCategory (UnicodeCategory.DecimalDigitNumber, c);
		case Category.UnicodeNl: return IsCategory (UnicodeCategory.LetterNumber, c);
		case Category.UnicodeNo: return IsCategory (UnicodeCategory.OtherNumber, c);

		// separator

		case Category.UnicodeZs: return IsCategory (UnicodeCategory.SpaceSeparator, c);
		case Category.UnicodeZl: return IsCategory (UnicodeCategory.LineSeparator, c);
		case Category.UnicodeZp: return IsCategory (UnicodeCategory.ParagraphSeparator, c);

		// punctuation

		case Category.UnicodePd: return IsCategory (UnicodeCategory.DashPunctuation, c);
		case Category.UnicodePs: return IsCategory (UnicodeCategory.OpenPunctuation, c);
		case Category.UnicodePi: return IsCategory (UnicodeCategory.InitialQuotePunctuation, c);
		case Category.UnicodePe: return IsCategory (UnicodeCategory.ClosePunctuation, c);
		case Category.UnicodePf: return IsCategory (UnicodeCategory.FinalQuotePunctuation, c);
		case Category.UnicodePc: return IsCategory (UnicodeCategory.ConnectorPunctuation, c);
		case Category.UnicodePo: return IsCategory (UnicodeCategory.OtherPunctuation, c);

		// symbol

		case Category.UnicodeSm: return IsCategory (UnicodeCategory.MathSymbol, c);
		case Category.UnicodeSc: return IsCategory (UnicodeCategory.CurrencySymbol, c);
		case Category.UnicodeSk: return IsCategory (UnicodeCategory.ModifierSymbol, c);
		case Category.UnicodeSo: return IsCategory (UnicodeCategory.OtherSymbol, c);

		// other

		case Category.UnicodeCc: return IsCategory (UnicodeCategory.Control, c);
		case Category.UnicodeCf: return IsCategory (UnicodeCategory.Format, c);
		case Category.UnicodeCo: return IsCategory (UnicodeCategory.PrivateUse, c);
		case Category.UnicodeCs: return IsCategory (UnicodeCategory.Surrogate, c);
		case Category.UnicodeCn: return IsCategory (UnicodeCategory.OtherNotAssigned, c);

		// Major classes: the character's category is looked up once and
		// compared against every subclass of the major class.

		case Category.UnicodeL: // letter
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.UppercaseLetter:
			case UnicodeCategory.LowercaseLetter:
			case UnicodeCategory.TitlecaseLetter:
			case UnicodeCategory.ModifierLetter:
			case UnicodeCategory.OtherLetter:
				return true;
			default:
				return false;
			}

		case Category.UnicodeM: // mark
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.NonSpacingMark:
			case UnicodeCategory.EnclosingMark:
			case UnicodeCategory.SpacingCombiningMark:
				return true;
			default:
				return false;
			}

		case Category.UnicodeN: // number
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.DecimalDigitNumber:
			case UnicodeCategory.LetterNumber:
			case UnicodeCategory.OtherNumber:
				return true;
			default:
				return false;
			}

		case Category.UnicodeZ: // separator
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.SpaceSeparator:
			case UnicodeCategory.LineSeparator:
			case UnicodeCategory.ParagraphSeparator:
				return true;
			default:
				return false;
			}

		case Category.UnicodeP: // punctuation
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.DashPunctuation:
			case UnicodeCategory.OpenPunctuation:
			case UnicodeCategory.InitialQuotePunctuation:
			case UnicodeCategory.ClosePunctuation:
			case UnicodeCategory.FinalQuotePunctuation:
			case UnicodeCategory.ConnectorPunctuation:
			case UnicodeCategory.OtherPunctuation:
				return true;
			default:
				return false;
			}

		case Category.UnicodeS: // symbol
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.MathSymbol:
			case UnicodeCategory.CurrencySymbol:
			case UnicodeCategory.ModifierSymbol:
			case UnicodeCategory.OtherSymbol:
				return true;
			default:
				return false;
			}

		case Category.UnicodeC: // other
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.Control:
			case UnicodeCategory.Format:
			case UnicodeCategory.PrivateUse:
			case UnicodeCategory.Surrogate:
			case UnicodeCategory.OtherNotAssigned:
				return true;
			default:
				return false;
			}

		// Unicode block ranges (both bounds inclusive)...

		case Category.UnicodeBasicLatin:
			return InRange (c, '\u0000', '\u007F');

		case Category.UnicodeLatin1Supplement:
			return InRange (c, '\u0080', '\u00FF');

		case Category.UnicodeLatinExtendedA:
			return InRange (c, '\u0100', '\u017F');

		case Category.UnicodeLatinExtendedB:
			return InRange (c, '\u0180', '\u024F');

		case Category.UnicodeIPAExtensions:
			return InRange (c, '\u0250', '\u02AF');

		case Category.UnicodeSpacingModifierLetters:
			return InRange (c, '\u02B0', '\u02FF');

		case Category.UnicodeCombiningDiacriticalMarks:
			return InRange (c, '\u0300', '\u036F');

		case Category.UnicodeGreek:
			return InRange (c, '\u0370', '\u03FF');

		case Category.UnicodeCyrillic:
			return InRange (c, '\u0400', '\u04FF');

		case Category.UnicodeArmenian:
			return InRange (c, '\u0530', '\u058F');

		case Category.UnicodeHebrew:
			return InRange (c, '\u0590', '\u05FF');

		case Category.UnicodeArabic:
			return InRange (c, '\u0600', '\u06FF');

		case Category.UnicodeSyriac:
			return InRange (c, '\u0700', '\u074F');

		case Category.UnicodeThaana:
			return InRange (c, '\u0780', '\u07BF');

		case Category.UnicodeDevanagari:
			return InRange (c, '\u0900', '\u097F');

		case Category.UnicodeBengali:
			return InRange (c, '\u0980', '\u09FF');

		case Category.UnicodeGurmukhi:
			return InRange (c, '\u0A00', '\u0A7F');

		case Category.UnicodeGujarati:
			return InRange (c, '\u0A80', '\u0AFF');

		case Category.UnicodeOriya:
			return InRange (c, '\u0B00', '\u0B7F');

		case Category.UnicodeTamil:
			return InRange (c, '\u0B80', '\u0BFF');

		case Category.UnicodeTelugu:
			return InRange (c, '\u0C00', '\u0C7F');

		case Category.UnicodeKannada:
			return InRange (c, '\u0C80', '\u0CFF');

		case Category.UnicodeMalayalam:
			return InRange (c, '\u0D00', '\u0D7F');

		case Category.UnicodeSinhala:
			return InRange (c, '\u0D80', '\u0DFF');

		case Category.UnicodeThai:
			return InRange (c, '\u0E00', '\u0E7F');

		case Category.UnicodeLao:
			return InRange (c, '\u0E80', '\u0EFF');

		case Category.UnicodeTibetan:
			return InRange (c, '\u0F00', '\u0FFF');

		case Category.UnicodeMyanmar:
			return InRange (c, '\u1000', '\u109F');

		case Category.UnicodeGeorgian:
			return InRange (c, '\u10A0', '\u10FF');

		case Category.UnicodeHangulJamo:
			return InRange (c, '\u1100', '\u11FF');

		case Category.UnicodeEthiopic:
			return InRange (c, '\u1200', '\u137F');

		case Category.UnicodeCherokee:
			return InRange (c, '\u13A0', '\u13FF');

		case Category.UnicodeUnifiedCanadianAboriginalSyllabics:
			return InRange (c, '\u1400', '\u167F');

		case Category.UnicodeOgham:
			return InRange (c, '\u1680', '\u169F');

		case Category.UnicodeRunic:
			return InRange (c, '\u16A0', '\u16FF');

		case Category.UnicodeKhmer:
			return InRange (c, '\u1780', '\u17FF');

		case Category.UnicodeMongolian:
			return InRange (c, '\u1800', '\u18AF');

		case Category.UnicodeLatinExtendedAdditional:
			return InRange (c, '\u1E00', '\u1EFF');

		case Category.UnicodeGreekExtended:
			return InRange (c, '\u1F00', '\u1FFF');

		case Category.UnicodeGeneralPunctuation:
			return InRange (c, '\u2000', '\u206F');

		case Category.UnicodeSuperscriptsandSubscripts:
			return InRange (c, '\u2070', '\u209F');

		case Category.UnicodeCurrencySymbols:
			return InRange (c, '\u20A0', '\u20CF');

		case Category.UnicodeCombiningMarksforSymbols:
			return InRange (c, '\u20D0', '\u20FF');

		case Category.UnicodeLetterlikeSymbols:
			return InRange (c, '\u2100', '\u214F');

		case Category.UnicodeNumberForms:
			return InRange (c, '\u2150', '\u218F');

		case Category.UnicodeArrows:
			return InRange (c, '\u2190', '\u21FF');

		case Category.UnicodeMathematicalOperators:
			return InRange (c, '\u2200', '\u22FF');

		case Category.UnicodeMiscellaneousTechnical:
			return InRange (c, '\u2300', '\u23FF');

		case Category.UnicodeControlPictures:
			return InRange (c, '\u2400', '\u243F');

		case Category.UnicodeOpticalCharacterRecognition:
			return InRange (c, '\u2440', '\u245F');

		case Category.UnicodeEnclosedAlphanumerics:
			return InRange (c, '\u2460', '\u24FF');

		case Category.UnicodeBoxDrawing:
			return InRange (c, '\u2500', '\u257F');

		case Category.UnicodeBlockElements:
			return InRange (c, '\u2580', '\u259F');

		case Category.UnicodeGeometricShapes:
			return InRange (c, '\u25A0', '\u25FF');

		case Category.UnicodeMiscellaneousSymbols:
			return InRange (c, '\u2600', '\u26FF');

		case Category.UnicodeDingbats:
			return InRange (c, '\u2700', '\u27BF');

		case Category.UnicodeBraillePatterns:
			return InRange (c, '\u2800', '\u28FF');

		case Category.UnicodeCJKRadicalsSupplement:
			return InRange (c, '\u2E80', '\u2EFF');

		case Category.UnicodeKangxiRadicals:
			return InRange (c, '\u2F00', '\u2FDF');

		case Category.UnicodeIdeographicDescriptionCharacters:
			return InRange (c, '\u2FF0', '\u2FFF');

		case Category.UnicodeCJKSymbolsandPunctuation:
			return InRange (c, '\u3000', '\u303F');

		case Category.UnicodeHiragana:
			return InRange (c, '\u3040', '\u309F');

		case Category.UnicodeKatakana:
			return InRange (c, '\u30A0', '\u30FF');

		case Category.UnicodeBopomofo:
			return InRange (c, '\u3100', '\u312F');

		case Category.UnicodeHangulCompatibilityJamo:
			return InRange (c, '\u3130', '\u318F');

		case Category.UnicodeKanbun:
			return InRange (c, '\u3190', '\u319F');

		case Category.UnicodeBopomofoExtended:
			return InRange (c, '\u31A0', '\u31BF');

		case Category.UnicodeEnclosedCJKLettersandMonths:
			return InRange (c, '\u3200', '\u32FF');

		case Category.UnicodeCJKCompatibility:
			return InRange (c, '\u3300', '\u33FF');

		case Category.UnicodeCJKUnifiedIdeographsExtensionA:
			return InRange (c, '\u3400', '\u4DB5');

		case Category.UnicodeCJKUnifiedIdeographs:
			return InRange (c, '\u4E00', '\u9FFF');

		case Category.UnicodeYiSyllables:
			return InRange (c, '\uA000', '\uA48F');

		case Category.UnicodeYiRadicals:
			return InRange (c, '\uA490', '\uA4CF');

		case Category.UnicodeHangulSyllables:
			return InRange (c, '\uAC00', '\uD7A3');

		case Category.UnicodeHighSurrogates:
			return InRange (c, '\uD800', '\uDB7F');

		case Category.UnicodeHighPrivateUseSurrogates:
			return InRange (c, '\uDB80', '\uDBFF');

		case Category.UnicodeLowSurrogates:
			return InRange (c, '\uDC00', '\uDFFF');

		case Category.UnicodePrivateUse:
			return InRange (c, '\uE000', '\uF8FF');

		case Category.UnicodeCJKCompatibilityIdeographs:
			return InRange (c, '\uF900', '\uFAFF');

		case Category.UnicodeAlphabeticPresentationForms:
			return InRange (c, '\uFB00', '\uFB4F');

		case Category.UnicodeArabicPresentationFormsA:
			return InRange (c, '\uFB50', '\uFDFF');

		case Category.UnicodeCombiningHalfMarks:
			return InRange (c, '\uFE20', '\uFE2F');

		case Category.UnicodeCJKCompatibilityForms:
			return InRange (c, '\uFE30', '\uFE4F');

		case Category.UnicodeSmallFormVariants:
			return InRange (c, '\uFE50', '\uFE6F');

		case Category.UnicodeArabicPresentationFormsB:
			// U+FEFF is deliberately left out here; it is matched by
			// UnicodeSpecials below.
			return InRange (c, '\uFE70', '\uFEFE');

		case Category.UnicodeHalfwidthandFullwidthForms:
			return InRange (c, '\uFF00', '\uFFEF');

		case Category.UnicodeSpecials:
			return
				c == '\uFEFF' ||
				InRange (c, '\uFFF0', '\uFFFD');

		// these block ranges begin above 0x10000, so no single char
		// value can ever fall inside them

		case Category.UnicodeOldItalic:
		case Category.UnicodeGothic:
		case Category.UnicodeDeseret:
		case Category.UnicodeByzantineMusicalSymbols:
		case Category.UnicodeMusicalSymbols:
		case Category.UnicodeMathematicalAlphanumericSymbols:
		case Category.UnicodeCJKUnifiedIdeographsExtensionB:
		case Category.UnicodeCJKCompatibilityIdeographsSupplement:
		case Category.UnicodeTags:
			return false;

		default:
			return false;
		}
	}

	// Returns true when the Unicode general category of c is exactly uc.
	private static bool IsCategory (UnicodeCategory uc, char c) {
		return Char.GetUnicodeCategory (c) == uc;
	}

	// Returns true when c lies in the inclusive range [first, last].
	private static bool InRange (char c, char first, char last) {
		return first <= c && c <= last;
	}
}
660 }\r