2004-03-17 Francois Beauchemin <beauche@softhome.net>
[mono.git] / mcs / class / System / System.Text.RegularExpressions / category.cs
1 //\r
2 // assembly:    System\r
3 // namespace:   System.Text.RegularExpressions\r
4 // file:        category.cs\r
5 //\r
6 // author:      Dan Lewis (dlewis@gmx.co.uk)\r
7 //              (c) 2002\r
8 \r
9 using System;\r
10 using System.Globalization;\r
11 \r
12 namespace System.Text.RegularExpressions {\r
13 \r
// Character classes that the regular expression engine can test a
// character against.
//
// Members have no explicit values: each one is numbered by its position,
// starting with None == 0. Inserting, removing or reordering members
// therefore changes the value of every member that follows.
enum Category : ushort {
        None,

        // ---- canonical classes ----

        Any,                    // .   any character except newline
        AnySingleline,          // .   any character (s option)
        Word,                   // \w  any word character
        Digit,                  // \d  any digit character
        WhiteSpace,             // \s  any whitespace character

        // ---- ECMAScript classes ----

        EcmaAny,
        EcmaAnySingleline,
        EcmaWord,               // [a-zA-Z_0-9]
        EcmaDigit,              // [0-9]
        EcmaWhiteSpace,         // [ \f\n\r\t\v]

        // ---- Unicode general categories: major classes ----

        UnicodeL,               // Letter
        UnicodeM,               // Mark
        UnicodeN,               // Number
        UnicodeZ,               // Separator
        UnicodeP,               // Punctuation
        UnicodeS,               // Symbol
        UnicodeC,               // Other

        // ---- Unicode general categories: subclasses ----

        // letters
        UnicodeLu,              // UppercaseLetter
        UnicodeLl,              // LowercaseLetter
        UnicodeLt,              // TitlecaseLetter
        UnicodeLm,              // ModifierLetter
        UnicodeLo,              // OtherLetter

        // marks
        UnicodeMn,              // NonspacingMark
        UnicodeMe,              // EnclosingMark
        UnicodeMc,              // SpacingMark

        // numbers
        UnicodeNd,              // DecimalNumber
        UnicodeNl,              // LetterNumber
        UnicodeNo,              // OtherNumber

        // separators
        UnicodeZs,              // SpaceSeparator
        UnicodeZl,              // LineSeparator
        UnicodeZp,              // ParagraphSeparator

        // punctuation
        UnicodePd,              // DashPunctuation
        UnicodePs,              // OpenPunctuation
        UnicodePi,              // InitialPunctuation
        UnicodePe,              // ClosePunctuation
        UnicodePf,              // FinalPunctuation
        UnicodePc,              // ConnectorPunctuation
        UnicodePo,              // OtherPunctuation

        // symbols
        UnicodeSm,              // MathSymbol
        UnicodeSc,              // CurrencySymbol
        UnicodeSk,              // ModifierSymbol
        UnicodeSo,              // OtherSymbol

        // other
        UnicodeCc,              // Control
        UnicodeCf,              // Format
        UnicodeCo,              // PrivateUse
        UnicodeCs,              // Surrogate
        UnicodeCn,              // Unassigned

        // ---- Unicode block ranges ----
        //
        // Note: the blocks marked with a star are valid Unicode block
        // ranges, but the MS parser does not seem to accept them when
        // written in the \p{...} form. Any ideas?

        UnicodeBasicLatin,
        UnicodeLatin1Supplement,                        // *
        UnicodeLatinExtendedA,                          // *
        UnicodeLatinExtendedB,                          // *
        UnicodeIPAExtensions,
        UnicodeSpacingModifierLetters,
        UnicodeCombiningDiacriticalMarks,
        UnicodeGreek,
        UnicodeCyrillic,
        UnicodeArmenian,
        UnicodeHebrew,
        UnicodeArabic,
        UnicodeSyriac,
        UnicodeThaana,
        UnicodeDevanagari,
        UnicodeBengali,
        UnicodeGurmukhi,
        UnicodeGujarati,
        UnicodeOriya,
        UnicodeTamil,
        UnicodeTelugu,
        UnicodeKannada,
        UnicodeMalayalam,
        UnicodeSinhala,
        UnicodeThai,
        UnicodeLao,
        UnicodeTibetan,
        UnicodeMyanmar,
        UnicodeGeorgian,
        UnicodeHangulJamo,
        UnicodeEthiopic,
        UnicodeCherokee,
        UnicodeUnifiedCanadianAboriginalSyllabics,
        UnicodeOgham,
        UnicodeRunic,
        UnicodeKhmer,
        UnicodeMongolian,
        UnicodeLatinExtendedAdditional,
        UnicodeGreekExtended,
        UnicodeGeneralPunctuation,
        UnicodeSuperscriptsandSubscripts,
        UnicodeCurrencySymbols,
        UnicodeCombiningMarksforSymbols,
        UnicodeLetterlikeSymbols,
        UnicodeNumberForms,
        UnicodeArrows,
        UnicodeMathematicalOperators,
        UnicodeMiscellaneousTechnical,
        UnicodeControlPictures,
        UnicodeOpticalCharacterRecognition,
        UnicodeEnclosedAlphanumerics,
        UnicodeBoxDrawing,
        UnicodeBlockElements,
        UnicodeGeometricShapes,
        UnicodeMiscellaneousSymbols,
        UnicodeDingbats,
        UnicodeBraillePatterns,
        UnicodeCJKRadicalsSupplement,
        UnicodeKangxiRadicals,
        UnicodeIdeographicDescriptionCharacters,
        UnicodeCJKSymbolsandPunctuation,
        UnicodeHiragana,
        UnicodeKatakana,
        UnicodeBopomofo,
        UnicodeHangulCompatibilityJamo,
        UnicodeKanbun,
        UnicodeBopomofoExtended,
        UnicodeEnclosedCJKLettersandMonths,
        UnicodeCJKCompatibility,
        UnicodeCJKUnifiedIdeographsExtensionA,
        UnicodeCJKUnifiedIdeographs,
        UnicodeYiSyllables,
        UnicodeYiRadicals,
        UnicodeHangulSyllables,
        UnicodeHighSurrogates,
        UnicodeHighPrivateUseSurrogates,
        UnicodeLowSurrogates,
        UnicodePrivateUse,
        UnicodeCJKCompatibilityIdeographs,
        UnicodeAlphabeticPresentationForms,
        UnicodeArabicPresentationFormsA,                // *
        UnicodeCombiningHalfMarks,
        UnicodeCJKCompatibilityForms,
        UnicodeSmallFormVariants,
        UnicodeArabicPresentationFormsB,                // *
        UnicodeSpecials,
        UnicodeHalfwidthandFullwidthForms,

        // block ranges that lie above U+FFFF

        UnicodeOldItalic,
        UnicodeGothic,
        UnicodeDeseret,
        UnicodeByzantineMusicalSymbols,
        UnicodeMusicalSymbols,
        UnicodeMathematicalAlphanumericSymbols,
        UnicodeCJKUnifiedIdeographsExtensionB,
        UnicodeCJKCompatibilityIdeographsSupplement,
        UnicodeTags
}
179 \r
// Helpers that map a category name to a Category value and that test
// whether a single UTF-16 character belongs to a Category.
class CategoryUtils {
        // Resolves a Unicode category or block name to its Category value.
        //
        // name: a general category ("L", "Lu", ...) or a block name
        //       ("BasicLatin", ...), optionally prefixed with "Is".
        //       Must not be null.
        //
        // Returns the member named "Unicode" + name, or Category.None when
        // there is no such member.
        public static Category CategoryFromName (string name) {
                // Remove the "Is" prefix from a block range. The prefix is
                // tested character by character so that the result does not
                // depend on the current culture.
                if (name.Length >= 2 && name [0] == 'I' && name [1] == 's')
                        name = name.Substring (2);

                // Enum.Parse treats a comma-separated string as a list of
                // members and ORs their values together, which would turn
                // e.g. "L,UnicodeM" into some unrelated Category. A single
                // name never contains a comma, so reject such input here.
                if (name.IndexOf (',') >= 0)
                        return Category.None;

                try {
                        return (Category)Enum.Parse (typeof (Category), "Unicode" + name);
                }
                catch (ArgumentException) {
                        // thrown by Enum.Parse when the name is not a member
                        return Category.None;
                }
        }

        // Tests whether the character c belongs to the category cat.
        //
        // Category.None, the block ranges located above U+FFFF, and any
        // value that is not handled below never match.
        public static bool IsCategory (Category cat, char c) {
                switch (cat) {
                case Category.None:
                        return false;

                // canonical classes

                case Category.Any:
                        return c != '\n';

                case Category.AnySingleline:
                        return true;

                case Category.Word:
                        return
                                Char.IsLetterOrDigit (c) ||
                                IsCategory (UnicodeCategory.ConnectorPunctuation, c);

                case Category.Digit:
                        return Char.IsDigit (c);

                case Category.WhiteSpace:
                        return Char.IsWhiteSpace (c);

                // ECMA categories

                case Category.EcmaAny:
                        return c != '\n';

                case Category.EcmaAnySingleline:
                        return true;

                case Category.EcmaWord:
                        return
                                InRange (c, 'a', 'z') ||
                                InRange (c, 'A', 'Z') ||
                                InRange (c, '0', '9') ||
                                c == '_';

                case Category.EcmaDigit:
                        // The upper bound must be the character '9'. Comparing
                        // with the number 9 (the TAB code point) would make
                        // this class match nothing at all.
                        return InRange (c, '0', '9');

                case Category.EcmaWhiteSpace:
                        return
                                c == ' '  ||
                                c == '\f' ||
                                c == '\n' ||
                                c == '\r' ||
                                c == '\t' ||
                                c == '\v';

                // Unicode general categories: subclasses

                // letter

                case Category.UnicodeLu: return IsCategory (UnicodeCategory.UppercaseLetter, c);
                case Category.UnicodeLl: return IsCategory (UnicodeCategory.LowercaseLetter, c);
                case Category.UnicodeLt: return IsCategory (UnicodeCategory.TitlecaseLetter, c);
                case Category.UnicodeLm: return IsCategory (UnicodeCategory.ModifierLetter, c);
                case Category.UnicodeLo: return IsCategory (UnicodeCategory.OtherLetter, c);

                // mark

                case Category.UnicodeMn: return IsCategory (UnicodeCategory.NonSpacingMark, c);
                case Category.UnicodeMe: return IsCategory (UnicodeCategory.EnclosingMark, c);
                case Category.UnicodeMc: return IsCategory (UnicodeCategory.SpacingCombiningMark, c);

                // number

                case Category.UnicodeNd: return IsCategory (UnicodeCategory.DecimalDigitNumber, c);
                case Category.UnicodeNl: return IsCategory (UnicodeCategory.LetterNumber, c);
                case Category.UnicodeNo: return IsCategory (UnicodeCategory.OtherNumber, c);

                // separator

                case Category.UnicodeZs: return IsCategory (UnicodeCategory.SpaceSeparator, c);
                case Category.UnicodeZl: return IsCategory (UnicodeCategory.LineSeparator, c);
                case Category.UnicodeZp: return IsCategory (UnicodeCategory.ParagraphSeparator, c);

                // punctuation

                case Category.UnicodePd: return IsCategory (UnicodeCategory.DashPunctuation, c);
                case Category.UnicodePs: return IsCategory (UnicodeCategory.OpenPunctuation, c);
                case Category.UnicodePi: return IsCategory (UnicodeCategory.InitialQuotePunctuation, c);
                case Category.UnicodePe: return IsCategory (UnicodeCategory.ClosePunctuation, c);
                case Category.UnicodePf: return IsCategory (UnicodeCategory.FinalQuotePunctuation, c);
                case Category.UnicodePc: return IsCategory (UnicodeCategory.ConnectorPunctuation, c);
                case Category.UnicodePo: return IsCategory (UnicodeCategory.OtherPunctuation, c);

                // symbol

                case Category.UnicodeSm: return IsCategory (UnicodeCategory.MathSymbol, c);
                case Category.UnicodeSc: return IsCategory (UnicodeCategory.CurrencySymbol, c);
                case Category.UnicodeSk: return IsCategory (UnicodeCategory.ModifierSymbol, c);
                case Category.UnicodeSo: return IsCategory (UnicodeCategory.OtherSymbol, c);

                // other

                case Category.UnicodeCc: return IsCategory (UnicodeCategory.Control, c);
                case Category.UnicodeCf: return IsCategory (UnicodeCategory.Format, c);
                case Category.UnicodeCo: return IsCategory (UnicodeCategory.PrivateUse, c);
                case Category.UnicodeCs: return IsCategory (UnicodeCategory.Surrogate, c);
                case Category.UnicodeCn: return IsCategory (UnicodeCategory.OtherNotAssigned, c);

                // Unicode general categories: major classes. The character is
                // classified once and the result is matched against every
                // subclass that makes up the major class.

                case Category.UnicodeL: // letter
                        switch (Char.GetUnicodeCategory (c)) {
                        case UnicodeCategory.UppercaseLetter:
                        case UnicodeCategory.LowercaseLetter:
                        case UnicodeCategory.TitlecaseLetter:
                        case UnicodeCategory.ModifierLetter:
                        case UnicodeCategory.OtherLetter:
                                return true;
                        default:
                                return false;
                        }

                case Category.UnicodeM: // mark
                        switch (Char.GetUnicodeCategory (c)) {
                        case UnicodeCategory.NonSpacingMark:
                        case UnicodeCategory.EnclosingMark:
                        case UnicodeCategory.SpacingCombiningMark:
                                return true;
                        default:
                                return false;
                        }

                case Category.UnicodeN: // number
                        switch (Char.GetUnicodeCategory (c)) {
                        case UnicodeCategory.DecimalDigitNumber:
                        case UnicodeCategory.LetterNumber:
                        case UnicodeCategory.OtherNumber:
                                return true;
                        default:
                                return false;
                        }

                case Category.UnicodeZ: // separator
                        switch (Char.GetUnicodeCategory (c)) {
                        case UnicodeCategory.SpaceSeparator:
                        case UnicodeCategory.LineSeparator:
                        case UnicodeCategory.ParagraphSeparator:
                                return true;
                        default:
                                return false;
                        }

                case Category.UnicodeP: // punctuation
                        switch (Char.GetUnicodeCategory (c)) {
                        case UnicodeCategory.DashPunctuation:
                        case UnicodeCategory.OpenPunctuation:
                        case UnicodeCategory.InitialQuotePunctuation:
                        case UnicodeCategory.ClosePunctuation:
                        case UnicodeCategory.FinalQuotePunctuation:
                        case UnicodeCategory.ConnectorPunctuation:
                        case UnicodeCategory.OtherPunctuation:
                                return true;
                        default:
                                return false;
                        }

                case Category.UnicodeS: // symbol
                        switch (Char.GetUnicodeCategory (c)) {
                        case UnicodeCategory.MathSymbol:
                        case UnicodeCategory.CurrencySymbol:
                        case UnicodeCategory.ModifierSymbol:
                        case UnicodeCategory.OtherSymbol:
                                return true;
                        default:
                                return false;
                        }

                case Category.UnicodeC: // other
                        switch (Char.GetUnicodeCategory (c)) {
                        case UnicodeCategory.Control:
                        case UnicodeCategory.Format:
                        case UnicodeCategory.PrivateUse:
                        case UnicodeCategory.Surrogate:
                        case UnicodeCategory.OtherNotAssigned:
                                return true;
                        default:
                                return false;
                        }

                // Unicode block ranges (both bounds are inclusive)

                case Category.UnicodeBasicLatin:
                        return InRange (c, '\u0000', '\u007F');

                case Category.UnicodeLatin1Supplement:
                        return InRange (c, '\u0080', '\u00FF');

                case Category.UnicodeLatinExtendedA:
                        return InRange (c, '\u0100', '\u017F');

                case Category.UnicodeLatinExtendedB:
                        return InRange (c, '\u0180', '\u024F');

                case Category.UnicodeIPAExtensions:
                        return InRange (c, '\u0250', '\u02AF');

                case Category.UnicodeSpacingModifierLetters:
                        return InRange (c, '\u02B0', '\u02FF');

                case Category.UnicodeCombiningDiacriticalMarks:
                        return InRange (c, '\u0300', '\u036F');

                case Category.UnicodeGreek:
                        return InRange (c, '\u0370', '\u03FF');

                case Category.UnicodeCyrillic:
                        return InRange (c, '\u0400', '\u04FF');

                case Category.UnicodeArmenian:
                        return InRange (c, '\u0530', '\u058F');

                case Category.UnicodeHebrew:
                        return InRange (c, '\u0590', '\u05FF');

                case Category.UnicodeArabic:
                        return InRange (c, '\u0600', '\u06FF');

                case Category.UnicodeSyriac:
                        return InRange (c, '\u0700', '\u074F');

                case Category.UnicodeThaana:
                        return InRange (c, '\u0780', '\u07BF');

                case Category.UnicodeDevanagari:
                        return InRange (c, '\u0900', '\u097F');

                case Category.UnicodeBengali:
                        return InRange (c, '\u0980', '\u09FF');

                case Category.UnicodeGurmukhi:
                        return InRange (c, '\u0A00', '\u0A7F');

                case Category.UnicodeGujarati:
                        return InRange (c, '\u0A80', '\u0AFF');

                case Category.UnicodeOriya:
                        return InRange (c, '\u0B00', '\u0B7F');

                case Category.UnicodeTamil:
                        return InRange (c, '\u0B80', '\u0BFF');

                case Category.UnicodeTelugu:
                        return InRange (c, '\u0C00', '\u0C7F');

                case Category.UnicodeKannada:
                        return InRange (c, '\u0C80', '\u0CFF');

                case Category.UnicodeMalayalam:
                        return InRange (c, '\u0D00', '\u0D7F');

                case Category.UnicodeSinhala:
                        return InRange (c, '\u0D80', '\u0DFF');

                case Category.UnicodeThai:
                        return InRange (c, '\u0E00', '\u0E7F');

                case Category.UnicodeLao:
                        return InRange (c, '\u0E80', '\u0EFF');

                case Category.UnicodeTibetan:
                        return InRange (c, '\u0F00', '\u0FFF');

                case Category.UnicodeMyanmar:
                        return InRange (c, '\u1000', '\u109F');

                case Category.UnicodeGeorgian:
                        return InRange (c, '\u10A0', '\u10FF');

                case Category.UnicodeHangulJamo:
                        return InRange (c, '\u1100', '\u11FF');

                case Category.UnicodeEthiopic:
                        return InRange (c, '\u1200', '\u137F');

                case Category.UnicodeCherokee:
                        return InRange (c, '\u13A0', '\u13FF');

                case Category.UnicodeUnifiedCanadianAboriginalSyllabics:
                        return InRange (c, '\u1400', '\u167F');

                case Category.UnicodeOgham:
                        return InRange (c, '\u1680', '\u169F');

                case Category.UnicodeRunic:
                        return InRange (c, '\u16A0', '\u16FF');

                case Category.UnicodeKhmer:
                        return InRange (c, '\u1780', '\u17FF');

                case Category.UnicodeMongolian:
                        return InRange (c, '\u1800', '\u18AF');

                case Category.UnicodeLatinExtendedAdditional:
                        return InRange (c, '\u1E00', '\u1EFF');

                case Category.UnicodeGreekExtended:
                        return InRange (c, '\u1F00', '\u1FFF');

                case Category.UnicodeGeneralPunctuation:
                        return InRange (c, '\u2000', '\u206F');

                case Category.UnicodeSuperscriptsandSubscripts:
                        return InRange (c, '\u2070', '\u209F');

                case Category.UnicodeCurrencySymbols:
                        return InRange (c, '\u20A0', '\u20CF');

                case Category.UnicodeCombiningMarksforSymbols:
                        return InRange (c, '\u20D0', '\u20FF');

                case Category.UnicodeLetterlikeSymbols:
                        return InRange (c, '\u2100', '\u214F');

                case Category.UnicodeNumberForms:
                        return InRange (c, '\u2150', '\u218F');

                case Category.UnicodeArrows:
                        return InRange (c, '\u2190', '\u21FF');

                case Category.UnicodeMathematicalOperators:
                        return InRange (c, '\u2200', '\u22FF');

                case Category.UnicodeMiscellaneousTechnical:
                        return InRange (c, '\u2300', '\u23FF');

                case Category.UnicodeControlPictures:
                        return InRange (c, '\u2400', '\u243F');

                case Category.UnicodeOpticalCharacterRecognition:
                        return InRange (c, '\u2440', '\u245F');

                case Category.UnicodeEnclosedAlphanumerics:
                        return InRange (c, '\u2460', '\u24FF');

                case Category.UnicodeBoxDrawing:
                        return InRange (c, '\u2500', '\u257F');

                case Category.UnicodeBlockElements:
                        return InRange (c, '\u2580', '\u259F');

                case Category.UnicodeGeometricShapes:
                        return InRange (c, '\u25A0', '\u25FF');

                case Category.UnicodeMiscellaneousSymbols:
                        return InRange (c, '\u2600', '\u26FF');

                case Category.UnicodeDingbats:
                        return InRange (c, '\u2700', '\u27BF');

                case Category.UnicodeBraillePatterns:
                        return InRange (c, '\u2800', '\u28FF');

                case Category.UnicodeCJKRadicalsSupplement:
                        return InRange (c, '\u2E80', '\u2EFF');

                case Category.UnicodeKangxiRadicals:
                        return InRange (c, '\u2F00', '\u2FDF');

                case Category.UnicodeIdeographicDescriptionCharacters:
                        return InRange (c, '\u2FF0', '\u2FFF');

                case Category.UnicodeCJKSymbolsandPunctuation:
                        return InRange (c, '\u3000', '\u303F');

                case Category.UnicodeHiragana:
                        return InRange (c, '\u3040', '\u309F');

                case Category.UnicodeKatakana:
                        return InRange (c, '\u30A0', '\u30FF');

                case Category.UnicodeBopomofo:
                        return InRange (c, '\u3100', '\u312F');

                case Category.UnicodeHangulCompatibilityJamo:
                        return InRange (c, '\u3130', '\u318F');

                case Category.UnicodeKanbun:
                        return InRange (c, '\u3190', '\u319F');

                case Category.UnicodeBopomofoExtended:
                        return InRange (c, '\u31A0', '\u31BF');

                case Category.UnicodeEnclosedCJKLettersandMonths:
                        return InRange (c, '\u3200', '\u32FF');

                case Category.UnicodeCJKCompatibility:
                        return InRange (c, '\u3300', '\u33FF');

                case Category.UnicodeCJKUnifiedIdeographsExtensionA:
                        return InRange (c, '\u3400', '\u4DB5');

                case Category.UnicodeCJKUnifiedIdeographs:
                        return InRange (c, '\u4E00', '\u9FFF');

                case Category.UnicodeYiSyllables:
                        return InRange (c, '\uA000', '\uA48F');

                case Category.UnicodeYiRadicals:
                        return InRange (c, '\uA490', '\uA4CF');

                case Category.UnicodeHangulSyllables:
                        return InRange (c, '\uAC00', '\uD7A3');

                case Category.UnicodeHighSurrogates:
                        return InRange (c, '\uD800', '\uDB7F');

                case Category.UnicodeHighPrivateUseSurrogates:
                        return InRange (c, '\uDB80', '\uDBFF');

                case Category.UnicodeLowSurrogates:
                        return InRange (c, '\uDC00', '\uDFFF');

                case Category.UnicodePrivateUse:
                        return InRange (c, '\uE000', '\uF8FF');

                case Category.UnicodeCJKCompatibilityIdeographs:
                        return InRange (c, '\uF900', '\uFAFF');

                case Category.UnicodeAlphabeticPresentationForms:
                        return InRange (c, '\uFB00', '\uFB4F');

                case Category.UnicodeArabicPresentationFormsA:
                        return InRange (c, '\uFB50', '\uFDFF');

                case Category.UnicodeCombiningHalfMarks:
                        return InRange (c, '\uFE20', '\uFE2F');

                case Category.UnicodeCJKCompatibilityForms:
                        return InRange (c, '\uFE30', '\uFE4F');

                case Category.UnicodeSmallFormVariants:
                        return InRange (c, '\uFE50', '\uFE6F');

                case Category.UnicodeArabicPresentationFormsB:
                        return InRange (c, '\uFE70', '\uFEFE');

                case Category.UnicodeHalfwidthandFullwidthForms:
                        return InRange (c, '\uFF00', '\uFFEF');

                case Category.UnicodeSpecials:
                        // two separate pieces: U+FEFF alone, and U+FFF0..U+FFFD
                        return
                                c == '\uFEFF' ||
                                InRange (c, '\uFFF0', '\uFFFD');

                // These block ranges begin above 0x10000, which is beyond
                // the largest value a single char can hold, so c can never
                // be a member of them.

                case Category.UnicodeOldItalic:
                case Category.UnicodeGothic:
                case Category.UnicodeDeseret:
                case Category.UnicodeByzantineMusicalSymbols:
                case Category.UnicodeMusicalSymbols:
                case Category.UnicodeMathematicalAlphanumericSymbols:
                case Category.UnicodeCJKUnifiedIdeographsExtensionB:
                case Category.UnicodeCJKCompatibilityIdeographsSupplement:
                case Category.UnicodeTags:
                        return false;

                default:
                        return false;
                }
        }

        // Returns true when the Unicode general category of c is exactly uc.
        private static bool IsCategory (UnicodeCategory uc, char c) {
                return Char.GetUnicodeCategory (c) == uc;
        }

        // Returns true when first <= c <= last (both bounds inclusive).
        private static bool InRange (char c, char first, char last) {
                return first <= c && c <= last;
        }
}
637 }\r