* RegexRunnerFactory.cs: removed comment, no longer throw exception
[mono.git] / mcs / class / System / System.Text.RegularExpressions / category.cs
1 //\r
2 // assembly:    System\r
3 // namespace:   System.Text.RegularExpressions\r
4 // file:        category.cs\r
5 //\r
6 // author:      Dan Lewis (dlewis@gmx.co.uk)\r
7 //              (c) 2002\r
8 \r
9 using System;\r
10 using System.Globalization;\r
11 \r
namespace System.Text.RegularExpressions {

	// Character classes a regular expression can test a single UTF-16
	// code unit against. CategoryUtils.CategoryFromName resolves \p{...}
	// names by prefixing them with "Unicode" and looking the result up in
	// this enumeration, so the spelling of the Unicode* members is part of
	// the contract. The numeric values follow declaration order; do not
	// reorder members.
	enum Category : ushort {
		None,

		// canonical classes

		Any,			// any character except newline		.
		AnySingleline,		// any character			. (s option)
		Word,			// any word character			\w
		Digit,			// any digit character			\d
		WhiteSpace,		// any whitespace character		\s

		// ECMAScript classes

		EcmaAny,
		EcmaAnySingleline,
		EcmaWord,		// [a-zA-Z_0-9]
		EcmaDigit,		// [0-9]
		EcmaWhiteSpace,		// [ \f\n\r\t\v]

		// unicode categories (major classes)

		UnicodeL,		// Letter
		UnicodeM,		// Mark
		UnicodeN,		// Number
		UnicodeZ,		// Separator
		UnicodeP,		// Punctuation
		UnicodeS,		// Symbol
		UnicodeC,		// Other

		// unicode categories (subclasses)

		UnicodeLu,		// UppercaseLetter
		UnicodeLl,		// LowercaseLetter
		UnicodeLt,		// TitlecaseLetter
		UnicodeLm,		// ModifierLetter
		UnicodeLo,		// OtherLetter
		UnicodeMn,		// NonspacingMark
		UnicodeMe,		// EnclosingMark
		UnicodeMc,		// SpacingMark
		UnicodeNd,		// DecimalNumber
		UnicodeNl,		// LetterNumber
		UnicodeNo,		// OtherNumber
		UnicodeZs,		// SpaceSeparator
		UnicodeZl,		// LineSeparator
		UnicodeZp,		// ParagraphSeparator
		UnicodePd,		// DashPunctuation
		UnicodePs,		// OpenPunctuation
		UnicodePi,		// InitialPunctuation
		UnicodePe,		// ClosePunctuation
		UnicodePf,		// FinalPunctuation
		UnicodePc,		// ConnectorPunctuation
		UnicodePo,		// OtherPunctuation
		UnicodeSm,		// MathSymbol
		UnicodeSc,		// CurrencySymbol
		UnicodeSk,		// ModifierSymbol
		UnicodeSo,		// OtherSymbol
		UnicodeCc,		// Control
		UnicodeCf,		// Format
		UnicodeCo,		// PrivateUse
		UnicodeCs,		// Surrogate
		UnicodeCn,		// Unassigned

		// unicode block ranges

		// note: the entries marked with a star are valid unicode block
		// ranges, but don't seem to be accepted by the MS parser using
		// the \p{...} format. any ideas?

		UnicodeBasicLatin,
		UnicodeLatin1Supplement,			// *
		UnicodeLatinExtendedA,				// *
		UnicodeLatinExtendedB,				// *
		UnicodeIPAExtensions,
		UnicodeSpacingModifierLetters,
		UnicodeCombiningDiacriticalMarks,
		UnicodeGreek,
		UnicodeCyrillic,
		UnicodeArmenian,
		UnicodeHebrew,
		UnicodeArabic,
		UnicodeSyriac,
		UnicodeThaana,
		UnicodeDevanagari,
		UnicodeBengali,
		UnicodeGurmukhi,
		UnicodeGujarati,
		UnicodeOriya,
		UnicodeTamil,
		UnicodeTelugu,
		UnicodeKannada,
		UnicodeMalayalam,
		UnicodeSinhala,
		UnicodeThai,
		UnicodeLao,
		UnicodeTibetan,
		UnicodeMyanmar,
		UnicodeGeorgian,
		UnicodeHangulJamo,
		UnicodeEthiopic,
		UnicodeCherokee,
		UnicodeUnifiedCanadianAboriginalSyllabics,
		UnicodeOgham,
		UnicodeRunic,
		UnicodeKhmer,
		UnicodeMongolian,
		UnicodeLatinExtendedAdditional,
		UnicodeGreekExtended,
		UnicodeGeneralPunctuation,
		UnicodeSuperscriptsandSubscripts,
		UnicodeCurrencySymbols,
		UnicodeCombiningMarksforSymbols,
		UnicodeLetterlikeSymbols,
		UnicodeNumberForms,
		UnicodeArrows,
		UnicodeMathematicalOperators,
		UnicodeMiscellaneousTechnical,
		UnicodeControlPictures,
		UnicodeOpticalCharacterRecognition,
		UnicodeEnclosedAlphanumerics,
		UnicodeBoxDrawing,
		UnicodeBlockElements,
		UnicodeGeometricShapes,
		UnicodeMiscellaneousSymbols,
		UnicodeDingbats,
		UnicodeBraillePatterns,
		UnicodeCJKRadicalsSupplement,
		UnicodeKangxiRadicals,
		UnicodeIdeographicDescriptionCharacters,
		UnicodeCJKSymbolsandPunctuation,
		UnicodeHiragana,
		UnicodeKatakana,
		UnicodeBopomofo,
		UnicodeHangulCompatibilityJamo,
		UnicodeKanbun,
		UnicodeBopomofoExtended,
		UnicodeEnclosedCJKLettersandMonths,
		UnicodeCJKCompatibility,
		UnicodeCJKUnifiedIdeographsExtensionA,
		UnicodeCJKUnifiedIdeographs,
		UnicodeYiSyllables,
		UnicodeYiRadicals,
		UnicodeHangulSyllables,
		UnicodeHighSurrogates,
		UnicodeHighPrivateUseSurrogates,
		UnicodeLowSurrogates,
		UnicodePrivateUse,
		UnicodeCJKCompatibilityIdeographs,
		UnicodeAlphabeticPresentationForms,
		UnicodeArabicPresentationFormsA,		// *
		UnicodeCombiningHalfMarks,
		UnicodeCJKCompatibilityForms,
		UnicodeSmallFormVariants,
		UnicodeArabicPresentationFormsB,		// *
		UnicodeSpecials,
		UnicodeHalfwidthandFullwidthForms,

		// block ranges that begin above 0x10000

		UnicodeOldItalic,
		UnicodeGothic,
		UnicodeDeseret,
		UnicodeByzantineMusicalSymbols,
		UnicodeMusicalSymbols,
		UnicodeMathematicalAlphanumericSymbols,
		UnicodeCJKUnifiedIdeographsExtensionB,
		UnicodeCJKCompatibilityIdeographsSupplement,
		UnicodeTags,

		LastValue // Keep this as the highest value in the enumeration
	}

	// Helpers for resolving category names and testing characters against
	// a Category.
	class CategoryUtils {
		// Maps a category or block name to the Category member called
		// "Unicode" + name. A leading "Is" is removed first, so that
		// "IsGreek" resolves to Category.UnicodeGreek.
		//
		// Returns Category.None when name is null or when no member
		// with exactly that name exists. The lookup is case-sensitive.
		public static Category CategoryFromName (string name) {
			if (name == null)
				return Category.None;

			// remove prefix from block range (ordinal check, so the
			// result does not depend on the current culture)
			if (name.Length >= 2 && name [0] == 'I' && name [1] == 's')
				name = name.Substring (2);

			string member = "Unicode" + name;

			// Enum.Parse on its own would also accept comma-separated
			// lists and numeric tokens and would throw for unknown
			// names; an exact-name check rejects all of those without
			// using exceptions for control flow.
			if (!Enum.IsDefined (typeof (Category), member))
				return Category.None;

			return (Category) Enum.Parse (typeof (Category), member);
		}

		// Returns true when the UTF-16 code unit c belongs to cat.
		//
		// Category.None, Category.LastValue, any value without a case
		// below, and the block ranges that begin above 0x10000 always
		// yield false.
		public static bool IsCategory (Category cat, char c) {
			switch (cat) {
			case Category.None:
				return false;

			// canonical classes

			case Category.Any:
				return c != '\n';

			case Category.AnySingleline:
				return true;

			case Category.Word:
				return Char.IsLetterOrDigit (c) ||
					IsCategory (UnicodeCategory.ConnectorPunctuation, c);

			case Category.Digit:
				return Char.IsDigit (c);

			case Category.WhiteSpace:
				return Char.IsWhiteSpace (c);

			// ECMA categories

			case Category.EcmaAny:
				return c != '\n';

			case Category.EcmaAnySingleline:
				return true;

			case Category.EcmaWord:
				return InRange (c, 'a', 'z') ||
					InRange (c, 'A', 'Z') ||
					InRange (c, '0', '9') ||
					c == '_';

			case Category.EcmaDigit:
				// The upper bound must be the character '9'; comparing
				// against the integer 9 can never succeed.
				return InRange (c, '0', '9');

			case Category.EcmaWhiteSpace:
				return c == ' ' || c == '\f' || c == '\n' ||
					c == '\r' || c == '\t' || c == '\v';

			// Unicode categories...

			// letter

			case Category.UnicodeLu: return IsCategory (UnicodeCategory.UppercaseLetter, c);
			case Category.UnicodeLl: return IsCategory (UnicodeCategory.LowercaseLetter, c);
			case Category.UnicodeLt: return IsCategory (UnicodeCategory.TitlecaseLetter, c);
			case Category.UnicodeLm: return IsCategory (UnicodeCategory.ModifierLetter, c);
			case Category.UnicodeLo: return IsCategory (UnicodeCategory.OtherLetter, c);

			// mark

			case Category.UnicodeMn: return IsCategory (UnicodeCategory.NonSpacingMark, c);
			case Category.UnicodeMe: return IsCategory (UnicodeCategory.EnclosingMark, c);
			case Category.UnicodeMc: return IsCategory (UnicodeCategory.SpacingCombiningMark, c);

			// number

			case Category.UnicodeNd: return IsCategory (UnicodeCategory.DecimalDigitNumber, c);
			case Category.UnicodeNl: return IsCategory (UnicodeCategory.LetterNumber, c);
			case Category.UnicodeNo: return IsCategory (UnicodeCategory.OtherNumber, c);

			// separator

			case Category.UnicodeZs: return IsCategory (UnicodeCategory.SpaceSeparator, c);
			case Category.UnicodeZl: return IsCategory (UnicodeCategory.LineSeparator, c);
			case Category.UnicodeZp: return IsCategory (UnicodeCategory.ParagraphSeparator, c);

			// punctuation

			case Category.UnicodePd: return IsCategory (UnicodeCategory.DashPunctuation, c);
			case Category.UnicodePs: return IsCategory (UnicodeCategory.OpenPunctuation, c);
			case Category.UnicodePi: return IsCategory (UnicodeCategory.InitialQuotePunctuation, c);
			case Category.UnicodePe: return IsCategory (UnicodeCategory.ClosePunctuation, c);
			case Category.UnicodePf: return IsCategory (UnicodeCategory.FinalQuotePunctuation, c);
			case Category.UnicodePc: return IsCategory (UnicodeCategory.ConnectorPunctuation, c);
			case Category.UnicodePo: return IsCategory (UnicodeCategory.OtherPunctuation, c);

			// symbol

			case Category.UnicodeSm: return IsCategory (UnicodeCategory.MathSymbol, c);
			case Category.UnicodeSc: return IsCategory (UnicodeCategory.CurrencySymbol, c);
			case Category.UnicodeSk: return IsCategory (UnicodeCategory.ModifierSymbol, c);
			case Category.UnicodeSo: return IsCategory (UnicodeCategory.OtherSymbol, c);

			// other

			case Category.UnicodeCc: return IsCategory (UnicodeCategory.Control, c);
			case Category.UnicodeCf: return IsCategory (UnicodeCategory.Format, c);
			case Category.UnicodeCo: return IsCategory (UnicodeCategory.PrivateUse, c);
			case Category.UnicodeCs: return IsCategory (UnicodeCategory.Surrogate, c);
			case Category.UnicodeCn: return IsCategory (UnicodeCategory.OtherNotAssigned, c);

			// major classes: the union of their subclasses

			case Category.UnicodeL: // letter
				return
					IsCategory (UnicodeCategory.UppercaseLetter, c) ||
					IsCategory (UnicodeCategory.LowercaseLetter, c) ||
					IsCategory (UnicodeCategory.TitlecaseLetter, c) ||
					IsCategory (UnicodeCategory.ModifierLetter, c) ||
					IsCategory (UnicodeCategory.OtherLetter, c);

			case Category.UnicodeM: // mark
				return
					IsCategory (UnicodeCategory.NonSpacingMark, c) ||
					IsCategory (UnicodeCategory.EnclosingMark, c) ||
					IsCategory (UnicodeCategory.SpacingCombiningMark, c);

			case Category.UnicodeN: // number
				return
					IsCategory (UnicodeCategory.DecimalDigitNumber, c) ||
					IsCategory (UnicodeCategory.LetterNumber, c) ||
					IsCategory (UnicodeCategory.OtherNumber, c);

			case Category.UnicodeZ: // separator
				return
					IsCategory (UnicodeCategory.SpaceSeparator, c) ||
					IsCategory (UnicodeCategory.LineSeparator, c) ||
					IsCategory (UnicodeCategory.ParagraphSeparator, c);

			case Category.UnicodeP: // punctuation
				return
					IsCategory (UnicodeCategory.DashPunctuation, c) ||
					IsCategory (UnicodeCategory.OpenPunctuation, c) ||
					IsCategory (UnicodeCategory.InitialQuotePunctuation, c) ||
					IsCategory (UnicodeCategory.ClosePunctuation, c) ||
					IsCategory (UnicodeCategory.FinalQuotePunctuation, c) ||
					IsCategory (UnicodeCategory.ConnectorPunctuation, c) ||
					IsCategory (UnicodeCategory.OtherPunctuation, c);

			case Category.UnicodeS: // symbol
				return
					IsCategory (UnicodeCategory.MathSymbol, c) ||
					IsCategory (UnicodeCategory.CurrencySymbol, c) ||
					IsCategory (UnicodeCategory.ModifierSymbol, c) ||
					IsCategory (UnicodeCategory.OtherSymbol, c);

			case Category.UnicodeC: // other
				return
					IsCategory (UnicodeCategory.Control, c) ||
					IsCategory (UnicodeCategory.Format, c) ||
					IsCategory (UnicodeCategory.PrivateUse, c) ||
					IsCategory (UnicodeCategory.Surrogate, c) ||
					IsCategory (UnicodeCategory.OtherNotAssigned, c);

			// Unicode block ranges (both bounds inclusive)...

			case Category.UnicodeBasicLatin:			return InRange (c, '\u0000', '\u007F');
			case Category.UnicodeLatin1Supplement:			return InRange (c, '\u0080', '\u00FF');
			case Category.UnicodeLatinExtendedA:			return InRange (c, '\u0100', '\u017F');
			case Category.UnicodeLatinExtendedB:			return InRange (c, '\u0180', '\u024F');
			case Category.UnicodeIPAExtensions:			return InRange (c, '\u0250', '\u02AF');
			case Category.UnicodeSpacingModifierLetters:		return InRange (c, '\u02B0', '\u02FF');
			case Category.UnicodeCombiningDiacriticalMarks:		return InRange (c, '\u0300', '\u036F');
			case Category.UnicodeGreek:				return InRange (c, '\u0370', '\u03FF');
			case Category.UnicodeCyrillic:				return InRange (c, '\u0400', '\u04FF');
			case Category.UnicodeArmenian:				return InRange (c, '\u0530', '\u058F');
			case Category.UnicodeHebrew:				return InRange (c, '\u0590', '\u05FF');
			case Category.UnicodeArabic:				return InRange (c, '\u0600', '\u06FF');
			case Category.UnicodeSyriac:				return InRange (c, '\u0700', '\u074F');
			case Category.UnicodeThaana:				return InRange (c, '\u0780', '\u07BF');
			case Category.UnicodeDevanagari:			return InRange (c, '\u0900', '\u097F');
			case Category.UnicodeBengali:				return InRange (c, '\u0980', '\u09FF');
			case Category.UnicodeGurmukhi:				return InRange (c, '\u0A00', '\u0A7F');
			case Category.UnicodeGujarati:				return InRange (c, '\u0A80', '\u0AFF');
			case Category.UnicodeOriya:				return InRange (c, '\u0B00', '\u0B7F');
			case Category.UnicodeTamil:				return InRange (c, '\u0B80', '\u0BFF');
			case Category.UnicodeTelugu:				return InRange (c, '\u0C00', '\u0C7F');
			case Category.UnicodeKannada:				return InRange (c, '\u0C80', '\u0CFF');
			case Category.UnicodeMalayalam:				return InRange (c, '\u0D00', '\u0D7F');
			case Category.UnicodeSinhala:				return InRange (c, '\u0D80', '\u0DFF');
			case Category.UnicodeThai:				return InRange (c, '\u0E00', '\u0E7F');
			case Category.UnicodeLao:				return InRange (c, '\u0E80', '\u0EFF');
			case Category.UnicodeTibetan:				return InRange (c, '\u0F00', '\u0FFF');
			case Category.UnicodeMyanmar:				return InRange (c, '\u1000', '\u109F');
			case Category.UnicodeGeorgian:				return InRange (c, '\u10A0', '\u10FF');
			case Category.UnicodeHangulJamo:			return InRange (c, '\u1100', '\u11FF');
			case Category.UnicodeEthiopic:				return InRange (c, '\u1200', '\u137F');
			case Category.UnicodeCherokee:				return InRange (c, '\u13A0', '\u13FF');
			case Category.UnicodeUnifiedCanadianAboriginalSyllabics: return InRange (c, '\u1400', '\u167F');
			case Category.UnicodeOgham:				return InRange (c, '\u1680', '\u169F');
			case Category.UnicodeRunic:				return InRange (c, '\u16A0', '\u16FF');
			case Category.UnicodeKhmer:				return InRange (c, '\u1780', '\u17FF');
			case Category.UnicodeMongolian:				return InRange (c, '\u1800', '\u18AF');
			case Category.UnicodeLatinExtendedAdditional:		return InRange (c, '\u1E00', '\u1EFF');
			case Category.UnicodeGreekExtended:			return InRange (c, '\u1F00', '\u1FFF');
			case Category.UnicodeGeneralPunctuation:		return InRange (c, '\u2000', '\u206F');
			case Category.UnicodeSuperscriptsandSubscripts:		return InRange (c, '\u2070', '\u209F');
			case Category.UnicodeCurrencySymbols:			return InRange (c, '\u20A0', '\u20CF');
			case Category.UnicodeCombiningMarksforSymbols:		return InRange (c, '\u20D0', '\u20FF');
			case Category.UnicodeLetterlikeSymbols:			return InRange (c, '\u2100', '\u214F');
			case Category.UnicodeNumberForms:			return InRange (c, '\u2150', '\u218F');
			case Category.UnicodeArrows:				return InRange (c, '\u2190', '\u21FF');
			case Category.UnicodeMathematicalOperators:		return InRange (c, '\u2200', '\u22FF');
			case Category.UnicodeMiscellaneousTechnical:		return InRange (c, '\u2300', '\u23FF');
			case Category.UnicodeControlPictures:			return InRange (c, '\u2400', '\u243F');
			case Category.UnicodeOpticalCharacterRecognition:	return InRange (c, '\u2440', '\u245F');
			case Category.UnicodeEnclosedAlphanumerics:		return InRange (c, '\u2460', '\u24FF');
			case Category.UnicodeBoxDrawing:			return InRange (c, '\u2500', '\u257F');
			case Category.UnicodeBlockElements:			return InRange (c, '\u2580', '\u259F');
			case Category.UnicodeGeometricShapes:			return InRange (c, '\u25A0', '\u25FF');
			case Category.UnicodeMiscellaneousSymbols:		return InRange (c, '\u2600', '\u26FF');
			case Category.UnicodeDingbats:				return InRange (c, '\u2700', '\u27BF');
			case Category.UnicodeBraillePatterns:			return InRange (c, '\u2800', '\u28FF');
			case Category.UnicodeCJKRadicalsSupplement:		return InRange (c, '\u2E80', '\u2EFF');
			case Category.UnicodeKangxiRadicals:			return InRange (c, '\u2F00', '\u2FDF');
			case Category.UnicodeIdeographicDescriptionCharacters:	return InRange (c, '\u2FF0', '\u2FFF');
			case Category.UnicodeCJKSymbolsandPunctuation:		return InRange (c, '\u3000', '\u303F');
			case Category.UnicodeHiragana:				return InRange (c, '\u3040', '\u309F');
			case Category.UnicodeKatakana:				return InRange (c, '\u30A0', '\u30FF');
			case Category.UnicodeBopomofo:				return InRange (c, '\u3100', '\u312F');
			case Category.UnicodeHangulCompatibilityJamo:		return InRange (c, '\u3130', '\u318F');
			case Category.UnicodeKanbun:				return InRange (c, '\u3190', '\u319F');
			case Category.UnicodeBopomofoExtended:			return InRange (c, '\u31A0', '\u31BF');
			case Category.UnicodeEnclosedCJKLettersandMonths:	return InRange (c, '\u3200', '\u32FF');
			case Category.UnicodeCJKCompatibility:			return InRange (c, '\u3300', '\u33FF');
			case Category.UnicodeCJKUnifiedIdeographsExtensionA:	return InRange (c, '\u3400', '\u4DB5');
			case Category.UnicodeCJKUnifiedIdeographs:		return InRange (c, '\u4E00', '\u9FFF');
			case Category.UnicodeYiSyllables:			return InRange (c, '\uA000', '\uA48F');
			case Category.UnicodeYiRadicals:			return InRange (c, '\uA490', '\uA4CF');
			case Category.UnicodeHangulSyllables:			return InRange (c, '\uAC00', '\uD7A3');
			case Category.UnicodeHighSurrogates:			return InRange (c, '\uD800', '\uDB7F');
			case Category.UnicodeHighPrivateUseSurrogates:		return InRange (c, '\uDB80', '\uDBFF');
			case Category.UnicodeLowSurrogates:			return InRange (c, '\uDC00', '\uDFFF');
			case Category.UnicodePrivateUse:			return InRange (c, '\uE000', '\uF8FF');
			case Category.UnicodeCJKCompatibilityIdeographs:	return InRange (c, '\uF900', '\uFAFF');
			case Category.UnicodeAlphabeticPresentationForms:	return InRange (c, '\uFB00', '\uFB4F');
			case Category.UnicodeArabicPresentationFormsA:		return InRange (c, '\uFB50', '\uFDFF');
			case Category.UnicodeCombiningHalfMarks:		return InRange (c, '\uFE20', '\uFE2F');
			case Category.UnicodeCJKCompatibilityForms:		return InRange (c, '\uFE30', '\uFE4F');
			case Category.UnicodeSmallFormVariants:			return InRange (c, '\uFE50', '\uFE6F');
			case Category.UnicodeArabicPresentationFormsB:		return InRange (c, '\uFE70', '\uFEFE');
			case Category.UnicodeHalfwidthandFullwidthForms:	return InRange (c, '\uFF00', '\uFFEF');

			case Category.UnicodeSpecials:
				// U+FEFF lies outside the main U+FFF0..U+FFFD run
				return c == '\uFEFF' || InRange (c, '\uFFF0', '\uFFFD');

			// these block ranges begin above 0x10000, which a single
			// char cannot represent

			case Category.UnicodeOldItalic:
			case Category.UnicodeGothic:
			case Category.UnicodeDeseret:
			case Category.UnicodeByzantineMusicalSymbols:
			case Category.UnicodeMusicalSymbols:
			case Category.UnicodeMathematicalAlphanumericSymbols:
			case Category.UnicodeCJKUnifiedIdeographsExtensionB:
			case Category.UnicodeCJKCompatibilityIdeographsSupplement:
			case Category.UnicodeTags:
				return false;

			default:
				return false;
			}
		}

		// Returns true when Char.GetUnicodeCategory reports uc for c.
		private static bool IsCategory (UnicodeCategory uc, char c) {
			return Char.GetUnicodeCategory (c) == uc;
		}

		// Returns true when first <= c <= last (both bounds inclusive).
		private static bool InRange (char c, char first, char last) {
			return first <= c && c <= last;
		}
	}
}