3 // namespace: System.Text.RegularExpressions
\r
6 // author: Dan Lewis (dlewis@gmx.co.uk)
\r
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 using System.Globalization;
\r
33 namespace System.Text.RegularExpressions {
\r
// Character classes understood by the regular-expression engine: the canonical
// classes, their ECMAScript variants, the Unicode general categories and the
// Unicode block ranges.
//
// Members receive consecutive values in declaration order, so new members
// must be added immediately before LastValue.
//
// NOTE(review): None, EcmaAny, EcmaAnySingleline, EcmaDigit, UnicodeL/M/N/S/C
// and the block ranges without a "// *" marker between UnicodeBasicLatin and
// UnicodeTags are referenced by CategoryUtils but had no declaration here.
// They are declared below in the order CategoryUtils.IsCategory handles them
// (UnicodeSpecials placed before UnicodeHalfwidthandFullwidthForms) - confirm
// the ordering, and the missing "*" markers, against the upstream file.
enum Category : ushort {
    None,

    // canonical classes

    Any,                    // any character except newline     .
    AnySingleline,          // any character                    . (s option)
    Word,                   // any word character               \w
    Digit,                  // any digit character              \d
    WhiteSpace,             // any whitespace character         \s

    // ECMAScript classes

    EcmaAny,
    EcmaAnySingleline,
    EcmaWord,               // [a-zA-Z_0-9]
    EcmaDigit,              // [0-9]
    EcmaWhiteSpace,         // [ \f\n\r\t\v]

    // unicode categories

    UnicodeL,               // Letter
    UnicodeM,               // Mark
    UnicodeN,               // Number
    UnicodeZ,               // Separator
    UnicodeP,               // Punctuation
    UnicodeS,               // Symbol
    UnicodeC,               // Other

    UnicodeLu,              // UppercaseLetter
    UnicodeLl,              // LowercaseLetter
    UnicodeLt,              // TitlecaseLetter
    UnicodeLm,              // ModifierLetter
    UnicodeLo,              // OtherLetter
    UnicodeMn,              // NonspacingMark
    UnicodeMe,              // EnclosingMark
    UnicodeMc,              // SpacingMark
    UnicodeNd,              // DecimalNumber
    UnicodeNl,              // LetterNumber
    UnicodeNo,              // OtherNumber
    UnicodeZs,              // SpaceSeparator
    UnicodeZl,              // LineSeparator
    UnicodeZp,              // ParagraphSeparator
    UnicodePd,              // DashPunctuation
    UnicodePs,              // OpenPunctuation
    UnicodePi,              // InitialPunctuation
    UnicodePe,              // ClosePunctuation
    UnicodePf,              // FinalPunctuation
    UnicodePc,              // ConnectorPunctuation
    UnicodePo,              // OtherPunctuation
    UnicodeSm,              // MathSymbol
    UnicodeSc,              // CurrencySymbol
    UnicodeSk,              // ModifierSymbol
    UnicodeSo,              // OtherSymbol
    UnicodeCc,              // Control
    UnicodeCf,              // Format
    UnicodeCo,              // PrivateUse
    UnicodeCs,              // Surrogate
    UnicodeCn,              // Unassigned

    // unicode block ranges

    // notes: the categories marked with a star are valid unicode block ranges,
    // but don't seem to be accepted by the MS parser using the \p{...} format.

    UnicodeBasicLatin,
    UnicodeLatin1Supplement,                // *
    UnicodeLatinExtendedA,                  // *
    UnicodeLatinExtendedB,                  // *
    UnicodeIPAExtensions,
    UnicodeSpacingModifierLetters,
    UnicodeCombiningDiacriticalMarks,
    UnicodeGreek,
    UnicodeCyrillic,
    UnicodeArmenian,
    UnicodeHebrew,
    UnicodeArabic,
    UnicodeSyriac,
    UnicodeThaana,
    UnicodeDevanagari,
    UnicodeBengali,
    UnicodeGurmukhi,
    UnicodeGujarati,
    UnicodeOriya,
    UnicodeTamil,
    UnicodeTelugu,
    UnicodeKannada,
    UnicodeMalayalam,
    UnicodeSinhala,
    UnicodeThai,
    UnicodeLao,
    UnicodeTibetan,
    UnicodeMyanmar,
    UnicodeGeorgian,
    UnicodeHangulJamo,
    UnicodeEthiopic,
    UnicodeCherokee,
    UnicodeUnifiedCanadianAboriginalSyllabics,
    UnicodeOgham,
    UnicodeRunic,
    UnicodeKhmer,
    UnicodeMongolian,
    UnicodeLatinExtendedAdditional,
    UnicodeGreekExtended,
    UnicodeGeneralPunctuation,
    UnicodeSuperscriptsandSubscripts,
    UnicodeCurrencySymbols,
    UnicodeCombiningMarksforSymbols,
    UnicodeLetterlikeSymbols,
    UnicodeNumberForms,
    UnicodeArrows,
    UnicodeMathematicalOperators,
    UnicodeMiscellaneousTechnical,
    UnicodeControlPictures,
    UnicodeOpticalCharacterRecognition,
    UnicodeEnclosedAlphanumerics,
    UnicodeBoxDrawing,
    UnicodeBlockElements,
    UnicodeGeometricShapes,
    UnicodeMiscellaneousSymbols,
    UnicodeDingbats,
    UnicodeBraillePatterns,
    UnicodeCJKRadicalsSupplement,
    UnicodeKangxiRadicals,
    UnicodeIdeographicDescriptionCharacters,
    UnicodeCJKSymbolsandPunctuation,
    UnicodeHiragana,
    UnicodeKatakana,
    UnicodeBopomofo,
    UnicodeHangulCompatibilityJamo,
    UnicodeKanbun,
    UnicodeBopomofoExtended,
    UnicodeEnclosedCJKLettersandMonths,
    UnicodeCJKCompatibility,
    UnicodeCJKUnifiedIdeographsExtensionA,
    UnicodeCJKUnifiedIdeographs,
    UnicodeYiSyllables,
    UnicodeYiRadicals,
    UnicodeHangulSyllables,
    UnicodeHighSurrogates,
    UnicodeHighPrivateUseSurrogates,
    UnicodeLowSurrogates,
    UnicodePrivateUse,
    UnicodeCJKCompatibilityIdeographs,
    UnicodeAlphabeticPresentationForms,
    UnicodeArabicPresentationFormsA,        // *
    UnicodeCombiningHalfMarks,
    UnicodeCJKCompatibilityForms,
    UnicodeSmallFormVariants,
    UnicodeArabicPresentationFormsB,        // *
    UnicodeSpecials,
    UnicodeHalfwidthandFullwidthForms,

    UnicodeOldItalic,
    UnicodeGothic,
    UnicodeDeseret,
    UnicodeByzantineMusicalSymbols,
    UnicodeMusicalSymbols,
    UnicodeMathematicalAlphanumericSymbols,
    UnicodeCJKUnifiedIdeographsExtensionB,
    UnicodeCJKCompatibilityIdeographsSupplement,
    UnicodeTags,

    LastValue   // Keep this as the highest value in the enumeration
}
\r
203 class CategoryUtils {
\r
// Resolves a Unicode category or block-range name to its Category member.
//
// A leading "Is" (the block-range prefix) is stripped, the remainder is
// prefixed with "Unicode" and looked up by exact, case-sensitive member name.
// Returns Category.None when no member has that name.
public static Category CategoryFromName (string name) {
    try {
        // Ordinal: this is an identifier prefix, not linguistic text, so the
        // result must not depend on the current culture.
        if (name.StartsWith ("Is", StringComparison.Ordinal))    // remove prefix from block range
            name = name.Substring (2);

        // Enum.Parse treats ',' as a separator and would OR several members
        // into a value that names no single category, so reject it up front.
        if (name.IndexOf (',') >= 0)
            return Category.None;

        return (Category) Enum.Parse (typeof (Category), "Unicode" + name);
    }
    catch (ArgumentException) {
        // Enum.Parse signals an unknown member name with ArgumentException.
        return Category.None;
    }
}
\r
// Tests whether the UTF-16 code unit c belongs to the character class cat.
//
// Canonical classes defer to the System.Char classification helpers, ECMAScript
// classes use fixed ASCII sets, Unicode general categories defer to
// Char.GetUnicodeCategory (through the private IsCategory overload), and block
// ranges are inclusive code-unit intervals.
public static bool IsCategory (Category cat, char c) {
    switch (cat) {
    case Category.None:
        // NOTE(review): the result for None is assumed to be "matches nothing".
        return false;

    case Category.Any:
        return c != '\n';

    case Category.AnySingleline:
        return true;

    case Category.Word:
        return
            Char.IsLetterOrDigit (c) ||
            IsCategory (UnicodeCategory.ConnectorPunctuation, c);

    case Category.Digit:
        return Char.IsDigit (c);

    case Category.WhiteSpace:
        return Char.IsWhiteSpace (c);

    // ECMAScript classes...

    // NOTE(review): EcmaAny / EcmaAnySingleline are assumed to mirror
    // Any / AnySingleline - confirm.
    case Category.EcmaAny:
        return c != '\n';

    case Category.EcmaAnySingleline:
        return true;

    case Category.EcmaWord:         // [a-zA-Z_0-9]
        return
            'a' <= c && c <= 'z' ||
            'A' <= c && c <= 'Z' ||
            '0' <= c && c <= '9' ||
            '_' == c;

    case Category.EcmaDigit:        // [0-9]
        // The upper bound must be the character '9'; comparing against the
        // integer 9 can never be true for any c >= '0'.
        return
            '0' <= c && c <= '9';

    case Category.EcmaWhiteSpace:   // [ \f\n\r\t\v]
        return
            c == ' '  ||
            c == '\f' ||
            c == '\n' ||
            c == '\r' ||
            c == '\t' ||
            c == '\v';

    // Unicode categories...

    // letter
    case Category.UnicodeLu: return IsCategory (UnicodeCategory.UppercaseLetter, c);
    case Category.UnicodeLl: return IsCategory (UnicodeCategory.LowercaseLetter, c);
    case Category.UnicodeLt: return IsCategory (UnicodeCategory.TitlecaseLetter, c);
    case Category.UnicodeLm: return IsCategory (UnicodeCategory.ModifierLetter, c);
    case Category.UnicodeLo: return IsCategory (UnicodeCategory.OtherLetter, c);

    // mark
    case Category.UnicodeMn: return IsCategory (UnicodeCategory.NonSpacingMark, c);
    case Category.UnicodeMe: return IsCategory (UnicodeCategory.EnclosingMark, c);
    case Category.UnicodeMc: return IsCategory (UnicodeCategory.SpacingCombiningMark, c);

    // number
    case Category.UnicodeNd: return IsCategory (UnicodeCategory.DecimalDigitNumber, c);
    case Category.UnicodeNl: return IsCategory (UnicodeCategory.LetterNumber, c);
    case Category.UnicodeNo: return IsCategory (UnicodeCategory.OtherNumber, c);

    // separator
    case Category.UnicodeZs: return IsCategory (UnicodeCategory.SpaceSeparator, c);
    case Category.UnicodeZl: return IsCategory (UnicodeCategory.LineSeparator, c);
    case Category.UnicodeZp: return IsCategory (UnicodeCategory.ParagraphSeparator, c);

    // punctuation
    case Category.UnicodePd: return IsCategory (UnicodeCategory.DashPunctuation, c);
    case Category.UnicodePs: return IsCategory (UnicodeCategory.OpenPunctuation, c);
    case Category.UnicodePi: return IsCategory (UnicodeCategory.InitialQuotePunctuation, c);
    case Category.UnicodePe: return IsCategory (UnicodeCategory.ClosePunctuation, c);
    case Category.UnicodePf: return IsCategory (UnicodeCategory.FinalQuotePunctuation, c);
    case Category.UnicodePc: return IsCategory (UnicodeCategory.ConnectorPunctuation, c);
    case Category.UnicodePo: return IsCategory (UnicodeCategory.OtherPunctuation, c);

    // symbol
    case Category.UnicodeSm: return IsCategory (UnicodeCategory.MathSymbol, c);
    case Category.UnicodeSc: return IsCategory (UnicodeCategory.CurrencySymbol, c);
    case Category.UnicodeSk: return IsCategory (UnicodeCategory.ModifierSymbol, c);
    case Category.UnicodeSo: return IsCategory (UnicodeCategory.OtherSymbol, c);

    // other
    case Category.UnicodeCc: return IsCategory (UnicodeCategory.Control, c);
    case Category.UnicodeCf: return IsCategory (UnicodeCategory.Format, c);
    case Category.UnicodeCo: return IsCategory (UnicodeCategory.PrivateUse, c);
    case Category.UnicodeCs: return IsCategory (UnicodeCategory.Surrogate, c);
    case Category.UnicodeCn: return IsCategory (UnicodeCategory.OtherNotAssigned, c);

    case Category.UnicodeL: // letter
        return
            IsCategory (UnicodeCategory.UppercaseLetter, c) ||
            IsCategory (UnicodeCategory.LowercaseLetter, c) ||
            IsCategory (UnicodeCategory.TitlecaseLetter, c) ||
            IsCategory (UnicodeCategory.ModifierLetter, c) ||
            IsCategory (UnicodeCategory.OtherLetter, c);

    case Category.UnicodeM: // mark
        return
            IsCategory (UnicodeCategory.NonSpacingMark, c) ||
            IsCategory (UnicodeCategory.EnclosingMark, c) ||
            IsCategory (UnicodeCategory.SpacingCombiningMark, c);

    case Category.UnicodeN: // number
        return
            IsCategory (UnicodeCategory.DecimalDigitNumber, c) ||
            IsCategory (UnicodeCategory.LetterNumber, c) ||
            IsCategory (UnicodeCategory.OtherNumber, c);

    case Category.UnicodeZ: // separator
        return
            IsCategory (UnicodeCategory.SpaceSeparator, c) ||
            IsCategory (UnicodeCategory.LineSeparator, c) ||
            IsCategory (UnicodeCategory.ParagraphSeparator, c);

    case Category.UnicodeP: // punctuation
        return
            IsCategory (UnicodeCategory.DashPunctuation, c) ||
            IsCategory (UnicodeCategory.OpenPunctuation, c) ||
            IsCategory (UnicodeCategory.InitialQuotePunctuation, c) ||
            IsCategory (UnicodeCategory.ClosePunctuation, c) ||
            IsCategory (UnicodeCategory.FinalQuotePunctuation, c) ||
            IsCategory (UnicodeCategory.ConnectorPunctuation, c) ||
            IsCategory (UnicodeCategory.OtherPunctuation, c);

    case Category.UnicodeS: // symbol
        return
            IsCategory (UnicodeCategory.MathSymbol, c) ||
            IsCategory (UnicodeCategory.CurrencySymbol, c) ||
            IsCategory (UnicodeCategory.ModifierSymbol, c) ||
            IsCategory (UnicodeCategory.OtherSymbol, c);

    case Category.UnicodeC: // other
        return
            IsCategory (UnicodeCategory.Control, c) ||
            IsCategory (UnicodeCategory.Format, c) ||
            IsCategory (UnicodeCategory.PrivateUse, c) ||
            IsCategory (UnicodeCategory.Surrogate, c) ||
            IsCategory (UnicodeCategory.OtherNotAssigned, c);

    // Unicode block ranges...

    case Category.UnicodeBasicLatin:
        return '\u0000' <= c && c <= '\u007F';

    case Category.UnicodeLatin1Supplement:
        return '\u0080' <= c && c <= '\u00FF';

    case Category.UnicodeLatinExtendedA:
        return '\u0100' <= c && c <= '\u017F';

    case Category.UnicodeLatinExtendedB:
        return '\u0180' <= c && c <= '\u024F';

    case Category.UnicodeIPAExtensions:
        return '\u0250' <= c && c <= '\u02AF';

    case Category.UnicodeSpacingModifierLetters:
        return '\u02B0' <= c && c <= '\u02FF';

    case Category.UnicodeCombiningDiacriticalMarks:
        return '\u0300' <= c && c <= '\u036F';

    case Category.UnicodeGreek:
        return '\u0370' <= c && c <= '\u03FF';

    case Category.UnicodeCyrillic:
        return '\u0400' <= c && c <= '\u04FF';

    case Category.UnicodeArmenian:
        return '\u0530' <= c && c <= '\u058F';

    case Category.UnicodeHebrew:
        return '\u0590' <= c && c <= '\u05FF';

    case Category.UnicodeArabic:
        return '\u0600' <= c && c <= '\u06FF';

    case Category.UnicodeSyriac:
        return '\u0700' <= c && c <= '\u074F';

    case Category.UnicodeThaana:
        return '\u0780' <= c && c <= '\u07BF';

    case Category.UnicodeDevanagari:
        return '\u0900' <= c && c <= '\u097F';

    case Category.UnicodeBengali:
        return '\u0980' <= c && c <= '\u09FF';

    case Category.UnicodeGurmukhi:
        return '\u0A00' <= c && c <= '\u0A7F';

    case Category.UnicodeGujarati:
        return '\u0A80' <= c && c <= '\u0AFF';

    case Category.UnicodeOriya:
        return '\u0B00' <= c && c <= '\u0B7F';

    case Category.UnicodeTamil:
        return '\u0B80' <= c && c <= '\u0BFF';

    case Category.UnicodeTelugu:
        return '\u0C00' <= c && c <= '\u0C7F';

    case Category.UnicodeKannada:
        return '\u0C80' <= c && c <= '\u0CFF';

    case Category.UnicodeMalayalam:
        return '\u0D00' <= c && c <= '\u0D7F';

    case Category.UnicodeSinhala:
        return '\u0D80' <= c && c <= '\u0DFF';

    case Category.UnicodeThai:
        return '\u0E00' <= c && c <= '\u0E7F';

    case Category.UnicodeLao:
        return '\u0E80' <= c && c <= '\u0EFF';

    case Category.UnicodeTibetan:
        return '\u0F00' <= c && c <= '\u0FFF';

    case Category.UnicodeMyanmar:
        return '\u1000' <= c && c <= '\u109F';

    case Category.UnicodeGeorgian:
        return '\u10A0' <= c && c <= '\u10FF';

    case Category.UnicodeHangulJamo:
        return '\u1100' <= c && c <= '\u11FF';

    case Category.UnicodeEthiopic:
        return '\u1200' <= c && c <= '\u137F';

    case Category.UnicodeCherokee:
        return '\u13A0' <= c && c <= '\u13FF';

    case Category.UnicodeUnifiedCanadianAboriginalSyllabics:
        return '\u1400' <= c && c <= '\u167F';

    case Category.UnicodeOgham:
        return '\u1680' <= c && c <= '\u169F';

    case Category.UnicodeRunic:
        return '\u16A0' <= c && c <= '\u16FF';

    case Category.UnicodeKhmer:
        return '\u1780' <= c && c <= '\u17FF';

    case Category.UnicodeMongolian:
        return '\u1800' <= c && c <= '\u18AF';

    case Category.UnicodeLatinExtendedAdditional:
        return '\u1E00' <= c && c <= '\u1EFF';

    case Category.UnicodeGreekExtended:
        return '\u1F00' <= c && c <= '\u1FFF';

    case Category.UnicodeGeneralPunctuation:
        return '\u2000' <= c && c <= '\u206F';

    case Category.UnicodeSuperscriptsandSubscripts:
        return '\u2070' <= c && c <= '\u209F';

    case Category.UnicodeCurrencySymbols:
        return '\u20A0' <= c && c <= '\u20CF';

    case Category.UnicodeCombiningMarksforSymbols:
        return '\u20D0' <= c && c <= '\u20FF';

    case Category.UnicodeLetterlikeSymbols:
        return '\u2100' <= c && c <= '\u214F';

    case Category.UnicodeNumberForms:
        return '\u2150' <= c && c <= '\u218F';

    case Category.UnicodeArrows:
        return '\u2190' <= c && c <= '\u21FF';

    case Category.UnicodeMathematicalOperators:
        return '\u2200' <= c && c <= '\u22FF';

    case Category.UnicodeMiscellaneousTechnical:
        return '\u2300' <= c && c <= '\u23FF';

    case Category.UnicodeControlPictures:
        return '\u2400' <= c && c <= '\u243F';

    case Category.UnicodeOpticalCharacterRecognition:
        return '\u2440' <= c && c <= '\u245F';

    case Category.UnicodeEnclosedAlphanumerics:
        return '\u2460' <= c && c <= '\u24FF';

    case Category.UnicodeBoxDrawing:
        return '\u2500' <= c && c <= '\u257F';

    case Category.UnicodeBlockElements:
        return '\u2580' <= c && c <= '\u259F';

    case Category.UnicodeGeometricShapes:
        return '\u25A0' <= c && c <= '\u25FF';

    case Category.UnicodeMiscellaneousSymbols:
        return '\u2600' <= c && c <= '\u26FF';

    case Category.UnicodeDingbats:
        return '\u2700' <= c && c <= '\u27BF';

    case Category.UnicodeBraillePatterns:
        return '\u2800' <= c && c <= '\u28FF';

    case Category.UnicodeCJKRadicalsSupplement:
        return '\u2E80' <= c && c <= '\u2EFF';

    case Category.UnicodeKangxiRadicals:
        return '\u2F00' <= c && c <= '\u2FDF';

    case Category.UnicodeIdeographicDescriptionCharacters:
        return '\u2FF0' <= c && c <= '\u2FFF';

    case Category.UnicodeCJKSymbolsandPunctuation:
        return '\u3000' <= c && c <= '\u303F';

    case Category.UnicodeHiragana:
        return '\u3040' <= c && c <= '\u309F';

    case Category.UnicodeKatakana:
        return '\u30A0' <= c && c <= '\u30FF';

    case Category.UnicodeBopomofo:
        return '\u3100' <= c && c <= '\u312F';

    case Category.UnicodeHangulCompatibilityJamo:
        return '\u3130' <= c && c <= '\u318F';

    case Category.UnicodeKanbun:
        return '\u3190' <= c && c <= '\u319F';

    case Category.UnicodeBopomofoExtended:
        return '\u31A0' <= c && c <= '\u31BF';

    case Category.UnicodeEnclosedCJKLettersandMonths:
        return '\u3200' <= c && c <= '\u32FF';

    case Category.UnicodeCJKCompatibility:
        return '\u3300' <= c && c <= '\u33FF';

    case Category.UnicodeCJKUnifiedIdeographsExtensionA:
        return '\u3400' <= c && c <= '\u4DB5';

    case Category.UnicodeCJKUnifiedIdeographs:
        return '\u4E00' <= c && c <= '\u9FFF';

    case Category.UnicodeYiSyllables:
        return '\uA000' <= c && c <= '\uA48F';

    case Category.UnicodeYiRadicals:
        return '\uA490' <= c && c <= '\uA4CF';

    case Category.UnicodeHangulSyllables:
        return '\uAC00' <= c && c <= '\uD7A3';

    case Category.UnicodeHighSurrogates:
        return '\uD800' <= c && c <= '\uDB7F';

    case Category.UnicodeHighPrivateUseSurrogates:
        return '\uDB80' <= c && c <= '\uDBFF';

    case Category.UnicodeLowSurrogates:
        return '\uDC00' <= c && c <= '\uDFFF';

    case Category.UnicodePrivateUse:
        return '\uE000' <= c && c <= '\uF8FF';

    case Category.UnicodeCJKCompatibilityIdeographs:
        return '\uF900' <= c && c <= '\uFAFF';

    case Category.UnicodeAlphabeticPresentationForms:
        return '\uFB00' <= c && c <= '\uFB4F';

    case Category.UnicodeArabicPresentationFormsA:
        return '\uFB50' <= c && c <= '\uFDFF';

    case Category.UnicodeCombiningHalfMarks:
        return '\uFE20' <= c && c <= '\uFE2F';

    case Category.UnicodeCJKCompatibilityForms:
        return '\uFE30' <= c && c <= '\uFE4F';

    case Category.UnicodeSmallFormVariants:
        return '\uFE50' <= c && c <= '\uFE6F';

    case Category.UnicodeArabicPresentationFormsB:
        // Stops at U+FEFE: U+FEFF is reported under UnicodeSpecials below.
        return '\uFE70' <= c && c <= '\uFEFE';

    case Category.UnicodeHalfwidthandFullwidthForms:
        return '\uFF00' <= c && c <= '\uFFEF';

    case Category.UnicodeSpecials:
        return
            '\uFEFF' <= c && c <= '\uFEFF' ||
            '\uFFF0' <= c && c <= '\uFFFD';

    // these block ranges begin above 0x10000

    case Category.UnicodeOldItalic:
    case Category.UnicodeGothic:
    case Category.UnicodeDeseret:
    case Category.UnicodeByzantineMusicalSymbols:
    case Category.UnicodeMusicalSymbols:
    case Category.UnicodeMathematicalAlphanumericSymbols:
    case Category.UnicodeCJKUnifiedIdeographsExtensionB:
    case Category.UnicodeCJKCompatibilityIdeographsSupplement:
    case Category.UnicodeTags:
        // A char is a single 16-bit code unit, so it can never lie in a
        // block that starts above 0xFFFF.
        return false;

    default:
        // NOTE(review): values with no case above (e.g. LastValue) are
        // assumed to match nothing - confirm.
        return false;
    }
}
\r
653 private static bool IsCategory (UnicodeCategory uc, char c) {
\r
654 if (Char.GetUnicodeCategory (c) == uc)
\r