6 /// http://www.w3.org/TR/REC-xml
\r
9 /// Includes code and ideas from the org.apache.xerces.util.XMLChar class of Xerces 2.0.1.
\r
10 /// However, no surrogate support is included in this class.
\r
11 /// This class is internal; it was public only temporarily, while it was being tested.
\r
13 internal class XmlConstructs
\r
15 /** Character flags. */
\r
16 internal static byte[] CHARS = new byte[1 << 16];
\r
18 /** Valid character mask. */
\r
19 internal static int VALID = 0x01;
\r
21 /** Space character mask. */
\r
22 internal static int SPACE = 0x02;
\r
24 /** Name start character mask. */
\r
25 internal static int NAME_START = 0x04;
\r
27 /** Name character mask. */
\r
28 internal static int NAME = 0x08;
\r
30 /** Pubid character mask. */
\r
31 internal static int PUBID = 0x10;
\r
33 * Content character mask. Special characters are those that can
\r
34 * be considered the start of markup, such as '<' and '&'.
\r
35 * The various newline characters are considered special as well.
\r
36 * All other valid XML characters can be considered content.
\r
38 * This is an optimization for the inner loop of character scanning.
\r
40 internal static int CONTENT = 0x20;
\r
42 /** NCName start character mask. */
\r
43 internal static int NCNAME_START = 0x40;
\r
45 /** NCName character mask. */
\r
46 internal static int NCNAME = 0x80;
\r
48 static XmlConstructs()
\r
51 // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
\r
52 // [#xE000-#xFFFD] | [#x10000-#x10FFFF]
\r
54 int[] charRange = {
\r
55 0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
\r
59 // [3] S ::= (#x20 | #x9 | #xD | #xA)+
\r
61 int[] spaceChar = {
\r
62 0x0020, 0x0009, 0x000D, 0x000A,
\r
66 // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
\r
67 // CombiningChar | Extender
\r
70 0x002D, 0x002E, // '-' and '.'
\r
74 // [5] Name ::= (Letter | '_' | ':') (NameChar)*
\r
77 int[] nameStartChar = {
\r
78 0x003A, 0x005F, // ':' and '_'
\r
82 // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
\r
86 0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
\r
90 int[] pubidRange = {
\r
91 0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
\r
95 // [84] Letter ::= BaseChar | Ideographic
\r
98 int[] letterRange = {
\r
100 0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
\r
101 0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
\r
102 0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
\r
103 0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
\r
104 0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
\r
105 0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
\r
106 0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
\r
107 0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
\r
108 0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
\r
109 0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
\r
110 0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
\r
111 0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
\r
112 0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
\r
113 0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
\r
114 0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
\r
115 0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
\r
116 0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
\r
117 0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
\r
118 0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
\r
119 0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
\r
120 0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
\r
121 0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
\r
122 0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
\r
123 0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
\r
124 0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
\r
125 0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
\r
126 0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
\r
127 0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
\r
128 0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
\r
129 0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
\r
130 0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
\r
131 0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
\r
132 0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
\r
133 0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
\r
134 0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
\r
135 0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
\r
136 0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
\r
139 0x3021, 0x3029, 0x4E00, 0x9FA5,
\r
141 int[] letterChar = {
\r
143 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
\r
144 0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
\r
145 0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
\r
146 0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
\r
147 0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
\r
148 0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
\r
149 0x1F5D, 0x1FBE, 0x2126, 0x212E,
\r
155 // [87] CombiningChar ::= ...
\r
158 int[] combiningCharRange = {
\r
159 0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
\r
160 0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
\r
161 0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
\r
162 0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
\r
163 0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
\r
164 0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
\r
165 0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
\r
166 0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
\r
167 0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
\r
168 0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
\r
169 0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
\r
170 0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
\r
171 0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
\r
172 0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
\r
173 0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
\r
174 0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
\r
175 0x20D0, 0x20DC, 0x302A, 0x302F,
\r
178 int[] combiningCharChar = {
\r
179 0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
\r
180 0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
\r
181 0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
\r
182 0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
\r
186 // [88] Digit ::= ...
\r
189 int[] digitRange = {
\r
190 0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
\r
191 0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
\r
192 0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
\r
193 0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
\r
197 // [89] Extender ::= ...
\r
200 int[] extenderRange = {
\r
201 0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
\r
204 int[] extenderChar = {
\r
205 0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
\r
209 // SpecialChar ::= '<', '&', '\n', '\r', ']'
\r
212 int[] specialChar = {
\r
213 '<', '&', '\n', '\r', ']',
\r
220 // set valid characters
\r
221 for (int i = 0; i < charRange.Length; i += 2)
\r
223 for (int j = charRange[i]; j <= charRange[i + 1]; j++)
\r
225 CHARS[j] = (byte)(CHARS[j] | VALID | CONTENT);
\r
229 // remove special characters
\r
230 for (int i = 0; i < specialChar.Length; i++)
\r
232 CHARS[specialChar[i]] = (byte)(CHARS[specialChar[i]] & ~CONTENT);
\r
235 // set space characters
\r
236 for (int i = 0; i < spaceChar.Length; i++)
\r
238 CHARS[spaceChar[i]] = (byte)(CHARS[spaceChar[i]] | SPACE);
\r
241 // set name start characters
\r
242 for (int i = 0; i < nameStartChar.Length; i++)
\r
244 CHARS[nameStartChar[i]] = (byte)(CHARS[nameStartChar[i]] | NAME_START | NAME |
\r
245 NCNAME_START | NCNAME);
\r
247 for (int i = 0; i < letterRange.Length; i += 2)
\r
249 for (int j = letterRange[i]; j <= letterRange[i + 1]; j++)
\r
251 CHARS[j] = (byte)(CHARS[j] | NAME_START | NAME | NCNAME_START | NCNAME);
\r
254 for (int i = 0; i < letterChar.Length; i++)
\r
256 CHARS[letterChar[i]] = (byte)(CHARS[letterChar[i]] | NAME_START | NAME |
\r
257 NCNAME_START | NCNAME);
\r
260 // set name characters
\r
261 for (int i = 0; i < nameChar.Length; i++)
\r
263 CHARS[nameChar[i]] = (byte)(CHARS[nameChar[i]] | NAME | NCNAME);
\r
265 for (int i = 0; i < digitRange.Length; i += 2)
\r
267 for (int j = digitRange[i]; j <= digitRange[i + 1]; j++)
\r
269 CHARS[j] = (byte)(CHARS[j] | NAME | NCNAME);
\r
272 for (int i = 0; i < combiningCharRange.Length; i += 2)
\r
274 for (int j = combiningCharRange[i]; j <= combiningCharRange[i + 1]; j++)
\r
276 CHARS[j] = (byte)(CHARS[j] | NAME | NCNAME);
\r
279 for (int i = 0; i < combiningCharChar.Length; i++)
\r
281 CHARS[combiningCharChar[i]] = (byte)(CHARS[combiningCharChar[i]] | NAME | NCNAME);
\r
283 for (int i = 0; i < extenderRange.Length; i += 2)
\r
285 for (int j = extenderRange[i]; j <= extenderRange[i + 1]; j++)
\r
287 CHARS[j] = (byte)(CHARS[j] | NAME | NCNAME);
\r
290 for (int i = 0; i < extenderChar.Length; i++)
\r
292 CHARS[extenderChar[i]] = (byte)(CHARS[extenderChar[i]] | NAME | NCNAME);
\r
295 // remove ':' from allowable NCNAME_START and NCNAME chars
\r
296 CHARS[':'] = (byte)(CHARS[':'] & ~(NCNAME_START | NCNAME));
\r
298 // set Pubid characters
\r
299 for (int i = 0; i < pubidChar.Length; i++)
\r
301 CHARS[pubidChar[i]] = (byte)(CHARS[pubidChar[i]] | PUBID);
\r
303 for (int i = 0; i < pubidRange.Length; i += 2)
\r
305 for (int j = pubidRange[i]; j <= pubidRange[i + 1]; j++)
\r
307 CHARS[j] = (byte)(CHARS[j] | PUBID);
\r
// Private instance constructor: code outside this class cannot create instances.
// NOTE(review): the constructor body is not visible in this listing - confirm it is empty.
312 private XmlConstructs()
\r
/// <summary>
/// Tests whether the VALID flag is set for the given character in the flag table.
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns>true when the VALID bit is set in CHARS[c]; otherwise false.</returns>
public static bool IsValid(char c)
{
    int flags = CHARS[c];
    return (flags & VALID) != 0;
}
\r
/// <summary>
/// Tests whether an integer character value is a valid XML character
/// according to production [2] of the XML 1.0 specification.
/// </summary>
/// <param name="c">The character value to check; may be negative or above 0xFFFF.</param>
/// <returns>
/// true for values in [0x10000, 0x10FFFF], and for values inside the flag table
/// whose VALID bit is set; false for everything else, including values that are
/// zero or negative.
/// </returns>
public static bool IsValid(int c)
{
    // Production [2] includes [#x10000-#x10FFFF]. The flag table only covers
    // 16-bit values, so that range has to be answered without a table lookup.
    if (c >= 0x10000)
    {
        return c <= 0x10FFFF;
    }

    return c > 0 && c < CHARS.Length && (CHARS[c] & VALID) != 0;
}
\r
/// <summary>
/// Logical negation of <c>IsValid(char)</c>.
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns>true when <c>IsValid(c)</c> is false; otherwise false.</returns>
public static bool IsInvalid(char c)
{
    if (IsValid(c))
    {
        return false;
    }
    return true;
}
\r
/// <summary>
/// Logical negation of <c>IsValid(int)</c>.
/// </summary>
/// <param name="c">The character value to check.</param>
/// <returns>true when <c>IsValid(c)</c> is false; otherwise false.</returns>
public static bool IsInvalid(int c)
{
    if (IsValid(c))
    {
        return false;
    }
    return true;
}
\r
/// <summary>
/// Tests whether the CONTENT flag is set for the given character in the flag table.
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns>true when the CONTENT bit is set in CHARS[c]; otherwise false.</returns>
public static bool IsContent(char c)
{
    int flags = CHARS[c];
    return (flags & CONTENT) != 0;
}
\r
/// <summary>
/// Tests whether an integer character value can be considered content.
/// </summary>
/// <param name="c">The character value to check; may be negative or above 0xFFFF.</param>
/// <returns>
/// true for values in [0x10000, 0x10FFFF], and for values inside the flag table
/// whose CONTENT bit is set; false for everything else, including values that are
/// zero or negative.
/// </returns>
public static bool IsContent(int c)
{
    // Values in [#x10000-#x10FFFF] are valid XML characters and none of them is
    // a markup or newline character, so they count as content. The flag table
    // only covers 16-bit values, so this range is answered without a lookup.
    if (c >= 0x10000)
    {
        return c <= 0x10FFFF;
    }

    return c > 0 && c < CHARS.Length && (CHARS[c] & CONTENT) != 0;
}
\r
/// <summary>
/// Tests whether the character is one of the markup characters
/// '&lt;', '&amp;' or '%'.
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns>true for '&lt;', '&amp;' and '%'; false for any other character.</returns>
public static bool IsMarkup(char c)
{
    switch (c)
    {
        case '<':
        case '&':
        case '%':
            return true;
        default:
            return false;
    }
}
\r
/// <summary>
/// Tests whether an integer character value is one of the markup characters
/// '&lt;', '&amp;' or '%'.
/// </summary>
/// <param name="c">The character value to check; any integer is accepted.</param>
/// <returns>true when the value equals '&lt;', '&amp;' or '%'; otherwise false.</returns>
public static bool IsMarkup(int c)
{
    // The three accepted values are fixed ASCII code points, so no range test
    // against the flag table is needed: any other value fails all three comparisons.
    return c == '<' || c == '&' || c == '%';
}
\r
/// <summary>
/// Tests whether the character is a space character as defined by
/// production [3] in the XML 1.0 specification, i.e. whether its SPACE
/// flag is set in the flag table.
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns>true when the SPACE bit is set in CHARS[c]; otherwise false.</returns>
public static bool IsSpace(char c)
{
    int flags = CHARS[c];
    return (flags & SPACE) != 0;
}
\r
/// <summary>
/// Integer overload of the space test: values outside the flag table are never spaces.
/// </summary>
/// <param name="c">The character value to check.</param>
/// <returns>
/// false when <paramref name="c"/> is zero, negative, or not below CHARS.Length;
/// otherwise whether the SPACE bit is set in CHARS[c].
/// </returns>
public static bool IsSpace(int c)
{
    if (c <= 0 || c >= CHARS.Length)
    {
        return false;
    }
    return (CHARS[c] & SPACE) != 0;
}
\r
/// <summary>
/// Tests whether the character is a valid name start character as defined by
/// production [5] in the XML 1.0 specification, i.e. whether its NAME_START
/// flag is set in the flag table.
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns>true when the NAME_START bit is set in CHARS[c]; otherwise false.</returns>
public static bool IsNameStart(char c)
{
    int flags = CHARS[c];
    return (flags & NAME_START) != 0;
}
\r
/// <summary>
/// Integer overload of the name-start test: values outside the flag table never qualify.
/// </summary>
/// <param name="c">The character value to check.</param>
/// <returns>
/// false when <paramref name="c"/> is zero, negative, or not below CHARS.Length;
/// otherwise whether the NAME_START bit is set in CHARS[c].
/// </returns>
public static bool IsNameStart(int c)
{
    if (c <= 0 || c >= CHARS.Length)
    {
        return false;
    }
    return (CHARS[c] & NAME_START) != 0;
}
\r
/// <summary>
/// Tests whether the character is a valid name character as defined by
/// production [4] in the XML 1.0 specification, i.e. whether its NAME
/// flag is set in the flag table.
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns>true when the NAME bit is set in CHARS[c]; otherwise false.</returns>
public static bool IsName(char c)
{
    int flags = CHARS[c];
    return (flags & NAME) != 0;
}
\r
/// <summary>
/// Integer overload of the name-character test: values outside the flag table never qualify.
/// </summary>
/// <param name="c">The character value to check.</param>
/// <returns>
/// false when <paramref name="c"/> is zero, negative, or not below CHARS.Length;
/// otherwise whether the NAME bit is set in CHARS[c].
/// </returns>
public static bool IsName(int c)
{
    if (c <= 0 || c >= CHARS.Length)
    {
        return false;
    }
    return (CHARS[c] & NAME) != 0;
}
\r
/// <summary>
/// Tests whether the character is a valid NCName start character as defined by
/// production [4] in the Namespaces in XML recommendation, i.e. whether its
/// NCNAME_START flag is set in the flag table.
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns>true when the NCNAME_START bit is set in CHARS[c]; otherwise false.</returns>
public static bool IsNCNameStart(char c)
{
    int flags = CHARS[c];
    return (flags & NCNAME_START) != 0;
}
\r
/// <summary>
/// Integer overload of the NCName-start test: values outside the flag table never qualify.
/// </summary>
/// <param name="c">The character value to check.</param>
/// <returns>
/// false when <paramref name="c"/> is zero, negative, or not below CHARS.Length;
/// otherwise whether the NCNAME_START bit is set in CHARS[c].
/// </returns>
public static bool IsNCNameStart(int c)
{
    if (c <= 0 || c >= CHARS.Length)
    {
        return false;
    }
    return (CHARS[c] & NCNAME_START) != 0;
}
\r
/// <summary>
/// Tests whether the character is a valid NCName character as defined by
/// production [5] in the Namespaces in XML recommendation, i.e. whether its
/// NCNAME flag is set in the flag table.
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns>true when the NCNAME bit is set in CHARS[c]; otherwise false.</returns>
public static bool IsNCName(char c)
{
    int flags = CHARS[c];
    return (flags & NCNAME) != 0;
}
\r
/// <summary>
/// Integer overload of the NCName-character test: values outside the flag table never qualify.
/// </summary>
/// <param name="c">The character value to check.</param>
/// <returns>
/// false when <paramref name="c"/> is zero, negative, or not below CHARS.Length;
/// otherwise whether the NCNAME bit is set in CHARS[c].
/// </returns>
public static bool IsNCName(int c)
{
    if (c <= 0 || c >= CHARS.Length)
    {
        return false;
    }
    return (CHARS[c] & NCNAME) != 0;
}
\r
/// <summary>
/// Tests whether the character is a valid Pubid character as defined by
/// production [13] in the XML 1.0 specification, i.e. whether its PUBID
/// flag is set in the flag table.
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns>true when the PUBID bit is set in CHARS[c]; otherwise false.</returns>
public static bool IsPubid(char c)
{
    int flags = CHARS[c];
    return (flags & PUBID) != 0;
}
\r
/// <summary>
/// Integer overload of the Pubid-character test: values outside the flag table never qualify.
/// </summary>
/// <param name="c">The character value to check.</param>
/// <returns>
/// false when <paramref name="c"/> is zero, negative, or not below CHARS.Length;
/// otherwise whether the PUBID bit is set in CHARS[c].
/// </returns>
public static bool IsPubid(int c)
{
    if (c <= 0 || c >= CHARS.Length)
    {
        return false;
    }
    return (CHARS[c] & PUBID) != 0;
}
\r
472 /// Check to see if a string is a valid Name according to [5]
\r
473 /// in the XML 1.0 Recommendation
\r
475 /// <param name="name">The string to check</param>
\r
// On failure, 'err' is assigned an XmlException whose message names the reason:
// an empty string, a first character rejected by IsNameStart, or a later
// character rejected by IsName.
// NOTE(review): the return statements, the assignments to 'ch' and the initial
// value of 'err' are not visible in this listing - confirm against the full file.
476 public static bool IsValidName(String name, out Exception err)
\r
// An empty string is reported as an error.
479 if (name.Length == 0)
\r
481 err = new XmlException("Name can not be an empty string",null);
\r
// 'ch' is checked against the name-start class here; presumably it holds name[0] - confirm.
485 if( IsNameStart(ch) == false)
\r
487 err = new XmlException("The character '"+ch+"' cannot start a Name",null);
\r
// The loop starts at index 1, after the position covered by the name-start check.
490 for (int i = 1; i < name.Length; i++ )
\r
// Each 'ch' tested in the loop must belong to the name-character class.
493 if( IsName( ch ) == false )
\r
495 err = new XmlException("The character '"+ch+"' is not allowed in a Name",null);
\r
// Integer-returning variant of the Name check: tests for an empty string, then
// name[0] with IsNameStart, then each later character with IsName.
// NOTE(review): the return statements are not visible in this listing; presumably
// the result identifies the position of the first offending character - confirm
// the exact values against the full file.
502 public static int IsValidName (string name)
\r
// Empty input is handled before any character is read.
504 if (name.Length == 0)
\r
// The first character must be a name-start character.
506 if (!IsNameStart (name [0]))
\r
// Every character from index 1 onwards must be a name character.
508 for (int i=1; i<name.Length; i++)
\r
509 if (!IsName (name [i]))
\r
515 /// Check to see if a string is a valid NCName according to [4]
\r
516 /// from the XML Namespaces 1.0 Recommendation
\r
518 /// <param name="ncName">The string to check</param>
\r
// On failure, 'err' is assigned an XmlException whose message names the reason:
// an empty string, a first character rejected by IsNCNameStart, or a later
// character rejected by IsNCName.
// NOTE(review): the return statements, the in-loop assignment to 'ch' and the
// initial value of 'err' are not visible in this listing - confirm against the full file.
519 public static bool IsValidNCName(String ncName, out Exception err)
\r
// An empty string is reported as an error.
522 if (ncName.Length == 0)
\r
524 err = new XmlException("NCName can not be an empty string",null);
\r
// The first character must belong to the NCName-start class.
527 char ch = ncName[0];
\r
528 if( IsNCNameStart(ch) == false)
\r
530 err = new XmlException("The character '"+ch+"' cannot start a NCName",null);
\r
// The loop starts at index 1, after the character covered by the start check.
533 for (int i = 1; i < ncName.Length; i++ )
\r
// Each 'ch' tested in the loop must belong to the NCName-character class.
536 if( IsNCName( ch ) == false )
\r
538 err = new XmlException("The character '"+ch+"' is not allowed in a NCName",null);
\r
546 /// Check to see if a string is a valid Nmtoken according to [7]
\r
547 /// in the XML 1.0 Recommendation
\r
549 /// <param name="nmtoken">The string to check.</param>
\r
// On failure, 'err' is assigned an XmlException whose message names the reason:
// an empty string, or a character rejected by IsName.
// NOTE(review): the return statements and the initial value of 'err' are not
// visible in this listing - confirm against the full file.
550 public static bool IsValidNmtoken(String nmtoken, out Exception err)
\r
// An empty string is reported as an error.
553 if (nmtoken.Length == 0)
\r
555 err = new XmlException("NMTOKEN can not be an empty string", null);
\r
// The loop starts at index 0: every character, including the first, is tested
// with IsName, and there is no separate name-start check.
558 for (int i = 0; i < nmtoken.Length; i++ )
\r
560 char ch = nmtoken[i];
\r
561 if( ! IsName( ch ) )
\r
563 err = new XmlException("The character '"+ch+"' is not allowed in a NMTOKEN",null);
\r
573 /// Returns true if the encoding name is a valid IANA encoding.
\r
574 /// This method does not verify that there is a decoder available
\r
575 /// for this encoding, only that the characters are valid for an
\r
576 /// IANA encoding name.
\r
578 /// <param name="ianaEncoding">The encoding to check.</param>
\r
579 /// <returns></returns>
\r
580 public static bool IsValidIANAEncoding(String ianaEncoding)
\r
582 if (ianaEncoding != null)
\r
584 int length = ianaEncoding.Length;
\r
587 char c = ianaEncoding[0];
\r
588 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
\r
590 for (int i = 1; i < length; i++)
\r
592 c = ianaEncoding[i];
\r
593 if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') &&
\r
594 (c < '0' || c > '9') && c != '.' && c != '_' &&
\r