5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 // small xml parser that is mostly compatible with
34 using System.Collections;
35 using System.Globalization;
// Implementation of SmallXmlParser.IContentHandler that declares all seven
// callbacks of that interface.
// NOTE(review): none of the method bodies are visible in this view;
// presumably every callback is a no-op — confirm against the full file.
41 internal sealed class DefaultHandler : SmallXmlParser.IContentHandler
43 public void OnStartParsing (SmallXmlParser parser)
47 public void OnEndParsing (SmallXmlParser parser)
51 public void OnStartElement (string name, SmallXmlParser.IAttrList attrs)
55 public void OnEndElement (string name)
59 public void OnChars (string s)
63 public void OnIgnorableWhitespace (string s)
67 public void OnProcessingInstruction (string name, string text)
72 internal class SmallXmlParser
// Callback interface through which the parser reports what it reads.
74 internal interface IContentHandler
// Parse() calls this first, passing the parser itself.
76 void OnStartParsing (SmallXmlParser parser);
// Parse() calls this once the input is exhausted and every element is closed.
77 void OnEndParsing (SmallXmlParser parser);
// Raised for a start tag (or empty-element tag) with its name and attributes.
78 void OnStartElement (string name, IAttrList attrs);
// Raised for a matching end tag, and also for an empty-element tag.
79 void OnEndElement (string name);
// Raised for a processing instruction.
80 void OnProcessingInstruction (string name, string text);
// Receives buffered character content.
81 void OnChars (string text);
// Receives buffered content that the parser classified as whitespace-only.
82 void OnIgnorableWhitespace (string text);
// View of the attributes of one start tag, passed to
// IContentHandler.OnStartElement.
// NOTE(review): other members may be declared on lines elided from this view.
85 internal interface IAttrList
// Attribute name at position i.
89 string GetName (int i);
// Attribute value at position i.
90 string GetValue (int i);
// Attribute value looked up by attribute name.
91 string GetValue (string name);
// All attribute names as an array.
92 string [] Names { get; }
// All attribute values as an array.
93 string [] Values { get; }
// IAttrList implementation backed by two parallel ArrayLists: the entry at
// index i of attrNames corresponds to the entry at index i of attrValues.
96 sealed class AttrListImpl : IAttrList
// Number of stored attributes (the property header is elided from this view).
99 get { return attrNames.Count; }
// True when no attribute has been added.
101 public bool IsEmpty {
102 get { return attrNames.Count == 0; }
// Name of the attribute at position i.
104 public string GetName (int i)
106 return (string) attrNames [i];
// Value of the attribute at position i.
108 public string GetValue (int i)
110 return (string) attrValues [i];
// Linear search by name; returns the value of the first attribute whose
// name equals the argument.
// NOTE(review): the not-found result is on an elided line — presumably null.
112 public string GetValue (string name)
114 for (int i = 0; i < attrNames.Count; i++)
115 if ((string) attrNames [i] == name)
116 return (string) attrValues [i];
// Copies the names into a freshly allocated string array on every access.
119 public string [] Names {
120 get { return (string []) attrNames.ToArray (typeof (string)); }
// Copies the values into a freshly allocated string array on every access.
122 public string [] Values {
123 get { return (string []) attrValues.ToArray (typeof (string)); }
// Parallel storage for names and values; Add() appends to both together.
126 ArrayList attrNames = new ArrayList ();
127 ArrayList attrValues = new ArrayList ();
// NOTE(review): body not visible here; presumably empties both lists.
129 internal void Clear ()
// Appends one name/value pair, keeping the two lists aligned.
135 internal void Add (string name, string value)
137 attrNames.Add (name);
138 attrValues.Add (value);
// Per-parse state of SmallXmlParser.
// NOTE(review): further fields used elsewhere in the file (reader, xmlSpace,
// isWhitespace) are declared on lines elided from this view.
// Receiver of parse events; assigned in Parse().
142 IContentHandler handler;
// Names of currently open elements: pushed on start tags, popped on end tags.
144 Stack elementNames = new Stack ();
// xml:space values, pushed alongside elementNames for each open element.
145 Stack xmlSpaces = new Stack ();
// Accumulates character content and attribute values between flushes.
147 StringBuilder buffer = new StringBuilder (200);
// Scratch array used by ReadName(); replaced by a doubled array when full.
148 char [] nameBuffer = new char [30];
// Single attribute list instance passed to ReadAttribute() and OnStartElement().
151 AttrListImpl attributes = new AttrListImpl ();
// Current position, reported in SmallXmlParserException; line starts at 1.
152 int line = 1, column;
// Parameterless constructor. NOTE(review): the body is not visible in this view.
155 public SmallXmlParser ()
// Builds (does not throw) a SmallXmlParserException carrying msg and the
// parser's current line and column; callers write `throw Error (...)`.
159 private Exception Error (string msg)
161 return new SmallXmlParserException (msg, line, column);
// Builds the exception used when input ends prematurely. The message lists
// the contents of the open-element stack, comma-separated.
164 private Exception UnexpectedEndError ()
166 string [] arr = new string [elementNames.Count];
167 // COMPACT FRAMEWORK NOTE: CopyTo is not visible through the Stack class
168 (elementNames as ICollection).CopyTo (arr, 0);
169 return Error (String.Format (
170 "Unexpected end of stream. Element stack content is {0}", String.Join (",", arr)));
// Classifies c as an XML name character; `start` is passed as true by
// ReadName() for the first character of a name and false afterwards.
// NOTE(review): the ASCII fast path and every return statement are on lines
// elided from this view, so which categories are accepted only for
// non-start characters cannot be confirmed here.
174 private bool IsNameChar (char c, bool start)
184 if (c > 0x100) { // optional condition for optimization
// Explicit range test for U+02BB..U+02C1; its outcome is not visible here.
191 if ('\u02BB' <= c && c <= '\u02C1')
// Remaining characters are classified by their Unicode general category.
194 switch (Char.GetUnicodeCategory (c)) {
195 case UnicodeCategory.LowercaseLetter:
196 case UnicodeCategory.UppercaseLetter:
197 case UnicodeCategory.OtherLetter:
198 case UnicodeCategory.TitlecaseLetter:
199 case UnicodeCategory.LetterNumber:
// The gap in the original numbering suggests a statement sits between the
// letter group above and the mark/digit group below — TODO confirm.
201 case UnicodeCategory.SpacingCombiningMark:
202 case UnicodeCategory.EnclosingMark:
203 case UnicodeCategory.NonSpacingMark:
204 case UnicodeCategory.ModifierLetter:
205 case UnicodeCategory.DecimalDigitNumber:
// Whitespace test used by the parser. Takes an int so the result of Peek ()
// can be passed directly. NOTE(review): the body is not visible in this view.
212 private bool IsWhitespace (int c)
// Convenience overload: forwards to SkipWhitespaces (bool) with expected = false.
226 public void SkipWhitespaces ()
228 SkipWhitespaces (false);
// Moves a run of whitespace characters from the input into `buffer`.
231 private void HandleWhitespaces ()
233 while (IsWhitespace (Peek ()))
234 buffer.Append ((char) Read ());
// If the run is followed by something other than '<' or end of input
// (Peek () < 0), clear the isWhitespace flag.
235 if (Peek () != '<' && Peek () >= 0)
236 isWhitespace = false;
// Skips whitespace in the input.
// NOTE(review): the loop and the condition guarding the throw are elided from
// this view; presumably the error is raised only when `expected` is true and
// no whitespace was found — confirm.
239 public void SkipWhitespaces (bool expected)
253 throw Error ("Whitespace is expected.");
// Fragment of the look-ahead helper (its header is elided from this view):
// returns the reader's next character without consuming it.
261 return reader.Peek ();
// Fragment of the consuming read helper (header and remainder elided):
// takes the next character from the reader.
// NOTE(review): presumably line/column are updated on the elided lines — confirm.
266 int i = reader.Read ();
// Consumes input and fails unless it is the character c.
// NOTE(review): `p` is declared on an elided line (presumably the result of
// Read ()), and the conditions guarding the two throws are also elided.
279 public void Expect (int c)
// Raised when the input ended instead of yielding a character.
283 throw UnexpectedEndError ();
// Raised when a different character was found.
285 throw Error (String.Format ("Expected '{0}' but got {1}", (char) c, (char) p));
// Reads characters and returns the text accumulated in `buffer`.
// When handleReferences is true, '&' is singled out for special handling.
// NOTE(review): the loop header, the test against `until`, the body of the
// '&' branch and the buffer reset are on elided lines; presumably reading
// stops at `until` and '&' triggers ReadReference () — confirm.
288 private string ReadUntil (char until, bool handleReferences)
// Running out of input is an error.
292 throw UnexpectedEndError ();
293 char c = (char) Read ();
296 else if (handleReferences && c == '&')
301 string ret = buffer.ToString ();
// Reads an XML name from the input and returns it as a new string.
// NOTE(review): the declarations of `idx` and `c`, the loop exit and the
// consuming Read () are on lines elided from this view.
306 public string ReadName ()
// The first character must exist and be a valid name-start character.
309 if (Peek () < 0 || !IsNameChar ((char) Peek (), true))
310 throw Error ("XML name start character is expected.");
311 for (int i = Peek (); i >= 0; i = Peek ()) {
313 if (!IsNameChar (c, false))
// Scratch buffer is full: allocate one of twice the size and copy the
// characters gathered so far.
315 if (idx == nameBuffer.Length) {
316 char [] tmp = new char [idx * 2];
317 // COMPACT FRAMEWORK NOTE: Array.Copy(sourceArray, destinationArray, count) is not available.
318 Array.Copy (nameBuffer, 0, tmp, 0, idx);
321 nameBuffer [idx++] = c;
// NOTE(review): the condition guarding this throw is elided.
325 throw Error ("Valid XML name is expected.");
326 return new string (nameBuffer, 0, idx);
// Entry point: parses `input` and reports events to `handler`.
// NOTE(review): the assignment of the reader, the main read loop and any
// cleanup call are on lines elided from this view.
330 public void Parse (TextReader input, IContentHandler handler)
333 this.handler = handler;
335 handler.OnStartParsing (this);
// Flush content still sitting in the buffer.
339 HandleBufferedContent ();
// Any element left on the stack at this point was never closed.
340 if (elementNames.Count > 0)
341 throw Error (String.Format ("Insufficient close tag: {0}", elementNames.Peek ()));
343 handler.OnEndParsing (this);
// Resets parser state.
// NOTE(review): both a re-allocation of the stacks and a Clear () call are
// visible; presumably these are alternatives selected by conditional
// compilation directives on elided lines — confirm. Other resets are elided.
348 private void Cleanup ()
355 elementNames = new Stack ();
356 xmlSpaces = new Stack ();
358 elementNames.Clear ();
364 isWhitespace = false;
// Reads one piece of content: a whitespace run, a markup construct starting
// with '<', or (presumably, on elided lines) plain character data.
// NOTE(review): many short lines (Read () calls, `case` labels, braces,
// returns) are elided from this view; comments describe only what is visible.
367 public void ReadContent ()
// Leading whitespace is gathered by HandleWhitespaces ().
370 if (IsWhitespace (Peek ())) {
371 if (buffer.Length == 0)
373 HandleWhitespaces ();
375 if (Peek () == '<') {
378 case '!': // declarations
// "<![" must be followed by the name CDATA.
380 if (Peek () == '[') {
382 if (ReadName () != "CDATA")
383 throw Error ("Invalid declaration markup");
// "<!-" introduces a comment.
388 else if (Peek () == '-') {
// Any other declaration must be DOCTYPE, which is then rejected as unsupported.
392 else if (ReadName () != "DOCTYPE")
393 throw Error ("Invalid declaration markup.");
395 throw Error ("This parser does not support document type.");
// NOTE(review): presumably the processing-instruction branch begins here
// (its `case` label is elided). Text is read up to '?'.
397 HandleBufferedContent ();
401 string text = String.Empty;
402 if (Peek () != '?') {
404 text += ReadUntil ('?', false);
410 handler.OnProcessingInstruction (
414 case '/': // end tags
// Pending text is flushed before the end tag is reported.
415 HandleBufferedContent ();
// An end tag with no open element is an error.
416 if (elementNames.Count == 0)
417 throw UnexpectedEndError ();
421 string expected = (string) elementNames.Pop ();
// Restore the xml:space value from the top of the xmlSpaces stack, if any.
423 if (xmlSpaces.Count > 0)
424 xmlSpace = (string) xmlSpaces.Peek ();
// The end tag's name must match the innermost open element.
427 if (name != expected)
428 throw Error (String.Format ("End tag mismatch: expected {0} but found {1}", expected, name));
429 handler.OnEndElement (name);
432 default: // start tags (including empty tags)
433 HandleBufferedContent ();
// Attributes are read until the tag closes with '>' or "/>".
435 while (Peek () != '>' && Peek () != '/')
436 ReadAttribute (attributes);
437 handler.OnStartElement (name, attributes);
// Empty-element tag: report the end immediately.
440 if (Peek () == '/') {
442 handler.OnEndElement (name);
// Otherwise the element stays open: remember its name and current xml:space.
445 elementNames.Push (name);
446 xmlSpaces.Push (xmlSpace);
// Flushes `buffer` to the handler, as ignorable whitespace or as characters.
// NOTE(review): the early return for an empty buffer, the condition choosing
// between the two callbacks (presumably the isWhitespace flag) and the buffer
// reset are on elided lines.
456 private void HandleBufferedContent ()
458 if (buffer.Length == 0)
461 handler.OnIgnorableWhitespace (buffer.ToString ());
463 handler.OnChars (buffer.ToString ());
465 isWhitespace = false;
// Reads character data into `buffer`, first marking the pending content as
// not whitespace-only.
// NOTE(review): the loop and its stop conditions are elided from this view.
468 private void ReadCharacters ()
470 isWhitespace = false;
483 buffer.Append ((char) Read ());
// Reads an entity or character reference and appends the character it denotes
// to `buffer`. The introducing '&' is expected to have been consumed by the
// caller; the terminating ';' is consumed here.
//
// Throws a SmallXmlParserException (via Error) for a malformed or out-of-range
// character reference, a missing ';', or a general entity other than the five
// predefined ones.
//
// NOTE(review): this block was rewritten from a partial view of the original
// in which short lines were elided. The predefined-entity table and the
// Expect (';') in the named branch are reconstructed — confirm against the
// full file.
private void ReadReference ()
{
	if (Peek () == '#') {
		// character reference
		Read ();
		int codePoint = ReadCharacterReference ();
		// Previously the decoded value was discarded and ';' was left in the
		// input, where the caller would treat it as literal text.
		Expect (';');
		if (codePoint >= 0x10000) {
			// Outside the BMP: emit a UTF-16 surrogate pair.
			int offset = codePoint - 0x10000;
			buffer.Append ((char) (0xD800 + (offset >> 10)));
			buffer.Append ((char) (0xDC00 + (offset & 0x3FF)));
		} else {
			buffer.Append ((char) codePoint);
		}
	} else {
		string name = ReadName ();
		Expect (';');
		switch (name) {
		case "amp":
			buffer.Append ('&');
			break;
		case "quot":
			buffer.Append ('"');
			break;
		case "apos":
			buffer.Append ('\'');
			break;
		case "lt":
			buffer.Append ('<');
			break;
		case "gt":
			buffer.Append ('>');
			break;
		default:
			throw Error ("General non-predefined entity reference is not supported in this parser.");
		}
	}
}

// Reads the numeric part of a character reference (the text after "&#") and
// returns the code point, in the range 0..0x10FFFF. A leading 'x' selects
// hexadecimal; otherwise digits are decimal. Reading stops at the first
// character that is not a digit of the selected radix; that character is left
// in the input.
//
// Fixes relative to the previous implementation:
//  - `n << 4 + d` parses as `n << (4 + d)` because additive operators bind
//    tighter than shifts, so the accumulated value was wrong (always 0);
//  - the decimal branch shifted by 4 instead of multiplying by 10.
private int ReadCharacterReference ()
{
	int radix = 10;
	if (Peek () == 'x') { // hex
		Read ();
		radix = 16;
	}

	int n = 0;
	bool sawDigit = false;
	for (int i = Peek (); i >= 0; i = Peek ()) {
		int digit;
		if ('0' <= i && i <= '9')
			digit = i - '0';
		else if (radix == 16 && 'A' <= i && i <= 'F')
			digit = i - 'A' + 10;
		else if (radix == 16 && 'a' <= i && i <= 'f')
			digit = i - 'a' + 10;
		else
			break;

		n = n * radix + digit;
		// Checking after every digit also keeps `n` from overflowing int.
		if (n > 0x10FFFF)
			throw Error ("Character reference is out of range.");
		Read ();
		sawDigit = true;
	}
	if (!sawDigit)
		throw Error ("Invalid character reference.");
	return n;
}
// Reads one attribute from inside a start tag.
// NOTE(review): the '=' handling, the quote-character switch, the call that
// stores the pair in `a`, and the xml:space handling body are on lines
// elided from this view.
548 private void ReadAttribute (AttrListImpl a)
// Whitespace is required before an attribute.
550 SkipWhitespaces (true);
551 if (Peek () == '/' || Peek () == '>')
552 // came here just to spend trailing whitespaces
555 string name = ReadName ();
// The value is read up to the matching quote character, with references
// expanded (handleReferences = true).
562 value = ReadUntil ('\'', true);
565 value = ReadUntil ('"', true);
// NOTE(review): presumably raised when the value starts with neither quote character.
568 throw Error ("Invalid attribute value markup.");
// xml:space receives special treatment; the action is on an elided line.
570 if (name == "xml:space")
// Reads the body of a CDATA section, tracking consecutive ']' characters in
// nBracket to recognise the terminator.
// NOTE(review): the loop header, the ']' counting branch and the bodies of
// both `for` loops are elided; the comments on them below are presumptions.
575 private void ReadCDATASection ()
// Input ending inside the section is an error.
580 throw UnexpectedEndError ();
581 char c = (char) Read ();
// '>' after at least two ']' presumably ends the section; brackets beyond
// the closing two would belong to the content.
584 else if (c == '>' && nBracket > 1) {
585 for (int i = nBracket; i > 2; i--)
// Presumably re-emits brackets that did not form a terminator.
590 for (int i = 0; i < nBracket; i++)
// Reads a comment. A "--" inside the comment is rejected with an error.
// NOTE(review): the scanning logic is elided from this view.
598 private void ReadComment ()
608 throw Error ("'--' is not allowed inside comment markup.");
// Exception produced by SmallXmlParser.Error (); carries the position at
// which the problem was detected.
614 internal sealed class SmallXmlParserException : SystemException
// The exception message is formatted as "<msg>. At (<line>,<column>)".
619 public SmallXmlParserException (string msg, int line, int column)
620 : base (String.Format ("{0}. At ({1},{2})", msg, line, column))
// NOTE(review): the matching assignment of `line` is presumably on an elided line.
623 this.column = column;
// Getter returning the stored column (the property header is elided).
631 get { return column; }