5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 // small xml parser that is mostly compatible with
34 using System.Collections;
35 using System.Collections.Generic;
36 using System.Globalization;
47 class DefaultHandler : SmallXmlParser.IContentHandler
49 public void OnStartParsing (SmallXmlParser parser)
53 public void OnEndParsing (SmallXmlParser parser)
57 public void OnStartElement (string name, SmallXmlParser.IAttrList attrs)
61 public void OnEndElement (string name)
65 public void OnChars (string s)
69 public void OnIgnorableWhitespace (string s)
73 public void OnProcessingInstruction (string name, string text)
85 public interface IContentHandler
87 void OnStartParsing (SmallXmlParser parser);
88 void OnEndParsing (SmallXmlParser parser);
89 void OnStartElement (string name, IAttrList attrs);
90 void OnEndElement (string name);
91 void OnProcessingInstruction (string name, string text);
92 void OnChars (string text);
93 void OnIgnorableWhitespace (string text);
96 public interface IAttrList
100 string GetName (int i);
101 string GetValue (int i);
102 string GetValue (string name);
103 string [] Names { get; }
104 string [] Values { get; }
107 class AttrListImpl : IAttrList
110 get { return attrNames.Count; }
112 public bool IsEmpty {
113 get { return attrNames.Count == 0; }
115 public string GetName (int i)
117 return attrNames [i];
119 public string GetValue (int i)
121 return attrValues [i];
123 public string GetValue (string name)
125 for (int i = 0; i < attrNames.Count; i++)
126 if (attrNames [i] == name)
127 return attrValues [i];
130 public string [] Names {
131 get { return attrNames.ToArray (); }
133 public string [] Values {
134 get { return attrValues.ToArray (); }
137 List<string> attrNames = new List<string> ();
138 List<string> attrValues = new List<string> ();
140 internal void Clear ()
146 internal void Add (string name, string value)
148 attrNames.Add (name);
149 attrValues.Add (value);
153 IContentHandler handler;
155 Stack elementNames = new Stack ();
156 Stack xmlSpaces = new Stack ();
158 StringBuilder buffer = new StringBuilder (200);
159 char [] nameBuffer = new char [30];
162 AttrListImpl attributes = new AttrListImpl ();
163 int line = 1, column;
166 public SmallXmlParser ()
170 private Exception Error (string msg)
172 return new SmallXmlParserException (msg, line, column);
175 private Exception UnexpectedEndError ()
177 string [] arr = new string [elementNames.Count];
178 elementNames.CopyTo (arr, 0);
179 return Error (String.Format (
180 "Unexpected end of stream. Element stack content is {0}", String.Join (",", arr)));
184 private bool IsNameChar (char c, bool start)
194 if (c > 0x100) { // optional condition for optimization
201 if ('\u02BB' <= c && c <= '\u02C1')
204 switch (Char.GetUnicodeCategory (c)) {
205 case UnicodeCategory.LowercaseLetter:
206 case UnicodeCategory.UppercaseLetter:
207 case UnicodeCategory.OtherLetter:
208 case UnicodeCategory.TitlecaseLetter:
209 case UnicodeCategory.LetterNumber:
211 case UnicodeCategory.SpacingCombiningMark:
212 case UnicodeCategory.EnclosingMark:
213 case UnicodeCategory.NonSpacingMark:
214 case UnicodeCategory.ModifierLetter:
215 case UnicodeCategory.DecimalDigitNumber:
222 private bool IsWhitespace (int c)
236 public void SkipWhitespaces ()
238 SkipWhitespaces (false);
241 private void HandleWhitespaces ()
243 while (IsWhitespace (Peek ()))
244 buffer.Append ((char) Read ());
245 if (Peek () != '<' && Peek () >= 0)
246 isWhitespace = false;
249 public void SkipWhitespaces (bool expected)
263 throw Error ("Whitespace is expected.");
271 return reader.Peek ();
276 int i = reader.Read ();
289 public void Expect (int c)
293 throw UnexpectedEndError ();
295 throw Error (String.Format ("Expected '{0}' but got {1}", (char) c, (char) p));
298 private string ReadUntil (char until, bool handleReferences)
302 throw UnexpectedEndError ();
303 char c = (char) Read ();
306 else if (handleReferences && c == '&')
311 string ret = buffer.ToString ();
316 public string ReadName ()
319 if (Peek () < 0 || !IsNameChar ((char) Peek (), true))
320 throw Error ("XML name start character is expected.");
321 for (int i = Peek (); i >= 0; i = Peek ()) {
323 if (!IsNameChar (c, false))
325 if (idx == nameBuffer.Length) {
326 char [] tmp = new char [idx * 2];
327 Array.Copy (nameBuffer, tmp, idx);
330 nameBuffer [idx++] = c;
334 throw Error ("Valid XML name is expected.");
335 return new string (nameBuffer, 0, idx);
339 public void Parse (TextReader input, IContentHandler handler)
342 this.handler = handler;
344 handler.OnStartParsing (this);
348 HandleBufferedContent ();
349 if (elementNames.Count > 0)
350 throw Error (String.Format ("Insufficient close tag: {0}", elementNames.Peek ()));
352 handler.OnEndParsing (this);
357 private void Cleanup ()
363 elementNames.Clear ();
368 isWhitespace = false;
371 public void ReadContent ()
374 if (IsWhitespace (Peek ())) {
375 if (buffer.Length == 0)
377 HandleWhitespaces ();
379 if (Peek () == '<') {
382 case '!': // declarations
384 if (Peek () == '[') {
386 if (ReadName () != "CDATA")
387 throw Error ("Invalid declaration markup");
392 else if (Peek () == '-') {
396 else if (ReadName () != "DOCTYPE")
397 throw Error ("Invalid declaration markup.");
399 throw Error ("This parser does not support document type.");
401 HandleBufferedContent ();
405 string text = String.Empty;
406 if (Peek () != '?') {
408 text += ReadUntil ('?', false);
414 handler.OnProcessingInstruction (
418 case '/': // end tags
419 HandleBufferedContent ();
420 if (elementNames.Count == 0)
421 throw UnexpectedEndError ();
425 string expected = (string) elementNames.Pop ();
427 if (xmlSpaces.Count > 0)
428 xmlSpace = (string) xmlSpaces.Peek ();
431 if (name != expected)
432 throw Error (String.Format ("End tag mismatch: expected {0} but found {1}", expected, name));
433 handler.OnEndElement (name);
436 default: // start tags (including empty tags)
437 HandleBufferedContent ();
439 while (Peek () != '>' && Peek () != '/')
440 ReadAttribute (attributes);
441 handler.OnStartElement (name, attributes);
444 if (Peek () == '/') {
446 handler.OnEndElement (name);
449 elementNames.Push (name);
450 xmlSpaces.Push (xmlSpace);
460 private void HandleBufferedContent ()
462 if (buffer.Length == 0)
465 handler.OnIgnorableWhitespace (buffer.ToString ());
467 handler.OnChars (buffer.ToString ());
469 isWhitespace = false;
472 private void ReadCharacters ()
474 isWhitespace = false;
487 buffer.Append ((char) Read ());
493 private void ReadReference ()
495 if (Peek () == '#') {
496 // character reference
498 ReadCharacterReference ();
500 string name = ReadName ();
510 buffer.Append ('\'');
519 throw Error ("General non-predefined entity reference is not supported in this parser.");
// Accumulates the digits of a numeric character reference into n:
// hexadecimal digits when the next character is 'x', otherwise decimal digits.
// NOTE(review): the lines missing from this listing (declaration of n, the
// Read () calls, loop exits and the return) are not visible here and are
// left untouched — confirm against the full file.
524 private int ReadCharacterReference ()
527 if (Peek () == 'x') { // hex
529 for (int i = Peek (); i >= 0; i = Peek ()) {
530 if ('0' <= i && i <= '9')
531 n = (n << 4) + i - '0'; // parenthesized: '+' binds tighter than '<<'
532 else if ('A' <= i && i <='F')
533 n = (n << 4) + i - 'A' + 10; // without parentheses this shifted by (4 + i - 'A' + 10)
534 else if ('a' <= i && i <='f')
535 n = (n << 4) + i - 'a' + 10;
541 for (int i = Peek (); i >= 0; i = Peek ()) { // decimal digits only
542 if ('0' <= i && i <= '9')
543 n = n * 10 + i - '0'; // decimal reference: scale by 10, not by 16
552 private void ReadAttribute (AttrListImpl a)
554 SkipWhitespaces (true);
555 if (Peek () == '/' || Peek () == '>')
556 // came here just to consume trailing whitespace
559 string name = ReadName ();
566 value = ReadUntil ('\'', true);
569 value = ReadUntil ('"', true);
572 throw Error ("Invalid attribute value markup.");
574 if (name == "xml:space")
579 private void ReadCDATASection ()
584 throw UnexpectedEndError ();
585 char c = (char) Read ();
588 else if (c == '>' && nBracket > 1) {
589 for (int i = nBracket; i > 2; i--)
594 for (int i = 0; i < nBracket; i++)
602 private void ReadComment ()
612 throw Error ("'--' is not allowed inside comment markup.");
621 [CLSCompliant(false)]
624 class SmallXmlParserException : SystemException
629 public SmallXmlParserException (string msg, int line, int column)
630 : base (String.Format ("{0}. At ({1},{2})", msg, line, column))
633 this.column = column;
641 get { return column; }