5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 // small xml parser that is mostly compatible with
34 using System.Collections;
35 using System.Globalization;
// Implementation of SmallXmlParser.IContentHandler. Only the callback
// signatures are visible in this excerpt; the method bodies are not shown,
// so nothing is asserted here about what each callback does.
46 class DefaultHandler : SmallXmlParser.IContentHandler
48 public void OnStartParsing (SmallXmlParser parser)
52 public void OnEndParsing (SmallXmlParser parser)
56 public void OnStartElement (string name, SmallXmlParser.IAttrList attrs)
60 public void OnEndElement (string name)
64 public void OnChars (string s)
68 public void OnIgnorableWhitespace (string s)
72 public void OnProcessingInstruction (string name, string text)
// Callback interface through which the parser reports parse events.
84 public interface IContentHandler
// Invoked by Parse with the parser instance before content is reported.
86 void OnStartParsing (SmallXmlParser parser);
// Invoked by Parse after buffered content was flushed and the open-element
// stack was checked to be empty.
87 void OnEndParsing (SmallXmlParser parser);
// Invoked for a start tag (or empty-element tag) once its attributes were read.
88 void OnStartElement (string name, IAttrList attrs);
// Invoked for a matching end tag, and also right after OnStartElement when
// the tag is an empty-element tag ("/" before ">").
89 void OnEndElement (string name);
// Invoked for a processing instruction.
90 void OnProcessingInstruction (string name, string text);
// Receives buffered character content (see HandleBufferedContent).
91 void OnChars (string text);
// Receives buffered content via the other HandleBufferedContent branch.
92 void OnIgnorableWhitespace (string text);
// Read-only view over the attributes of the element being reported.
// (Some members of the interface are not visible in this excerpt.)
95 public interface IAttrList
// Name of the attribute at index i.
99 string GetName (int i);
// Value of the attribute at index i.
100 string GetValue (int i);
// Value of the attribute with the given name.
101 string GetValue (string name);
// All attribute names / values as arrays.
102 string [] Names { get; }
103 string [] Values { get; }
// IAttrList implementation backed by two parallel ArrayLists: attrNames [i]
// and attrValues [i] describe the same attribute.
106 class AttrListImpl : IAttrList
// Number of stored attributes (the property header is not visible here).
109 get { return attrNames.Count; }
// True when no attribute is stored.
111 public bool IsEmpty {
112 get { return attrNames.Count == 0; }
// Name at index i; the indexer result is cast from object to string.
114 public string GetName (int i)
116 return (string) attrNames [i];
// Value at index i.
118 public string GetValue (int i)
120 return (string) attrValues [i];
// Linear search by name; returns the value stored at the first matching
// index. The result for a name that is absent is not visible in this
// excerpt - TODO confirm.
122 public string GetValue (string name)
124 for (int i = 0; i < attrNames.Count; i++)
125 if ((string) attrNames [i] == name)
126 return (string) attrValues [i];
// Each access copies the list into a new string array.
129 public string [] Names {
130 get { return (string []) attrNames.ToArray (typeof (string)); }
132 public string [] Values {
133 get { return (string []) attrValues.ToArray (typeof (string)); }
// Parallel storage; Add appends to both lists so indexes stay aligned.
136 ArrayList attrNames = new ArrayList ();
137 ArrayList attrValues = new ArrayList ();
// Body not visible; presumably empties both lists - TODO confirm.
139 internal void Clear ()
// Appends one name/value pair.
145 internal void Add (string name, string value)
147 attrNames.Add (name);
148 attrValues.Add (value);
// Receiver of parse events; assigned in Parse.
152 IContentHandler handler;
// Names of the currently open elements: pushed for start tags and popped
// for end tags in ReadContent.
154 Stack elementNames = new Stack ();
// xml:space values saved alongside elementNames in ReadContent.
155 Stack xmlSpaces = new Stack ();
// Shared accumulation buffer for character content and ReadUntil results.
157 StringBuilder buffer = new StringBuilder (200);
// Scratch buffer used by ReadName; ReadName allocates a buffer of twice the
// size when this one is full.
158 char [] nameBuffer = new char [30];
// Attribute list instance passed to ReadAttribute and to OnStartElement.
161 AttrListImpl attributes = new AttrListImpl ();
// Position reported in SmallXmlParserException (see Error). line starts at 1.
162 int line = 1, column;
// Constructor; body not visible in this excerpt.
165 public SmallXmlParser ()
// Builds (does not throw) a SmallXmlParserException carrying msg and the
// current line/column; callers write "throw Error (...)".
169 private Exception Error (string msg)
171 return new SmallXmlParserException (msg, line, column);
// Builds the error used when input ends prematurely. The message lists the
// contents of the open-element stack, comma separated, in the order
// Stack.CopyTo produces them.
174 private Exception UnexpectedEndError ()
176 string [] arr = new string [elementNames.Count];
177 elementNames.CopyTo (arr, 0);
178 return Error (String.Format (
179 "Unexpected end of stream. Element stack content is {0}", String.Join (",", arr)));
// Tests whether c may appear in an XML name; "start" distinguishes the first
// character from following ones (ReadName passes true for the first
// character and false afterwards). The return statements are not visible in
// this excerpt, so the exact accept/reject sets are not documented here.
183 private bool IsNameChar (char c, bool start)
// Non-ASCII handling is only entered for c > 0x100.
193 if (c > 0x100) { // optional condition for optimization
// Explicit check for the U+02BB..U+02C1 range.
200 if ('\u02BB' <= c && c <= '\u02C1')
// Remaining characters are classified by their Unicode category.
203 switch (Char.GetUnicodeCategory (c)) {
204 case UnicodeCategory.LowercaseLetter:
205 case UnicodeCategory.UppercaseLetter:
206 case UnicodeCategory.OtherLetter:
207 case UnicodeCategory.TitlecaseLetter:
208 case UnicodeCategory.LetterNumber:
// Second group of categories (a line between the two groups is missing
// from this excerpt; presumably these are allowed only after the first
// character - TODO confirm).
210 case UnicodeCategory.SpacingCombiningMark:
211 case UnicodeCategory.EnclosingMark:
212 case UnicodeCategory.NonSpacingMark:
213 case UnicodeCategory.ModifierLetter:
214 case UnicodeCategory.DecimalDigitNumber:
// Whitespace test. Takes an int so that the result of Peek () can be passed
// directly (see HandleWhitespaces and ReadContent). Body not visible here.
221 private bool IsWhitespace (int c)
// Convenience overload: same as SkipWhitespaces (false).
235 public void SkipWhitespaces ()
237 SkipWhitespaces (false);
// Moves a run of whitespace characters from the input into buffer.
240 private void HandleWhitespaces ()
242 while (IsWhitespace (Peek ()))
243 buffer.Append ((char) Read ());
// If the run is followed by anything other than '<' or end of input
// (Peek () < 0), clear the isWhitespace flag.
244 if (Peek () != '<' && Peek () >= 0)
245 isWhitespace = false;
// Skips whitespace. The condition guarding the throw is not visible in this
// excerpt; presumably the error is raised when "expected" is true and no
// whitespace was found - TODO confirm.
248 public void SkipWhitespaces (bool expected)
262 throw Error ("Whitespace is expected.");
270 return reader.Peek ();
275 int i = reader.Read ();
// Requires character c in the input. Two failures are visible: an
// unexpected-end error, and a mismatch error that reports the expected
// character c and the character p that was found. The guarding conditions
// are not visible in this excerpt.
288 public void Expect (int c)
292 throw UnexpectedEndError ();
294 throw Error (String.Format ("Expected '{0}' but got {1}", (char) c, (char) p));
// Reads characters up to the "until" delimiter and returns the text taken
// from the shared buffer. When handleReferences is true, '&' is treated
// specially (ReadAttribute passes true for quoted attribute values).
// Loop structure and buffer reset are not visible in this excerpt.
297 private string ReadUntil (char until, bool handleReferences)
// Raised when the delimiter has not been seen (guard not visible;
// presumably end of input - TODO confirm).
301 throw UnexpectedEndError ();
302 char c = (char) Read ();
305 else if (handleReferences && c == '&')
310 string ret = buffer.ToString ();
// Reads an XML name from the input into nameBuffer and returns it as a new
// string.
315 public string ReadName ()
// The first character must exist and be a valid name start character.
318 if (Peek () < 0 || !IsNameChar ((char) Peek (), true))
319 throw Error ("XML name start character is expected.");
// Consume characters until end of input or a non-name character.
320 for (int i = Peek (); i >= 0; i = Peek ()) {
322 if (!IsNameChar (c, false))
// Buffer full: allocate one twice as large and copy the existing content.
// (The line that installs tmp as nameBuffer is not visible in this excerpt.)
324 if (idx == nameBuffer.Length) {
325 char [] tmp = new char [idx * 2];
326 Array.Copy (nameBuffer, tmp, idx);
329 nameBuffer [idx++] = c;
// Guard not visible in this excerpt.
333 throw Error ("Valid XML name is expected.");
334 return new string (nameBuffer, 0, idx);
// Entry point: parses the document read from input and reports events to
// handler.
338 public void Parse (TextReader input, IContentHandler handler)
341 this.handler = handler;
343 handler.OnStartParsing (this);
// After the content was read: flush pending text, then reject documents that
// still have open elements, reporting the innermost one.
347 HandleBufferedContent ();
348 if (elementNames.Count > 0)
349 throw Error (String.Format ("Insufficient close tag: {0}", elementNames.Peek ()));
351 handler.OnEndParsing (this);
// Resets parser state; the visible lines clear the open-element stack and
// the isWhitespace flag. Other resets are not visible in this excerpt.
356 private void Cleanup ()
362 elementNames.Clear ();
367 isWhitespace = false;
// Reads one content item from the input: a whitespace run, markup starting
// with '<' (declaration, processing instruction, end tag, start tag), or
// character data. Several lines of this method are missing from this
// excerpt; comments below describe only what the visible lines establish.
370 public void ReadContent ()
// Leading whitespace is collected by HandleWhitespaces; something extra
// happens when the buffer is still empty (that line is not visible).
373 if (IsWhitespace (Peek ())) {
374 if (buffer.Length == 0)
376 HandleWhitespaces ();
378 if (Peek () == '<') {
381 case '!': // declarations
// "<![" must be followed by the name CDATA.
383 if (Peek () == '[') {
385 if (ReadName () != "CDATA")
386 throw Error ("Invalid declaration markup");
// "<!-" branch (presumably a comment, handled by ReadComment - TODO confirm).
391 else if (Peek () == '-') {
// Any other declaration must be named DOCTYPE, which is recognised only to
// be rejected as unsupported.
395 else if (ReadName () != "DOCTYPE")
396 throw Error ("Invalid declaration markup.");
398 throw Error ("This parser does not support document type.");
// Flush pending text before handling the next construct.
400 HandleBufferedContent ();
// Processing-instruction text is read up to '?' (without reference
// handling) unless the next character already is '?'.
404 string text = String.Empty;
405 if (Peek () != '?') {
407 text += ReadUntil ('?', false);
413 handler.OnProcessingInstruction (
417 case '/': // end tags
418 HandleBufferedContent ();
// An end tag with no open element is reported as an unexpected-end error.
419 if (elementNames.Count == 0)
420 throw UnexpectedEndError ();
424 string expected = (string) elementNames.Pop ();
// Restore the xml:space value from the top of the xmlSpaces stack, if any.
426 if (xmlSpaces.Count > 0)
427 xmlSpace = (string) xmlSpaces.Peek ();
// The end tag name must equal the name popped from the stack.
430 if (name != expected)
431 throw Error (String.Format ("End tag mismatch: expected {0} but found {1}", expected, name));
432 handler.OnEndElement (name);
435 default: // start tags (including empty tags)
436 HandleBufferedContent ();
// Read attributes until the tag closes with '>' or '/'.
// NOTE(review): the same AttrListImpl field instance is passed to every
// OnStartElement call; where it is cleared is not visible here - confirm
// whether handlers may retain it.
438 while (Peek () != '>' && Peek () != '/')
439 ReadAttribute (attributes);
440 handler.OnStartElement (name, attributes);
// Empty-element tag: report the end of the element immediately.
443 if (Peek () == '/') {
445 handler.OnEndElement (name);
// Record the element name and current xml:space value (presumably only for
// non-empty elements; the enclosing branch is not visible - TODO confirm).
448 elementNames.Push (name);
449 xmlSpaces.Push (xmlSpace);
// Reports the text accumulated in buffer to the handler, either as
// ignorable whitespace or as character data, then clears the isWhitespace
// flag. The branch condition and the buffer reset are not visible in this
// excerpt (presumably the choice depends on isWhitespace - TODO confirm).
459 private void HandleBufferedContent ()
// Nothing buffered: nothing to report.
461 if (buffer.Length == 0)
464 handler.OnIgnorableWhitespace (buffer.ToString ());
466 handler.OnChars (buffer.ToString ());
468 isWhitespace = false;
// Reads character data into buffer. Content read here is not whitespace-only,
// so the isWhitespace flag is cleared first. The loop and its termination
// conditions are not visible in this excerpt.
471 private void ReadCharacters ()
473 isWhitespace = false;
486 buffer.Append ((char) Read ());
// Handles a reference: a '#' prefix selects a numeric character reference,
// anything else is read as an entity name. Entity names other than the
// handled ones are rejected as unsupported.
492 private void ReadReference ()
494 if (Peek () == '#') {
495 // character reference
// NOTE(review): ReadCharacterReference returns int, but the value is
// discarded on this line, so the referenced character does not appear to be
// appended to buffer - confirm whether this is intended.
497 ReadCharacterReference ();
499 string name = ReadName ();
// One of the entity branches appends an apostrophe to buffer (the matching
// case label is not visible in this excerpt).
509 buffer.Append ('\'');
518 throw Error ("General non-predefined entity reference is not supported in this parser.");
// Parses the digits of a numeric character reference and accumulates the
// numeric value in n. A leading 'x' selects hexadecimal; otherwise the digits
// are decimal. i is the peeked character, so each loop runs while input is
// available (Peek () >= 0).
//
// Fixes relative to the previous revision:
//  * In C#, '+' and '-' bind tighter than '<<', so "n << 4 + i - '0'" was
//    evaluated as "n << (4 + i - '0')". The shift is now parenthesised so the
//    digit value is added to n * 16.
//  * The decimal branch used a 4-bit shift (base 16); it now multiplies by 10.
523 private int ReadCharacterReference ()
526 if (Peek () == 'x') { // hex
528 for (int i = Peek (); i >= 0; i = Peek ()) {
529 if ('0' <= i && i <= '9')
530 n = (n << 4) + i - '0';
531 else if ('A' <= i && i <='F')
532 n = (n << 4) + i - 'A' + 10;
533 else if ('a' <= i && i <='f')
534 n = (n << 4) + i - 'a' + 10;
// Decimal form: each digit contributes in base 10.
540 for (int i = Peek (); i >= 0; i = Peek ()) {
541 if ('0' <= i && i <= '9')
542 n = n * 10 + i - '0';
// Reads one attribute of a start tag; a is the attribute list of the element
// being read (ReadContent passes the parser's attribute list).
551 private void ReadAttribute (AttrListImpl a)
// Whitespace is required before an attribute.
553 SkipWhitespaces (true);
554 if (Peek () == '/' || Peek () == '>')
555 // came here just to spend trailing whitespaces
558 string name = ReadName ();
// The value is delimited by single or double quotes and is read with
// reference handling enabled; any other delimiter is an error.
565 value = ReadUntil ('\'', true);
568 value = ReadUntil ('"', true);
571 throw Error ("Invalid attribute value markup.");
// xml:space gets special treatment (the action is not visible in this
// excerpt; presumably it updates xmlSpace - TODO confirm).
573 if (name == "xml:space")
// Reads the body of a CDATA section. nBracket is a counter used to detect the
// terminator: a '>' seen while nBracket > 1 ends the section. Most of the
// loop body is missing from this excerpt; presumably nBracket counts
// consecutive ']' characters - TODO confirm.
578 private void ReadCDATASection ()
// Guard not visible; presumably raised when input ends inside the section.
583 throw UnexpectedEndError ();
584 char c = (char) Read ();
587 else if (c == '>' && nBracket > 1) {
// Iterates once per bracket beyond two (loop body not visible).
588 for (int i = nBracket; i > 2; i--)
// Iterates once per counted bracket (loop body not visible).
593 for (int i = 0; i < nBracket; i++)
// Reads a comment. The only visible behaviour is the error raised for "--"
// inside the comment; the surrounding logic is missing from this excerpt.
601 private void ReadComment ()
611 throw Error ("'--' is not allowed inside comment markup.");
// Exception created by SmallXmlParser.Error. The message passed to the base
// class is formatted as "<msg>. At (<line>,<column>)".
620 [CLSCompliant(false)]
623 class SmallXmlParserException : SystemException
628 public SmallXmlParserException (string msg, int line, int column)
629 : base (String.Format ("{0}. At ({1},{2})", msg, line, column))
// The column is stored and exposed through a getter (property header not
// visible in this excerpt).
632 this.column = column;
640 get { return column; }