c57b2a2099418c8e15d68965b55efc13c4ff546c
[mono.git] / mcs / tools / cil-strip / Mono.Xml / SmallXmlParser.cs
1 //
2 // SmallXmlParser.cs
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
16 //
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
19 //
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28
29 //
// Small, non-validating XML parser that reports document content through
// the callback interface SmallXmlParser.IContentHandler.
31 //
32
33 using System;
34 using System.Collections;
35 using System.Globalization;
36 using System.IO;
37 using System.Text;
38
39 namespace Mono.Xml
40 {
41         internal sealed class DefaultHandler : SmallXmlParser.IContentHandler
42         {
43                 public void OnStartParsing (SmallXmlParser parser)
44                 {
45                 }
46
47                 public void OnEndParsing (SmallXmlParser parser)
48                 {
49                 }
50
51                 public void OnStartElement (string name, SmallXmlParser.IAttrList attrs)
52                 {
53                 }
54
55                 public void OnEndElement (string name)
56                 {
57                 }
58
59                 public void OnChars (string s)
60                 {
61                 }
62
63                 public void OnIgnorableWhitespace (string s)
64                 {
65                 }
66
67                 public void OnProcessingInstruction (string name, string text)
68                 {
69                 }
70         }
71
72         internal class SmallXmlParser
73         {
74                 internal interface IContentHandler
75                 {
76                         void OnStartParsing (SmallXmlParser parser);
77                         void OnEndParsing (SmallXmlParser parser);
78                         void OnStartElement (string name, IAttrList attrs);
79                         void OnEndElement (string name);
80                         void OnProcessingInstruction (string name, string text);
81                         void OnChars (string text);
82                         void OnIgnorableWhitespace (string text);
83                 }
84
85                 internal interface IAttrList
86                 {
87                         int Length { get; }
88                         bool IsEmpty { get; }
89                         string GetName (int i);
90                         string GetValue (int i);
91                         string GetValue (string name);
92                         string [] Names { get; }
93                         string [] Values { get; }
94                 }
95
96                 sealed class AttrListImpl : IAttrList
97                 {
98                         public int Length {
99                                 get { return attrNames.Count; }
100                         }
101                         public bool IsEmpty {
102                                 get { return attrNames.Count == 0; }
103                         }
104                         public string GetName (int i)
105                         {
106                                 return (string) attrNames [i];
107                         }
108                         public string GetValue (int i)
109                         {
110                                 return (string) attrValues [i];
111                         }
112                         public string GetValue (string name)
113                         {
114                                 for (int i = 0; i < attrNames.Count; i++)
115                                         if ((string) attrNames [i] == name)
116                                                 return (string) attrValues [i];
117                                 return null;
118                         }
119                         public string [] Names {
120                                 get { return (string []) attrNames.ToArray (typeof (string)); }
121                         }
122                         public string [] Values {
123                                 get { return (string []) attrValues.ToArray (typeof (string)); }
124                         }
125
126                         ArrayList attrNames = new ArrayList ();
127                         ArrayList attrValues = new ArrayList ();
128
129                         internal void Clear ()
130                         {
131                                 attrNames.Clear ();
132                                 attrValues.Clear ();
133                         }
134
135                         internal void Add (string name, string value)
136                         {
137                                 attrNames.Add (name);
138                                 attrValues.Add (value);
139                         }
140                 }
141
142                 IContentHandler handler;
143                 TextReader reader;
144                 Stack elementNames = new Stack ();
145                 Stack xmlSpaces = new Stack ();
146                 string xmlSpace;
147                 StringBuilder buffer = new StringBuilder (200);
148                 char [] nameBuffer = new char [30];
149                 bool isWhitespace;
150
151                 AttrListImpl attributes = new AttrListImpl ();
152                 int line = 1, column;
153                 bool resetColumn;
154
155                 public SmallXmlParser ()
156                 {
157                 }
158
159                 private Exception Error (string msg)
160                 {
161                         return new SmallXmlParserException (msg, line, column);
162                 }
163
164                 private Exception UnexpectedEndError ()
165                 {
166                         string [] arr = new string [elementNames.Count];
167                         // COMPACT FRAMEWORK NOTE: CopyTo is not visible through the Stack class
168                         (elementNames as ICollection).CopyTo (arr, 0);
169                         return Error (String.Format (
170                                                           "Unexpected end of stream. Element stack content is {0}", String.Join (",", arr)));
171                 }
172
173
174                 private bool IsNameChar (char c, bool start)
175                 {
176                         switch (c) {
177                         case ':':
178                         case '_':
179                                 return true;
180                         case '-':
181                         case '.':
182                                 return !start;
183                         }
184                         if (c > 0x100) { // optional condition for optimization
185                                 switch (c) {
186                                 case '\u0559':
187                                 case '\u06E5':
188                                 case '\u06E6':
189                                         return true;
190                                 }
191                                 if ('\u02BB' <= c && c <= '\u02C1')
192                                         return true;
193                         }
194                         switch (Char.GetUnicodeCategory (c)) {
195                         case UnicodeCategory.LowercaseLetter:
196                         case UnicodeCategory.UppercaseLetter:
197                         case UnicodeCategory.OtherLetter:
198                         case UnicodeCategory.TitlecaseLetter:
199                         case UnicodeCategory.LetterNumber:
200                                 return true;
201                         case UnicodeCategory.SpacingCombiningMark:
202                         case UnicodeCategory.EnclosingMark:
203                         case UnicodeCategory.NonSpacingMark:
204                         case UnicodeCategory.ModifierLetter:
205                         case UnicodeCategory.DecimalDigitNumber:
206                                 return !start;
207                         default:
208                                 return false;
209                         }
210                 }
211
212                 private bool IsWhitespace (int c)
213                 {
214                         switch (c) {
215                         case ' ':
216                         case '\r':
217                         case '\t':
218                         case '\n':
219                                 return true;
220                         default:
221                                 return false;
222                         }
223                 }
224
225
226                 public void SkipWhitespaces ()
227                 {
228                         SkipWhitespaces (false);
229                 }
230
231                 private void HandleWhitespaces ()
232                 {
233                         while (IsWhitespace (Peek ()))
234                                 buffer.Append ((char) Read ());
235                         if (Peek () != '<' && Peek () >= 0)
236                                 isWhitespace = false;
237                 }
238
239                 public void SkipWhitespaces (bool expected)
240                 {
241                         while (true) {
242                                 switch (Peek ()) {
243                                 case ' ':
244                                 case '\r':
245                                 case '\t':
246                                 case '\n':
247                                         Read ();
248                                         if (expected)
249                                                 expected = false;
250                                         continue;
251                                 }
252                                 if (expected)
253                                         throw Error ("Whitespace is expected.");
254                                 return;
255                         }
256                 }
257
258
259                 private int Peek ()
260                 {
261                         return reader.Peek ();
262                 }
263
264                 private int Read ()
265                 {
266                         int i = reader.Read ();
267                         if (i == '\n')
268                                 resetColumn = true;
269                         if (resetColumn) {
270                                 line++;
271                                 resetColumn = false;
272                                 column = 1;
273                         }
274                         else
275                                 column++;
276                         return i;
277                 }
278
279                 public void Expect (int c)
280                 {
281                         int p = Read ();
282                         if (p < 0)
283                                 throw UnexpectedEndError ();
284                         else if (p != c)
285                                 throw Error (String.Format ("Expected '{0}' but got {1}", (char) c, (char) p));
286                 }
287
288                 private string ReadUntil (char until, bool handleReferences)
289                 {
290                         while (true) {
291                                 if (Peek () < 0)
292                                         throw UnexpectedEndError ();
293                                 char c = (char) Read ();
294                                 if (c == until)
295                                         break;
296                                 else if (handleReferences && c == '&')
297                                         ReadReference ();
298                                 else
299                                         buffer.Append (c);
300                         }
301                         string ret = buffer.ToString ();
302                         buffer.Length = 0;
303                         return ret;
304                 }
305
306                 public string ReadName ()
307                 {
308                         int idx = 0;
309                         if (Peek () < 0 || !IsNameChar ((char) Peek (), true))
310                                 throw Error ("XML name start character is expected.");
311                         for (int i = Peek (); i >= 0; i = Peek ()) {
312                                 char c = (char) i;
313                                 if (!IsNameChar (c, false))
314                                         break;
315                                 if (idx == nameBuffer.Length) {
316                                         char [] tmp = new char [idx * 2];
317                                         // COMPACT FRAMEWORK NOTE: Array.Copy(sourceArray, destinationArray, count) is not available.
318                                         Array.Copy (nameBuffer, 0, tmp, 0, idx);
319                                         nameBuffer = tmp;
320                                 }
321                                 nameBuffer [idx++] = c;
322                                 Read ();
323                         }
324                         if (idx == 0)
325                                 throw Error ("Valid XML name is expected.");
326                         return new string (nameBuffer, 0, idx);
327                 }
328
329
330                 public void Parse (TextReader input, IContentHandler handler)
331                 {
332                         this.reader = input;
333                         this.handler = handler;
334
335                         handler.OnStartParsing (this);
336
337                         while (Peek () >= 0)
338                                 ReadContent ();
339                         HandleBufferedContent ();
340                         if (elementNames.Count > 0)
341                                 throw Error (String.Format ("Insufficient close tag: {0}", elementNames.Peek ()));
342
343                         handler.OnEndParsing (this);
344
345                         Cleanup ();
346                 }
347
348                 private void Cleanup ()
349                 {
350                         line = 1;
351                         column = 0;
352                         handler = null;
353                         reader = null;
354 #if CF_1_0
355                         elementNames = new Stack ();
356                         xmlSpaces = new Stack ();
357 #else
358                         elementNames.Clear ();
359                         xmlSpaces.Clear ();
360 #endif
361                         attributes.Clear ();
362                         buffer.Length = 0;
363                         xmlSpace = null;
364                         isWhitespace = false;
365                 }
366
367                 public void ReadContent ()
368                 {
369                         string name;
370                         if (IsWhitespace (Peek ())) {
371                                 if (buffer.Length == 0)
372                                         isWhitespace = true;
373                                 HandleWhitespaces ();
374                         }
375                         if (Peek () == '<') {
376                                 Read ();
377                                 switch (Peek ()) {
378                                 case '!': // declarations
379                                         Read ();
380                                         if (Peek () == '[') {
381                                                 Read ();
382                                                 if (ReadName () != "CDATA")
383                                                         throw Error ("Invalid declaration markup");
384                                                 Expect ('[');
385                                                 ReadCDATASection ();
386                                                 return;
387                                         }
388                                         else if (Peek () == '-') {
389                                                 ReadComment ();
390                                                 return;
391                                         }
392                                         else if (ReadName () != "DOCTYPE")
393                                                 throw Error ("Invalid declaration markup.");
394                                         else
395                                                 throw Error ("This parser does not support document type.");
396                                 case '?': // PIs
397                                         HandleBufferedContent ();
398                                         Read ();
399                                         name = ReadName ();
400                                         SkipWhitespaces ();
401                                         string text = String.Empty;
402                                         if (Peek () != '?') {
403                                                 while (true) {
404                                                         text += ReadUntil ('?', false);
405                                                         if (Peek () == '>')
406                                                                 break;
407                                                         text += "?";
408                                                 }
409                                         }
410                                         handler.OnProcessingInstruction (
411                                                 name, text);
412                                         Expect ('>');
413                                         return;
414                                 case '/': // end tags
415                                         HandleBufferedContent ();
416                                         if (elementNames.Count == 0)
417                                                 throw UnexpectedEndError ();
418                                         Read ();
419                                         name = ReadName ();
420                                         SkipWhitespaces ();
421                                         string expected = (string) elementNames.Pop ();
422                                         xmlSpaces.Pop ();
423                                         if (xmlSpaces.Count > 0)
424                                                 xmlSpace = (string) xmlSpaces.Peek ();
425                                         else
426                                                 xmlSpace = null;
427                                         if (name != expected)
428                                                 throw Error (String.Format ("End tag mismatch: expected {0} but found {1}", expected, name));
429                                         handler.OnEndElement (name);
430                                         Expect ('>');
431                                         return;
432                                 default: // start tags (including empty tags)
433                                         HandleBufferedContent ();
434                                         name = ReadName ();
435                                         while (Peek () != '>' && Peek () != '/')
436                                                 ReadAttribute (attributes);
437                                         handler.OnStartElement (name, attributes);
438                                         attributes.Clear ();
439                                         SkipWhitespaces ();
440                                         if (Peek () == '/') {
441                                                 Read ();
442                                                 handler.OnEndElement (name);
443                                         }
444                                         else {
445                                                 elementNames.Push (name);
446                                                 xmlSpaces.Push (xmlSpace);
447                                         }
448                                         Expect ('>');
449                                         return;
450                                 }
451                         }
452                         else
453                                 ReadCharacters ();
454                 }
455
456                 private void HandleBufferedContent ()
457                 {
458                         if (buffer.Length == 0)
459                                 return;
460                         if (isWhitespace)
461                                 handler.OnIgnorableWhitespace (buffer.ToString ());
462                         else
463                                 handler.OnChars (buffer.ToString ());
464                         buffer.Length = 0;
465                         isWhitespace = false;
466                 }
467
468                 private void ReadCharacters ()
469                 {
470                         isWhitespace = false;
471                         while (true) {
472                                 int i = Peek ();
473                                 switch (i) {
474                                 case -1:
475                                         return;
476                                 case '<':
477                                         return;
478                                 case '&':
479                                         Read ();
480                                         ReadReference ();
481                                         continue;
482                                 default:
483                                         buffer.Append ((char) Read ());
484                                         continue;
485                                 }
486                         }
487                 }
488
489                 private void ReadReference ()
490                 {
491                         if (Peek () == '#') {
492                                 // character reference
493                                 Read ();
494                                 ReadCharacterReference ();
495                         } else {
496                                 string name = ReadName ();
497                                 Expect (';');
498                                 switch (name) {
499                                 case "amp":
500                                         buffer.Append ('&');
501                                         break;
502                                 case "quot":
503                                         buffer.Append ('"');
504                                         break;
505                                 case "apos":
506                                         buffer.Append ('\'');
507                                         break;
508                                 case "lt":
509                                         buffer.Append ('<');
510                                         break;
511                                 case "gt":
512                                         buffer.Append ('>');
513                                         break;
514                                 default:
515                                         throw Error ("General non-predefined entity reference is not supported in this parser.");
516                                 }
517                         }
518                 }
519
520                 private int ReadCharacterReference ()
521                 {
522                         int n = 0;
523                         if (Peek () == 'x') { // hex
524                                 Read ();
525                                 for (int i = Peek (); i >= 0; i = Peek ()) {
526                                         if ('0' <= i && i <= '9')
527                                                 n = n << 4 + i - '0';
528                                         else if ('A' <= i && i <='F')
529                                                 n = n << 4 + i - 'A' + 10;
530                                         else if ('a' <= i && i <='f')
531                                                 n = n << 4 + i - 'a' + 10;
532                                         else
533                                                 break;
534                                         Read ();
535                                 }
536                         } else {
537                                 for (int i = Peek (); i >= 0; i = Peek ()) {
538                                         if ('0' <= i && i <= '9')
539                                                 n = n << 4 + i - '0';
540                                         else
541                                                 break;
542                                         Read ();
543                                 }
544                         }
545                         return n;
546                 }
547
548                 private void ReadAttribute (AttrListImpl a)
549                 {
550                         SkipWhitespaces (true);
551                         if (Peek () == '/' || Peek () == '>')
552                         // came here just to spend trailing whitespaces
553                                 return;
554
555                         string name = ReadName ();
556                         string value;
557                         SkipWhitespaces ();
558                         Expect ('=');
559                         SkipWhitespaces ();
560                         switch (Read ()) {
561                         case '\'':
562                                 value = ReadUntil ('\'', true);
563                                 break;
564                         case '"':
565                                 value = ReadUntil ('"', true);
566                                 break;
567                         default:
568                                 throw Error ("Invalid attribute value markup.");
569                         }
570                         if (name == "xml:space")
571                                 xmlSpace = value;
572                         a.Add (name, value);
573                 }
574
575                 private void ReadCDATASection ()
576                 {
577                         int nBracket = 0;
578                         while (true) {
579                                 if (Peek () < 0)
580                                         throw UnexpectedEndError ();
581                                 char c = (char) Read ();
582                                 if (c == ']')
583                                         nBracket++;
584                                 else if (c == '>' && nBracket > 1) {
585                                         for (int i = nBracket; i > 2; i--)
586                                                 buffer.Append (']');
587                                         break;
588                                 }
589                                 else {
590                                         for (int i = 0; i < nBracket; i++)
591                                                 buffer.Append (']');
592                                         nBracket = 0;
593                                         buffer.Append (c);
594                                 }
595                         }
596                 }
597
598                 private void ReadComment ()
599                 {
600                         Expect ('-');
601                         Expect ('-');
602                         while (true) {
603                                 if (Read () != '-')
604                                         continue;
605                                 if (Read () != '-')
606                                         continue;
607                                 if (Read () != '>')
608                                         throw Error ("'--' is not allowed inside comment markup.");
609                                 break;
610                         }
611                 }
612         }
613
614         internal sealed class SmallXmlParserException : SystemException
615         {
616                 int line;
617                 int column;
618
619                 public SmallXmlParserException (string msg, int line, int column)
620                 : base (String.Format ("{0}. At ({1},{2})", msg, line, column))
621                 {
622                         this.line = line;
623                         this.column = column;
624                 }
625
626                 public int Line {
627                         get { return line; }
628                 }
629
630                 public int Column {
631                         get { return column; }
632                 }
633         }
634 }
635
636
637