* roottypes.cs: Rename from tree.cs.
[mono.git] / mcs / class / Commons.Xml.Relaxng / Commons.Xml.Relaxng / RelaxngValidatingReader.cs
1 //
2 // Commons.Xml.Relaxng.RelaxngValidatingReader
3 //
4 // Author:
5 //      Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
6 //      Alexandre Alapetite <http://alexandre.alapetite.net/cv/>
7 //
8 // 2003 Atsushi Enomoto. "No rights reserved."
9 //
10 // Copyright (c) 2004 Novell Inc.
11 // All rights reserved
12 //
13
14 //
15 // Permission is hereby granted, free of charge, to any person obtaining
16 // a copy of this software and associated documentation files (the
17 // "Software"), to deal in the Software without restriction, including
18 // without limitation the rights to use, copy, modify, merge, publish,
19 // distribute, sublicense, and/or sell copies of the Software, and to
20 // permit persons to whom the Software is furnished to do so, subject to
21 // the following conditions:
22 // 
23 // The above copyright notice and this permission notice shall be
24 // included in all copies or substantial portions of the Software.
25 // 
26 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
30 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
31 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
32 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 //
34 using System;
35 using System.Collections;
36 using System.Text;
37 using System.Xml;
38 using Commons.Xml.Relaxng.Derivative;
39
40 namespace Commons.Xml.Relaxng
41 {
42         public class RelaxngValidatingReader : XmlDefaultReader
43         {
44                 public RelaxngValidatingReader (XmlReader reader)
45                         : this (reader, (RelaxngPattern) null)
46                 {
47                 }
48
49                 public RelaxngValidatingReader (XmlReader reader, XmlReader grammarXml)
50                         : this (reader, grammarXml, null)
51                 {
52                 }
53
54                 public RelaxngValidatingReader (XmlReader reader, XmlReader grammarXml, RelaxngDatatypeProvider provider)
55                         : this (reader, RelaxngGrammar.Read (grammarXml, provider))
56                 {
57                 }
58
59                 public RelaxngValidatingReader (XmlReader reader, RelaxngPattern pattern)
60                         : base (reader)
61                 {
62                         if (pattern == null)
63                                 throw new ArgumentNullException ("pattern");
64
65                         if (reader.NodeType == XmlNodeType.Attribute)
66                                 throw new RelaxngException ("RELAX NG does not support standalone attribute validation (it is prohibited due to the specification section 7.1.5");
67                         this.reader = reader;
68                         this.pattern = pattern;
69                 }
70
71                 XmlReader reader;
72                 RelaxngPattern pattern;
73                 RdpPattern vState;
74                 RdpPattern prevState;   // Mainly for debugging.
75                 bool roughLabelCheck;
76                 ArrayList strictCheckCache;
77                 bool reportDetails;
78                 string cachedValue;
79                 int startElementDepth = -1;
80                 bool inContent;
81                 bool firstRead = true;
82
83                 internal string CurrentStateXml {
84                         get { return RdpUtil.DebugRdpPattern (vState, new Hashtable ()); }
85                 }
86
87                 internal string PreviousStateXml {
88                         get { return RdpUtil.DebugRdpPattern (prevState, new Hashtable ()); }
89                 }
90
91                 #region Validation State support
92
93                 public bool ReportDetails {
94                         get { return reportDetails; }
95                         set { reportDetails = value; }
96                 }
97
98                 public bool RoughLabelCheck {
99                         get { return roughLabelCheck; }
100                         set { roughLabelCheck = value; }
101                 }
102
103                 // It is used to disclose its validation feature to public
104                 class ValidationState
105                 {
106                         RdpPattern state;
107
108                         internal ValidationState (RdpPattern startState)
109                         {
110                                 this.state = startState;
111                         }
112
113                         public RdpPattern Pattern {
114                                 get { return state; }
115                         }
116
117                         public ValidationState AfterOpenStartTag (
118                                 string localName, string ns)
119                         {
120                                 RdpPattern p = state.StartTagOpenDeriv (
121                                         localName, ns);
122                                 return p is RdpNotAllowed ?
123                                         null : new ValidationState (p);
124                         }
125
126                         public bool OpenStartTag (string localName, string ns)
127                         {
128                                 RdpPattern p = state.StartTagOpenDeriv (
129                                         localName, ns);
130                                 if (p is RdpNotAllowed)
131                                         return false;
132                                 state = p;
133                                 return true;
134                         }
135
136                         public ValidationState AfterCloseStartTag ()
137                         {
138                                 RdpPattern p = state.StartTagCloseDeriv ();
139                                 return p is RdpNotAllowed ?
140                                         null : new ValidationState (p);
141                         }
142
143                         public bool CloseStartTag ()
144                         {
145                                 RdpPattern p = state.StartTagCloseDeriv ();
146                                 if (p is RdpNotAllowed)
147                                         return false;
148                                 state = p;
149                                 return true;
150                         }
151
152                         public ValidationState AfterEndTag ()
153                         {
154                                 RdpPattern p = state.EndTagDeriv ();
155                                 if (p is RdpNotAllowed)
156                                         return null;
157                                 return new ValidationState (p);
158                         }
159
160                         public bool EndTag ()
161                         {
162                                 RdpPattern p = state.EndTagDeriv ();
163                                 if (p is RdpNotAllowed)
164                                         return false;
165                                 state = p;
166                                 return true;
167                         }
168
169                         public ValidationState AfterAttribute (
170                                 string localName, string ns, XmlReader reader)
171                         {
172                                 RdpPattern p = state.AttDeriv (
173                                         localName, ns, null, reader);
174                                 if (p is RdpNotAllowed)
175                                         return null;
176                                 return new ValidationState (p);
177                         }
178
179                         public bool Attribute (
180                                 string localName, string ns, XmlReader reader)
181                         {
182                                 RdpPattern p = state.AttDeriv (
183                                         localName, ns, null, reader);
184                                 if (p is RdpNotAllowed)
185                                         return false;
186                                 state = p;
187                                 return true;
188                         }
189                 }
190
191                 public object GetCurrentState ()
192                 {
193                         PrepareState ();
194                         return new ValidationState (vState);
195                 }
196
197                 private ValidationState ToState (object stateObject)
198                 {
199                         if (stateObject == null)
200                                 throw new ArgumentNullException ("stateObject");
201                         ValidationState state = stateObject as ValidationState;
202                         if (state == null)
203                                 throw new ArgumentException ("Argument stateObject is not of expected type.");
204                         return state;
205                 }
206
207                 public object AfterOpenStartTag (object stateObject,
208                         string localName, string ns)
209                 {
210                         ValidationState state = ToState (stateObject);
211                         return state.AfterOpenStartTag (localName, ns);
212                 }
213
214                 public bool OpenStartTag (object stateObject,
215                         string localName, string ns)
216                 {
217                         ValidationState state = ToState (stateObject);
218                         return state.OpenStartTag (localName, ns);
219                 }
220
221                 public object AfterAttribute (object stateObject,
222                         string localName, string ns)
223                 {
224                         ValidationState state = ToState (stateObject);
225                         return state.AfterAttribute (localName, ns, this);
226                 }
227
228                 public bool Attribute (object stateObject,
229                         string localName, string ns)
230                 {
231                         ValidationState state = ToState (stateObject);
232                         return state.Attribute (localName, ns, this);
233                 }
234
235                 public object AfterCloseStartTag (object stateObject)
236                 {
237                         ValidationState state = ToState (stateObject);
238                         return state.AfterCloseStartTag ();
239                 }
240
241                 public bool CloseStartTag (object stateObject)
242                 {
243                         ValidationState state = ToState (stateObject);
244                         return state.CloseStartTag ();
245                 }
246
247                 public object AfterEndTag (object stateObject)
248                 {
249                         ValidationState state = ToState (stateObject);
250                         return state.AfterEndTag ();
251                 }
252
253                 public bool EndTag (object stateObject)
254                 {
255                         ValidationState state = ToState (stateObject);
256                         return state.EndTag ();
257                 }
258
259                 public ICollection GetElementLabels (object stateObject)
260                 {
261                         ValidationState state = ToState (stateObject);
262                         RdpPattern p = state.Pattern;
263                         Hashtable elements = new Hashtable ();
264                         Hashtable attributes = new Hashtable ();
265                         p.GetLabels (elements, attributes);
266
267                         if (roughLabelCheck)
268                                 return elements.Values;
269
270                         // Strict check that tries actual validation that will
271                         // cover rejection by notAllowed.
272                         if (strictCheckCache == null)
273                                 strictCheckCache = new ArrayList ();
274                         else
275                                 strictCheckCache.Clear ();
276                         foreach (XmlQualifiedName qname in elements.Values)
277                                 if (p.StartTagOpenDeriv (qname.Name, qname.Namespace) is RdpNotAllowed)
278                                         strictCheckCache.Add (qname);
279                         foreach (XmlQualifiedName qname in strictCheckCache)
280                                 elements.Remove (qname);
281                         strictCheckCache.Clear ();
282
283                         return elements.Values;
284                 }
285
286                 public ICollection GetAttributeLabels (object stateObject)
287                 {
288                         ValidationState state = ToState (stateObject);
289                         RdpPattern p = state.Pattern;
290                         Hashtable elements = new Hashtable ();
291                         Hashtable attributes = new Hashtable ();
292                         p.GetLabels (elements, attributes);
293
294                         if (roughLabelCheck)
295                                 return attributes.Values;
296
297                         // Strict check that tries actual validation that will
298                         // cover rejection by notAllowed.
299                         if (strictCheckCache == null)
300                                 strictCheckCache = new ArrayList ();
301                         else
302                                 strictCheckCache.Clear ();
303                         foreach (XmlQualifiedName qname in attributes.Values)
304                                 if (p.AttDeriv (qname.Name, qname.Namespace,null, this) is RdpNotAllowed)
305                                         strictCheckCache.Add (qname);
306                         foreach (XmlQualifiedName qname in strictCheckCache)
307                                 attributes.Remove (qname);
308                         strictCheckCache.Clear ();
309
310                         return attributes.Values;
311                 }
312
313                 public bool Emptiable (object stateObject)
314                 {
315                         ValidationState state = ToState (stateObject);
316                         RdpPattern p = state.Pattern;
317                         return !(p.EndTagDeriv () is RdpNotAllowed);
318                 }
319                 #endregion
320
321                 private RelaxngException CreateValidationError (string message,
322                         bool elements)
323                 {
324                         if (ReportDetails)
325                                 return CreateValidationError (String.Concat (message,
326                                         " Expected ",
327                                         elements ? "elements are: " : "attributes are: ",
328                                         BuildLabels (elements),
329                                         "."));
330                         return CreateValidationError (message);
331                 }
332
333                 private RelaxngException CreateValidationError (string message)
334                 {
335                         IXmlLineInfo li = reader as IXmlLineInfo;
336                         string lineInfo = reader.BaseURI;
337                         if (li != null)
338                                 lineInfo += String.Format (" line {0}, column {1}",
339                                         li.LineNumber, li.LinePosition);
340                         return new RelaxngException (message + lineInfo, prevState);
341                 }
342
343                 private void PrepareState ()
344                 {
345                         if (vState != null)
346                                 return;
347                         if (!pattern.IsCompiled) {
348                                 pattern.Compile ();
349                         }
350                         if (vState == null)
351                                 vState = pattern.StartPattern;
352                 }
353
354                 private string BuildLabels (bool elements)
355                 {
356                         StringBuilder sb = new StringBuilder ();
357                         ValidationState s = new ValidationState (prevState);
358                         ICollection col = elements ?
359                                 GetElementLabels (s) : GetAttributeLabels (s);
360                         foreach (XmlQualifiedName qname in col) {
361                                 sb.Append (qname.ToString ());
362                                 sb.Append (' ');
363                         }
364                         return sb.ToString ();
365                 }
366
367                 public override bool Read ()
368                 {
369                         PrepareState ();
370
371                         // If the input XmlReader is already positioned on
372                         // the first node to validate, skip Read() here
373                         // (idea by Alex).
374                         bool ret;
375                         if (firstRead) {
376                                 firstRead = false;
377                                 if (reader.ReadState == ReadState.Initial)
378                                         ret = reader.Read ();
379                                 else
380                                         ret = !((reader.ReadState == ReadState.Closed) || (reader.ReadState == ReadState.EndOfFile));
381                         }
382                         else
383                                 ret = reader.Read ();
384
385                         // Process pending text node validation if required.
386                         if (cachedValue != null)
387                                 ValidateText (ret);
388                         else if (cachedValue == null &&
389                                 reader.NodeType == XmlNodeType.EndElement && 
390                                 startElementDepth == reader.Depth)
391                                 ValidateWeakMatch3 ();
392
393                         switch (reader.NodeType) {
394                         case XmlNodeType.Element:
395                                 inContent = true;
396                                 // StartTagOpenDeriv
397                                 prevState = vState;
398                                 vState = memo.StartTagOpenDeriv (vState,
399                                         reader.LocalName, reader.NamespaceURI);
400                                 if (vState.PatternType == RelaxngPatternType.NotAllowed)
401                                         throw CreateValidationError (String.Format ("Invalid start tag found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), true);
402
403                                 // AttsDeriv equals to for each AttDeriv
404                                 string elementNS = reader.NamespaceURI;
405                                 if (reader.MoveToFirstAttribute ()) {
406                                         do {
407                                                 if (reader.NamespaceURI == "http://www.w3.org/2000/xmlns/")
408                                                         continue;
409
410                                                 prevState = vState;
411                                                 string attrNS = reader.NamespaceURI;
412
413 #if false // old code
414
415                                                 vState = vState.AttDeriv (reader.LocalName, attrNS, reader.GetAttribute (reader.LocalName, attrNS), this);
416                                                 if (vState == RdpNotAllowed.Instance)
417                                                         throw CreateValidationError (String.Format ("Invalid attribute found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), false);
418
419 #else
420
421                                                 prevState = vState;
422                                                 vState = memo.StartAttDeriv (vState, reader.LocalName, attrNS);
423                                                 if (vState == RdpNotAllowed.Instance)
424                                                         throw CreateValidationError (String.Format ("Invalid attribute found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), false);
425                                                 prevState = vState;
426                                                 vState = memo.TextOnlyDeriv (vState);
427                                                 vState = TextDeriv (vState, reader.Value, reader);
428                                                 if (Util.IsWhitespace (reader.Value))
429                                                         vState = vState.Choice (prevState);
430                                                 vState = memo.EndAttDeriv (vState);
431                                                 if (vState == RdpNotAllowed.Instance)
432                                                         throw CreateValidationError (String.Format ("Invalid attribute value is found. Value = '{0}'", reader.Value), false);
433
434 #endif
435                                         } while (reader.MoveToNextAttribute ());
436                                         MoveToElement ();
437                                 }
438
439                                 // StarTagCloseDeriv
440                                 prevState = vState;
441                                 vState = memo.StartTagCloseDeriv (vState);
442                                 if (vState.PatternType == RelaxngPatternType.NotAllowed)
443                                         throw CreateValidationError (String.Format ("Invalid start tag closing found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), false);
444
445                                 // if it is empty, then redirect to EndElement
446                                 if (reader.IsEmptyElement) {
447                                         ValidateWeakMatch3 ();
448                                         goto case XmlNodeType.EndElement;
449                                 }
450                                 break;
451                         case XmlNodeType.EndElement:
452                                 if (reader.Depth == 0)
453                                         inContent = false;
454                                 // EndTagDeriv
455                                 prevState = vState;
456                                 vState = memo.EndTagDeriv (vState);
457                                 if (vState.PatternType == RelaxngPatternType.NotAllowed)
458                                         throw CreateValidationError (String.Format ("Invalid end tag found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), true);
459                                 break;
460                         case XmlNodeType.Whitespace:
461                                 if (inContent)
462                                         goto case XmlNodeType.Text;
463                                 break;
464                         case XmlNodeType.CDATA:
465                         case XmlNodeType.Text:
466                         case XmlNodeType.SignificantWhitespace:
467                                 // Whitespace cannot be skipped because data and
468                                 // value types are required to validate whitespaces.
469                                 cachedValue += Value;
470                                 break;
471                         }
472
473                         if (reader.NodeType == XmlNodeType.Element && !reader.IsEmptyElement)
474                                 startElementDepth = reader.Depth;
475                         else if (reader.NodeType == XmlNodeType.EndElement)
476                                 startElementDepth = -1;
477
478                         return ret;
479                 }
480
481                 RdpPattern TextDeriv (RdpPattern p, string value, XmlReader context)
482                 {
483                         if (value.Length > 0 && p.IsTextValueDependent)
484                                 return memo.TextDeriv (p, value, context);
485                         else
486                                 return memo.EmptyTextDeriv (p);
487                 }
488
489                 void ValidateText (bool remain)
490                 {
491                         RdpPattern ts = vState;
492                         switch (reader.NodeType) {
493                         case XmlNodeType.EndElement:
494                                 if (startElementDepth != reader.Depth)
495                                         goto case XmlNodeType.Element;
496                                 ts = ValidateTextOnlyCore ();
497                                 break;
498                         case XmlNodeType.Element:
499                                 startElementDepth = -1;
500                                 if (!Util.IsWhitespace (cachedValue)) {
501                                         ts = memo.MixedTextDeriv (ts);
502                                         ts = TextDeriv (ts, cachedValue, reader);
503                                 }
504                                 break;
505                         default:
506                                 if (!remain)
507                                         goto case XmlNodeType.Element;
508                                 return;
509                         }
510
511                         prevState = vState;
512                         vState = ts;
513
514                         if (vState.PatternType == RelaxngPatternType.NotAllowed)
515                                 throw CreateValidationError (String.Format ("Invalid text found. Text value = {0} ", cachedValue), true);
516                         cachedValue = null;
517                         return;
518                 }
519
520                 // section 6.2.7 weak match 3
521                 // childrenDeriv cx p [] = childrenDeriv cx p [(TextNode "")]
522                 void ValidateWeakMatch3 ()
523                 {
524                         cachedValue = String.Empty;
525                         RdpPattern ts = ValidateTextOnlyCore ();
526
527                         prevState = vState;
528                         vState = ts;
529
530                         if (vState.PatternType == RelaxngPatternType.NotAllowed)
531                                 throw CreateValidationError (String.Format ("Invalid text found. Text value = {0} ", cachedValue), true);
532                         cachedValue = null;
533                         startElementDepth = -1;
534                 }
535
536                 RdpPattern ValidateTextOnlyCore ()
537                 {
538                         RdpPattern ts = memo.TextOnlyDeriv (vState);
539                         ts = TextDeriv (ts, cachedValue, reader);
540                         if (Util.IsWhitespace (cachedValue))
541                                 ts = vState.Choice (ts);
542                         return ts;
543                 }
544
545                 MemoizationStore memo = new MemoizationStore ();
546         }
547
548         #region Memoization support
549         internal class MemoizationStore
550         {
551                 Hashtable startOpen = new Hashtable ();
552                 Hashtable startClose = new Hashtable ();
553                 Hashtable startAtt = new Hashtable ();
554                 Hashtable endTag = new Hashtable ();
555                 Hashtable endAtt = new Hashtable ();
556                 Hashtable textOnly = new Hashtable ();
557                 Hashtable mixedText = new Hashtable ();
558                 Hashtable emptyText = new Hashtable ();
559                 Hashtable text = new Hashtable ();
560                 Hashtable text_value = new Hashtable ();
561                 Hashtable qnames = new Hashtable ();
562
563                 enum DerivativeType {
564                         StartTagOpen,
565                         StartAtt,
566                         StartTagClose,
567                         EndTag,
568                         EndAtt,
569                         Mixed,
570                         TextOnly
571                 }
572
573                 XmlQualifiedName GetQName (string local, string ns)
574                 {
575                         Hashtable nst = qnames [ns] as Hashtable;
576                         if (nst == null) {
577                                 nst = new Hashtable ();
578                                 qnames [ns] = nst;
579                         }
580                         XmlQualifiedName qn = nst [local] as XmlQualifiedName;
581                         if (qn == null) {
582                                 qn = new XmlQualifiedName (local, ns);
583                                 nst [local] = qn;
584                         }
585                         return qn;
586                 }
587
588                 public RdpPattern StartTagOpenDeriv (RdpPattern p, string local, string ns)
589                 {
590                         Hashtable h = startOpen [p] as Hashtable;
591                         if (h == null) {
592                                 h = new Hashtable ();
593                                 startOpen [p] = h;
594                         }
595                         XmlQualifiedName qn = GetQName (local, ns);
596                         RdpPattern m = h [qn] as RdpPattern;
597                         if (m == null) {
598                                 m = p.StartTagOpenDeriv (local, ns, this);
599                                 h [qn] = m;
600                         }
601                         return m;
602                 }
603
604                 public RdpPattern StartAttDeriv (RdpPattern p, string local, string ns)
605                 {
606                         Hashtable h = startAtt [p] as Hashtable;
607                         if (h == null) {
608                                 h = new Hashtable ();
609                                 startAtt [p] = h;
610                         }
611                         XmlQualifiedName qn = GetQName (local, ns);
612                         RdpPattern m = h [qn] as RdpPattern;
613                         if (m == null) {
614                                 m = p.StartAttDeriv (local, ns, this);
615                                 h [qn] = m;
616                         }
617                         return m;
618                 }
619
620                 public RdpPattern StartTagCloseDeriv (RdpPattern p)
621                 {
622                         RdpPattern m = startClose [p] as RdpPattern;
623                         if (m != null)
624                                 return m;
625
626                         m = p.StartTagCloseDeriv (this);
627                         startClose [p] = m;
628                         return m;
629                 }
630
631                 public RdpPattern EndTagDeriv (RdpPattern p)
632                 {
633                         RdpPattern m = endTag [p] as RdpPattern;
634                         if (m != null)
635                                 return m;
636
637                         m = p.EndTagDeriv (this);
638                         endTag [p] = m;
639                         return m;
640                 }
641
642                 public RdpPattern EndAttDeriv (RdpPattern p)
643                 {
644                         RdpPattern m = endAtt [p] as RdpPattern;
645                         if (m != null)
646                                 return m;
647
648                         m = p.EndAttDeriv (this);
649                         endAtt [p] = m;
650                         return m;
651                 }
652
653                 public RdpPattern MixedTextDeriv (RdpPattern p)
654                 {
655                         RdpPattern m = mixedText [p] as RdpPattern;
656                         if (m != null)
657                                 return m;
658
659                         m = p.MixedTextDeriv (this);
660                         mixedText [p] = m;
661                         return m;
662                 }
663
664                 public RdpPattern TextOnlyDeriv (RdpPattern p)
665                 {
666                         RdpPattern m = textOnly [p] as RdpPattern;
667                         if (m != null)
668                                 return m;
669
670                         m = p.TextOnlyDeriv (this);
671                         textOnly [p] = m;
672                         return m;
673                 }
674
675                 public RdpPattern TextDeriv (RdpPattern p, string value, XmlReader context)
676                 {
677                         if (p.IsContextDependent)
678                                 return p.TextDeriv (value, context);
679
680                         if (Object.ReferenceEquals (text_value [p], value))
681                                 return text [p] as RdpPattern;
682                         RdpPattern m = p.TextDeriv (value, context, this);
683                         text_value [p] = value;
684                         text [p] = m;
685                         return m;
686                 }
687
688                 public RdpPattern EmptyTextDeriv (RdpPattern p)
689                 {
690                         RdpPattern m = emptyText [p] as RdpPattern;
691                         if (m != null)
692                                 return m;
693
694                         m = p.EmptyTextDeriv (this);
695                         emptyText [p] = m;
696                         return m;
697                 }
698         }
699         #endregion
700 }
701