2 // Commons.Xml.Relaxng.RelaxngValidatingReader
5 // Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
6 // Alexandre Alapetite <http://alexandre.alapetite.fr/cv/>
8 // 2003 Atsushi Enomoto. "No rights reserved."
10 // Copyright (c) 2004 Novell Inc.
11 // All rights reserved
15 // Permission is hereby granted, free of charge, to any person obtaining
16 // a copy of this software and associated documentation files (the
17 // "Software"), to deal in the Software without restriction, including
18 // without limitation the rights to use, copy, modify, merge, publish,
19 // distribute, sublicense, and/or sell copies of the Software, and to
20 // permit persons to whom the Software is furnished to do so, subject to
21 // the following conditions:
23 // The above copyright notice and this permission notice shall be
24 // included in all copies or substantial portions of the Software.
26 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
30 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
31 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
32 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
35 using System.Collections;
38 using Commons.Xml.Relaxng.Derivative;
40 namespace Commons.Xml.Relaxng
42 public class RelaxngValidatingReader : XmlDefaultReader
// Convenience overload that supplies no pattern: forwards to the
// (XmlReader, RelaxngPattern) constructor with a null pattern.
// NOTE(review): that constructor contains a throw of ArgumentNullException ("pattern");
// its guard is not visible in this view, so confirm whether this overload can succeed.
44 public RelaxngValidatingReader (XmlReader reader)
45 : this (reader, (RelaxngPattern) null)
// Overload taking the grammar as XML: forwards to the three-argument
// constructor with a null datatype provider.
49 public RelaxngValidatingReader (XmlReader reader, XmlReader grammarXml)
50 : this (reader, grammarXml, null)
// Reads the grammar with RelaxngGrammar.Read (grammarXml, provider) and forwards
// the result, together with reader, to another constructor of this class.
54 public RelaxngValidatingReader (XmlReader reader, XmlReader grammarXml, RelaxngDatatypeProvider provider)
55 : this (reader, RelaxngGrammar.Read (grammarXml, provider))
// Main constructor: validates `reader` against `pattern`.
// Visible behavior: throws ArgumentNullException ("pattern"), rejects a reader that is
// positioned on an attribute node with RelaxngException, and stores the pattern.
// NOTE(review): the condition guarding the ArgumentNullException is not visible in this
// view (presumably a null check on `pattern`) - confirm.
59 public RelaxngValidatingReader (XmlReader reader, RelaxngPattern pattern)
63 throw new ArgumentNullException ("pattern");
// Validation may not start on a standalone attribute.
// NOTE(review): the message text below contains an unclosed parenthesis.
65 if (reader.NodeType == XmlNodeType.Attribute)
66 throw new RelaxngException ("RELAX NG does not support standalone attribute validation (it is prohibited due to the specification section 7.1.5");
68 this.pattern = pattern;
// Pattern given to the constructor; PrepareState () reads its IsCompiled and StartPattern.
72 RelaxngPattern pattern;
// Pattern handed to HandleError for recovery, to BuildLabels for label reporting and
// to the RelaxngException created by CreateValidationError.
74 RdpPattern prevState; // Mainly for debugging.
// Scratch list reused by GetElementLabels / GetAttributeLabels to collect names to drop.
76 ArrayList strictCheckCache;
// Depth of the most recent non-empty start element seen by Read (); -1 when none is recorded.
79 int startElementDepth = -1;
// NOTE(review): starts as true; its use is not visible in this view - presumably
// consulted by Read () to special-case the first call. Confirm.
81 bool firstRead = true;
// Handler signature for validation errors. HandleError invokes it with this reader and
// the error message; a true result makes HandleError run its recovery delegate.
83 public delegate bool RelaxngValidationEventHandler (XmlReader source, string message);
// Ready-made handler that always returns true.
85 public static readonly RelaxngValidationEventHandler IgnoreError = delegate { return true; };
// Raised by HandleError when a validation error is found.
87 public event RelaxngValidationEventHandler InvalidNodeFound;
// Recovery step used by HandleError: maps a source pattern to the pattern to continue with.
89 delegate RdpPattern RecoveryHandler (RdpPattern source);
// Reports a validation error and either recovers or throws.
//   error    - message passed to the InvalidNodeFound handler and used for the exception.
//   elements - true when the error concerns elements, false for attributes; forwarded to
//              CreateValidationError so the label list it may append matches the error kind.
//   source   - pattern handed to the recovery delegate.
//   recover  - recovery step applied when the handler asks to continue.
// Returns the recovered pattern when a handler is attached and returns true;
// otherwise throws the RelaxngException built by CreateValidationError.
RdpPattern HandleError (string error, bool elements, RdpPattern source, RecoveryHandler recover)
{
	if (InvalidNodeFound != null && InvalidNodeFound (this, error))
		return recover (source);

	// Pass the caller's flag through rather than a constant, so attribute errors
	// are not described with element labels.
	throw CreateValidationError (error, elements);
}
// Debug rendering of the current state (vState), produced by
// RdpUtil.DebugRdpPattern with a fresh Hashtable.
internal string CurrentStateXml {
	get {
		Hashtable table = new Hashtable ();
		return RdpUtil.DebugRdpPattern (vState, table);
	}
}
// Debug rendering of the previous state (prevState), produced by
// RdpUtil.DebugRdpPattern with a fresh Hashtable.
internal string PreviousStateXml {
	get {
		Hashtable table = new Hashtable ();
		return RdpUtil.DebugRdpPattern (prevState, table);
	}
}
107 #region Validation State support
// Gets or sets the reportDetails flag of this reader.
public bool ReportDetails {
	get {
		return this.reportDetails;
	}
	set {
		this.reportDetails = value;
	}
}
// Gets or sets the roughLabelCheck flag of this reader.
public bool RoughLabelCheck {
	get {
		return this.roughLabelCheck;
	}
	set {
		this.roughLabelCheck = value;
	}
}
// Wrapper around an RdpPattern that represents one validation state. Instances are
// handed out as opaque objects by GetCurrentState () and unwrapped again by ToState ().
// Each step exists in two flavours: After* methods return a new state (or reject),
// while the bool-returning methods test the same derivative on this instance.
// NOTE(review): several short lines (derivative arguments, return statements) are not
// visible in this view; the comments below describe only what is visible.
119 // It is used to disclose its validation feature to public
120 class ValidationState
// Wraps the given start pattern.
124 internal ValidationState (RdpPattern startState)
126 this.state = startState;
// The wrapped pattern.
129 public RdpPattern Pattern {
130 get { return state; }
// Start-tag-open derivative; returns null when the result is RdpNotAllowed,
// otherwise a new state wrapping the result.
133 public ValidationState AfterOpenStartTag (
134 string localName, string ns)
136 RdpPattern p = state.StartTagOpenDeriv (
138 return p is RdpNotAllowed ?
139 null : new ValidationState (p);
// Bool flavour of the start-tag-open step.
// NOTE(review): the statements following the RdpNotAllowed test are not visible here.
142 public bool OpenStartTag (string localName, string ns)
144 RdpPattern p = state.StartTagOpenDeriv (
146 if (p is RdpNotAllowed)
// Start-tag-close derivative; returns null when the result is RdpNotAllowed,
// otherwise a new state wrapping the result.
152 public ValidationState AfterCloseStartTag ()
154 RdpPattern p = state.StartTagCloseDeriv ();
155 return p is RdpNotAllowed ?
156 null : new ValidationState (p);
// Bool flavour of the start-tag-close step (result handling not visible here).
159 public bool CloseStartTag ()
161 RdpPattern p = state.StartTagCloseDeriv ();
162 if (p is RdpNotAllowed)
// End-tag derivative; returns a new state wrapping the result.
// NOTE(review): the branch taken for RdpNotAllowed is not visible - presumably returns null.
168 public ValidationState AfterEndTag ()
170 RdpPattern p = state.EndTagDeriv ();
171 if (p is RdpNotAllowed)
173 return new ValidationState (p);
// Bool flavour of the end-tag step (result handling not visible here).
176 public bool EndTag ()
178 RdpPattern p = state.EndTagDeriv ();
179 if (p is RdpNotAllowed)
// Attribute derivative, computed with a null value and the given reader;
// returns a new state wrapping the result.
// NOTE(review): the branch taken for RdpNotAllowed is not visible - presumably returns null.
185 public ValidationState AfterAttribute (
186 string localName, string ns, XmlReader reader)
188 RdpPattern p = state.AttDeriv (
189 localName, ns, null, reader);
190 if (p is RdpNotAllowed)
192 return new ValidationState (p);
// Bool flavour of the attribute step (result handling not visible here).
195 public bool Attribute (
196 string localName, string ns, XmlReader reader)
198 RdpPattern p = state.AttDeriv (
199 localName, ns, null, reader);
200 if (p is RdpNotAllowed)
// Returns a new ValidationState wrapping the current state (vState), typed as object
// so it can be passed back to the state-based methods of this class (see ToState).
// NOTE(review): a statement between the signature and the return is not visible in this view.
207 public object GetCurrentState ()
210 return new ValidationState (vState);
// Converts an opaque state object back to a ValidationState.
// Throws ArgumentNullException when stateObject is null.
// Contains a throw of ArgumentException for an unexpected type.
// NOTE(review): the guard on the ArgumentException and the return statement are not
// visible in this view - presumably a null check on the `as` result, then `return state`.
213 private ValidationState ToState (object stateObject)
215 if (stateObject == null)
216 throw new ArgumentNullException ("stateObject");
217 ValidationState state = stateObject as ValidationState;
219 throw new ArgumentException ("Argument stateObject is not of expected type.");
// Unwraps stateObject and returns the result of ValidationState.AfterOpenStartTag
// for the given element name. ToState rejects a null or foreign stateObject.
public object AfterOpenStartTag (object stateObject,
	string localName, string ns)
{
	return ToState (stateObject).AfterOpenStartTag (localName, ns);
}
// Unwraps stateObject and returns the result of ValidationState.OpenStartTag
// for the given element name. ToState rejects a null or foreign stateObject.
public bool OpenStartTag (object stateObject,
	string localName, string ns)
{
	return ToState (stateObject).OpenStartTag (localName, ns);
}
// Unwraps stateObject and returns the result of ValidationState.AfterAttribute
// for the given attribute name, passing this reader as the XmlReader argument.
public object AfterAttribute (object stateObject,
	string localName, string ns)
{
	return ToState (stateObject).AfterAttribute (localName, ns, this);
}
// Unwraps stateObject and returns the result of ValidationState.Attribute
// for the given attribute name, passing this reader as the XmlReader argument.
public bool Attribute (object stateObject,
	string localName, string ns)
{
	return ToState (stateObject).Attribute (localName, ns, this);
}
// Unwraps stateObject and returns the result of ValidationState.AfterCloseStartTag.
public object AfterCloseStartTag (object stateObject)
{
	return ToState (stateObject).AfterCloseStartTag ();
}
// Unwraps stateObject and returns the result of ValidationState.CloseStartTag.
public bool CloseStartTag (object stateObject)
{
	return ToState (stateObject).CloseStartTag ();
}
// Unwraps stateObject and returns the result of ValidationState.AfterEndTag.
public object AfterEndTag (object stateObject)
{
	return ToState (stateObject).AfterEndTag ();
}
// Unwraps stateObject and returns the result of ValidationState.EndTag.
public bool EndTag (object stateObject)
{
	return ToState (stateObject).EndTag ();
}
// Returns the element names (XmlQualifiedName values) that the pattern of the given
// state reports through GetLabels, optionally filtered by a strict check.
// NOTE(review): the first `return elements.Values` is presumably guarded by a condition
// that is not visible in this view (likely the rough-label-check setting) - confirm.
275 public ICollection GetElementLabels (object stateObject)
277 ValidationState state = ToState (stateObject);
278 RdpPattern p = state.Pattern;
279 Hashtable elements = new Hashtable ();
280 Hashtable attributes = new Hashtable ();
// Fills both tables; only `elements` is used below.
281 p.GetLabels (elements, attributes);
284 return elements.Values;
286 // Strict check that tries actual validation that will
287 // cover rejection by notAllowed.
288 if (strictCheckCache == null)
289 strictCheckCache = new ArrayList ();
291 strictCheckCache.Clear ();
// Collect first, remove afterwards, so the table is not modified while it is enumerated.
292 foreach (XmlQualifiedName qname in elements.Values)
293 if (p.StartTagOpenDeriv (qname.Name, qname.Namespace) is RdpNotAllowed)
294 strictCheckCache.Add (qname);
// NOTE(review): Remove () takes a key; this assumes GetLabels keys the table by the
// same XmlQualifiedName it stores as the value - TODO confirm.
295 foreach (XmlQualifiedName qname in strictCheckCache)
296 elements.Remove (qname);
297 strictCheckCache.Clear ();
299 return elements.Values;
// Returns the attribute names (XmlQualifiedName values) that the pattern of the given
// state reports through GetLabels, optionally filtered by a strict check.
// NOTE(review): the first `return attributes.Values` is presumably guarded by a condition
// that is not visible in this view (likely the rough-label-check setting) - confirm.
302 public ICollection GetAttributeLabels (object stateObject)
304 ValidationState state = ToState (stateObject);
305 RdpPattern p = state.Pattern;
306 Hashtable elements = new Hashtable ();
307 Hashtable attributes = new Hashtable ();
// Fills both tables; only `attributes` is used below.
308 p.GetLabels (elements, attributes);
311 return attributes.Values;
313 // Strict check that tries actual validation that will
314 // cover rejection by notAllowed.
315 if (strictCheckCache == null)
316 strictCheckCache = new ArrayList ();
318 strictCheckCache.Clear ();
// Collect first, remove afterwards, so the table is not modified while it is enumerated.
// The attribute derivative is taken with a null value and this reader as context.
319 foreach (XmlQualifiedName qname in attributes.Values)
320 if (p.AttDeriv (qname.Name, qname.Namespace,null, this) is RdpNotAllowed)
321 strictCheckCache.Add (qname);
// NOTE(review): Remove () takes a key; this assumes GetLabels keys the table by the
// same XmlQualifiedName it stores as the value - TODO confirm.
322 foreach (XmlQualifiedName qname in strictCheckCache)
323 attributes.Remove (qname);
324 strictCheckCache.Clear ();
326 return attributes.Values;
// Tells whether the given state accepts an end tag right away: takes the end-tag
// derivative of the state's pattern and returns false only when it is RdpNotAllowed.
public bool Emptiable (object stateObject)
{
	RdpPattern afterEnd = ToState (stateObject).Pattern.EndTagDeriv ();
	if (afterEnd is RdpNotAllowed)
		return false;
	return true;
}
// Builds the validation exception for `message`. Two forms are visible: one that
// appends a label list (elements or attributes, from BuildLabels) to the message, and
// a plain one that forwards the message unchanged to the single-argument overload.
// NOTE(review): the second parameter line and the condition that chooses between the
// two forms are not visible in this view - presumably `bool elements` and the
// report-details setting. Confirm.
337 private RelaxngException CreateValidationError (string message,
341 return CreateValidationError (String.Concat (message,
343 elements ? "elements are: " : "attributes are: ",
344 BuildLabels (elements),
346 return CreateValidationError (message);
// Builds a RelaxngException whose message is `message` followed by the reader's
// BaseURI and, when available, line and column; prevState is attached to the exception.
349 private RelaxngException CreateValidationError (string message)
// Line information is available only when the underlying reader implements IXmlLineInfo.
351 IXmlLineInfo li = reader as IXmlLineInfo;
352 string lineInfo = reader.BaseURI;
// NOTE(review): `li` is null when the cast fails; the guard for the line below is not
// visible in this view - presumably a null check on `li`. Confirm.
354 lineInfo += String.Format (" line {0}, column {1}",
355 li.LineNumber, li.LinePosition);
356 return new RelaxngException (message + lineInfo, prevState);
// Prepares the validation state: checks whether the pattern is compiled and sets
// vState to pattern.StartPattern.
// NOTE(review): most of this method (what happens for an uncompiled pattern, and any
// guard around the assignment) is not visible in this view.
359 private void PrepareState ()
363 if (!pattern.IsCompiled) {
367 vState = pattern.StartPattern;
// Builds the label text used in detailed error messages: the element labels
// (elements == true) or attribute labels (elements == false) of prevState,
// each rendered with XmlQualifiedName.ToString ().
// NOTE(review): a line inside the loop is not visible - presumably a separator append.
370 private string BuildLabels (bool elements)
372 StringBuilder sb = new StringBuilder ();
// Labels are taken from the state before the failing step.
373 ValidationState s = new ValidationState (prevState);
374 ICollection col = elements ?
375 GetElementLabels (s) : GetAttributeLabels (s);
376 foreach (XmlQualifiedName qname in col) {
377 sb.Append (qname.ToString ());
380 return sb.ToString ();
// Advances the underlying reader and validates the node it lands on by taking
// derivatives of the current state (vState) through the memoization store.
// Errors go through HandleError, which either recovers or throws.
// NOTE(review): many short lines (braces, else/break, prevState updates, the return)
// are not visible in this view; comments below describe only the visible statements.
383 public override bool Read ()
387 // If the input XmlReader is already positioned on
388 // the first node to validate, skip Read() here
// A reader that has not started is advanced; otherwise the result only reflects
// whether the reader is neither closed nor at end of file.
393 if (reader.ReadState == ReadState.Initial)
394 ret = reader.Read ();
396 ret = !((reader.ReadState == ReadState.Closed) || (reader.ReadState == ReadState.EndOfFile));
399 ret = reader.Read ();
// When no text is cached and the node is the end tag at the depth of the recorded start
// element, an empty text node is validated (ValidateWeakMatch3).
// NOTE(review): the statement for the cachedValue != null branch is not visible.
401 // Process pending text node validation if required.
402 if (cachedValue != null)
404 else if (cachedValue == null &&
405 reader.NodeType == XmlNodeType.EndElement &&
406 startElementDepth == reader.Depth)
407 ValidateWeakMatch3 ();
409 switch (reader.NodeType) {
410 case XmlNodeType.Element:
// Start-tag-open derivative for the element name.
414 vState = memo.StartTagOpenDeriv (vState,
415 reader.LocalName, reader.NamespaceURI);
// NOTE(review): the error is routed to HandleError only when a handler is attached;
// what happens otherwise is not visible in this view.
416 if (vState.PatternType == RelaxngPatternType.NotAllowed) {
417 if (InvalidNodeFound != null)
418 vState = HandleError (String.Format ("Invalid start tag found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), true, prevState, RecoverFromInvalidStartTag);
421 // AttsDeriv equals to for each AttDeriv
422 string elementNS = reader.NamespaceURI;
423 if (reader.MoveToFirstAttribute ()) {
// Attributes in the xmlns namespace are tested for here (the action taken is not visible).
425 if (reader.NamespaceURI == "http://www.w3.org/2000/xmlns/")
// Kept so an invalid attribute can be recovered from with the state before it.
428 RdpPattern savedState = vState;
430 string attrNS = reader.NamespaceURI;
// Per attribute: start-attribute derivative, then text-only and text derivative for
// the value, then end-attribute derivative.
432 vState = memo.StartAttDeriv (vState, reader.LocalName, attrNS);
433 if (vState == RdpNotAllowed.Instance) {
434 vState = HandleError (String.Format ("Invalid attribute occurence found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), false, savedState, p => p);
435 continue; // the following steps are ignored.
438 vState = memo.TextOnlyDeriv (vState);
439 vState = TextDeriv (vState, reader.Value, reader);
// A whitespace-only value also keeps prevState as an alternative.
440 if (Util.IsWhitespace (reader.Value))
441 vState = vState.Choice (prevState);
442 if (vState == RdpNotAllowed.Instance)
443 vState = HandleError (String.Format ("Invalid attribute value is found. Value = '{0}'", reader.Value), false, prevState, RecoverFromInvalidText);
445 vState = memo.EndAttDeriv (vState);
446 if (vState == RdpNotAllowed.Instance)
447 vState = HandleError (String.Format ("Invalid attribute value is found. Value = '{0}'", reader.Value), false, prevState, RecoverFromInvalidEnd);
448 } while (reader.MoveToNextAttribute ());
// Start-tag-close derivative once the attributes are processed.
454 vState = memo.StartTagCloseDeriv (vState);
455 if (vState.PatternType == RelaxngPatternType.NotAllowed)
456 vState = HandleError (String.Format ("Invalid start tag closing found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), false, prevState, RecoverFromInvalidStartTagClose);
458 // if it is empty, then redirect to EndElement
459 if (reader.IsEmptyElement) {
460 ValidateWeakMatch3 ();
461 goto case XmlNodeType.EndElement;
464 case XmlNodeType.EndElement:
// NOTE(review): the statement executed for depth 0 is not visible in this view.
465 if (reader.Depth == 0)
// End-tag derivative.
469 vState = memo.EndTagDeriv (vState);
470 if (vState.PatternType == RelaxngPatternType.NotAllowed)
471 vState = HandleError (String.Format ("Invalid end tag found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), true, prevState, RecoverFromInvalidEnd);
// Whitespace falls through to the text handling (a line in between is not visible).
473 case XmlNodeType.Whitespace:
475 goto case XmlNodeType.Text;
477 case XmlNodeType.CDATA:
478 case XmlNodeType.Text:
479 case XmlNodeType.SignificantWhitespace:
480 // Whitespace cannot be skipped because data and
481 // value types are required to validate whitespaces.
// Text is only accumulated here; it is validated on a later call.
482 cachedValue += Value;
// Remember the depth of a non-empty start element; forget it on an end element.
486 if (reader.NodeType == XmlNodeType.Element && !reader.IsEmptyElement)
487 startElementDepth = reader.Depth;
488 else if (reader.NodeType == XmlNodeType.EndElement)
489 startElementDepth = -1;
494 #region error recovery
495 // Error recovery feature can be enabled by using
496 // InvalidNodeFound event of type RelaxngValidationEventHandler.
498 // Other than startTagOpenDeriv, it is (again) based on
499 // James Clark's derivative algorithm.
500 // http://www.thaiopensource.com/relaxng/derivative.html
501 // For invalid start tag, we just recover from it by using
502 // xs:any-like pattern for unexpected node occurrence.
// Rewrites a pattern so that its leading part may be absent; used by
// RecoverFromInvalidStartTag. Single-content patterns become Choice (Empty, p);
// Choice, Interleave and After are rebuilt with both sides rewritten recursively.
// Throws SystemException for any pattern type that is not handled.
// NOTE(review): the guard on the RdpGroup return and the lines after the cast are not
// visible in this view - presumably `if (ab is RdpGroup)` plus a non-binary case. Confirm.
504 RdpPattern MakeGroupHeadOptional (RdpPattern p)
506 if (p is RdpAbstractSingleContent)
507 return new RdpChoice (RdpEmpty.Instance, p);
508 RdpAbstractBinary ab = p as RdpAbstractBinary;
// Only the left (head) side of a group is made optional.
512 return new RdpGroup (new RdpChoice (RdpEmpty.Instance, ab.LValue), ab.RValue);
513 else if (ab is RdpChoice)
514 return new RdpChoice (MakeGroupHeadOptional (ab.LValue), MakeGroupHeadOptional (ab.RValue));
515 else if (ab is RdpInterleave)
516 return new RdpInterleave (MakeGroupHeadOptional (ab.LValue), MakeGroupHeadOptional (ab.RValue));
517 else if (ab is RdpAfter) // FIXME: is it correct?
518 return new RdpAfter (MakeGroupHeadOptional (ab.LValue), MakeGroupHeadOptional (ab.RValue));
519 throw new SystemException ("INTERNAL ERROR: unexpected pattern: " + p.GetType ());
// Rewrites a pattern so that the left side of every RdpAfter becomes RdpEmpty; used by
// RecoverFromInvalidText. Single-content patterns become Choice (Empty, p);
// Group, Choice and Interleave are rebuilt with both sides rewritten recursively.
// Throws SystemException for any pattern type that is not handled.
// NOTE(review): the guard on the RdpGroup return and the lines after the cast are not
// visible in this view - presumably `if (ab is RdpGroup)` plus a non-binary case. Confirm.
522 RdpPattern ReplaceAfterHeadWithEmpty (RdpPattern p)
524 if (p is RdpAbstractSingleContent)
525 return new RdpChoice (RdpEmpty.Instance, p);
526 RdpAbstractBinary ab = p as RdpAbstractBinary;
530 return new RdpGroup (ReplaceAfterHeadWithEmpty (ab.LValue), ReplaceAfterHeadWithEmpty (ab.RValue));
531 else if (ab is RdpChoice)
532 return new RdpChoice (ReplaceAfterHeadWithEmpty (ab.LValue), ReplaceAfterHeadWithEmpty (ab.RValue));
533 else if (ab is RdpInterleave)
534 return new RdpInterleave (ReplaceAfterHeadWithEmpty (ab.LValue), ReplaceAfterHeadWithEmpty (ab.RValue));
// The head of an After is dropped; its tail is kept unchanged.
535 else if (ab is RdpAfter)
536 return new RdpAfter (RdpEmpty.Instance, ab.RValue);
537 throw new SystemException ("INTERNAL ERROR: unexpected pattern: " + p.GetType ());
// Walks a pattern tree and combines the results of both sides of binary patterns
// into a choice; used by RecoverFromInvalidEnd. RdpEmpty.Instance acts as the
// "nothing collected" result: when the left side yields it, the right side's result
// is returned alone.
// NOTE(review): several lines are not visible in this view - the guard on the first
// return (presumably for non-binary patterns), the handling that actually picks up an
// After tail, and the statement for an empty right side. Confirm against the full file.
540 RdpPattern CollectAfterTailAsChoice (RdpPattern p)
542 RdpAbstractBinary ab = p as RdpAbstractBinary;
544 return RdpEmpty.Instance;
547 RdpPattern l = CollectAfterTailAsChoice (ab.LValue);
548 if (l == RdpEmpty.Instance)
549 return CollectAfterTailAsChoice (ab.RValue);
550 RdpPattern r = CollectAfterTailAsChoice (ab.RValue);
551 if (r == RdpEmpty.Instance)
553 return new RdpChoice (l, r);
// Rewrites a pattern so that every RdpAttribute becomes RdpEmpty; used by
// RecoverFromInvalidStartTagClose. List and OneOrMore are rebuilt around their
// rewritten child, an RdpElement is returned unchanged, and Group, Choice, Interleave
// and After are rebuilt with both sides rewritten recursively.
// Throws SystemException for any pattern type that is not handled.
// NOTE(review): the guards on the RdpList and RdpGroup returns and the lines after the
// casts are not visible in this view - presumably `if (asc is RdpList)` /
// `if (ab is RdpGroup)` plus null checks. Confirm.
556 RdpPattern ReplaceAttributesWithEmpty (RdpPattern p)
558 if (p is RdpAttribute)
559 return RdpEmpty.Instance;
561 RdpAbstractSingleContent asc = p as RdpAbstractSingleContent;
563 return new RdpList (ReplaceAttributesWithEmpty (asc.Child));
564 if (asc is RdpOneOrMore)
565 return new RdpOneOrMore (ReplaceAttributesWithEmpty (asc.Child));
566 else if (asc is RdpElement)
567 return asc; // should not be expected to contain any attribute as RdpElement.
569 RdpAbstractBinary ab = p as RdpAbstractBinary;
573 return new RdpGroup (ReplaceAttributesWithEmpty (ab.LValue), ReplaceAttributesWithEmpty (ab.RValue));
574 else if (ab is RdpChoice)
575 return new RdpChoice (ReplaceAttributesWithEmpty (ab.LValue), ReplaceAttributesWithEmpty (ab.RValue));
576 else if (ab is RdpInterleave)
577 return new RdpInterleave (ReplaceAttributesWithEmpty (ab.LValue), ReplaceAttributesWithEmpty (ab.RValue));
578 else if (ab is RdpAfter) // FIXME: is it correct?
579 return new RdpAfter (ReplaceAttributesWithEmpty (ab.LValue), ReplaceAttributesWithEmpty (ab.RValue));
580 throw new SystemException ("INTERNAL ERROR: unexpected pattern: " + p.GetType ());
// Recovery step for an invalid start tag (passed to HandleError by Read).
// First retries the start-tag-open derivative for the current element on a copy of
// the pattern whose head was made optional; the visible fallback accepts the
// unexpected node by returning Choice (Anything, p).
// NOTE(review): the lines that decide whether test1 is used are not visible in this view.
583 RdpPattern RecoverFromInvalidStartTag (RdpPattern p)
585 RdpPattern test1 = MakeGroupHeadOptional (p);
586 test1 = memo.StartTagOpenDeriv (test1, reader.LocalName, reader.NamespaceURI);
589 // FIXME: JJC derivative algorithm suggests more complicated recovery. We simply treats current "extra" node as "anything".
590 return new RdpChoice (RdpPattern.Anything, p);
// Recovery step for invalid text or attribute values (passed to HandleError):
// returns the pattern produced by ReplaceAfterHeadWithEmpty.
RdpPattern RecoverFromInvalidText (RdpPattern p)
{
	RdpPattern recovered = ReplaceAfterHeadWithEmpty (p);
	return recovered;
}
// Recovery step for an invalid end tag or attribute end (passed to HandleError):
// returns the pattern produced by CollectAfterTailAsChoice.
RdpPattern RecoverFromInvalidEnd (RdpPattern p)
{
	RdpPattern recovered = CollectAfterTailAsChoice (p);
	return recovered;
}
// Recovery step for an invalid start-tag close (passed to HandleError):
// returns the pattern produced by ReplaceAttributesWithEmpty.
RdpPattern RecoverFromInvalidStartTagClose (RdpPattern p)
{
	RdpPattern recovered = ReplaceAttributesWithEmpty (p);
	return recovered;
}
// Text derivative of p through the memoization store. The value-aware derivative is
// used only for a non-empty value on a pattern that reports IsTextValueDependent;
// every other case uses the empty-text derivative.
RdpPattern TextDeriv (RdpPattern p, string value, XmlReader context)
{
	bool needsValue = value.Length > 0 && p.IsTextValueDependent;
	return needsValue
		? memo.TextDeriv (p, value, context)
		: memo.EmptyTextDeriv (p);
}
// Validates the text accumulated in cachedValue against the current state.
// An end tag at the depth of the recorded start element is validated as text-only
// content (ValidateTextOnlyCore); otherwise non-whitespace text goes through the
// mixed-text derivative followed by the text derivative. A NotAllowed result is
// reported through HandleError with RecoverFromInvalidText.
// NOTE(review): many lines are not visible in this view (the use of `remain`, the
// default switch label, how `ts` is assigned to vState, resetting of cachedValue);
// the comments describe only the visible statements.
618 void ValidateText (bool remain)
620 RdpPattern ts = vState;
621 switch (reader.NodeType) {
622 case XmlNodeType.EndElement:
// An end tag at a different depth is handled like an element boundary (mixed content).
623 if (startElementDepth != reader.Depth)
624 goto case XmlNodeType.Element;
625 ts = ValidateTextOnlyCore ();
627 case XmlNodeType.Element:
628 startElementDepth = -1;
629 if (!Util.IsWhitespace (cachedValue)) {
630 // HandleError() is not really useful here since it involves else condition...
631 ts = memo.MixedTextDeriv (ts);
632 /*if (InvalidNodeFound != null) {
633 InvalidNodeFound (reader, "Not allowed text node was found.");
638 ts = TextDeriv (ts, cachedValue, reader);
643 goto case XmlNodeType.Element;
650 if (vState.PatternType == RelaxngPatternType.NotAllowed)
651 vState = HandleError (String.Format ("Invalid text found. Text value = {0} ", cachedValue), true, prevState, RecoverFromInvalidText);
// Validates an element without text children as if it contained a single empty text
// node: sets cachedValue to the empty string and runs the text-only validation.
// A NotAllowed result is reported through HandleError with RecoverFromInvalidText;
// the recorded start-element depth is cleared at the end.
// NOTE(review): the lines that store `ts` into vState are not visible in this view.
656 // section 6.2.7 weak match 3
657 // childrenDeriv cx p [] = childrenDeriv cx p [(TextNode "")]
658 void ValidateWeakMatch3 ()
660 cachedValue = String.Empty;
661 RdpPattern ts = ValidateTextOnlyCore ();
666 if (vState.PatternType == RelaxngPatternType.NotAllowed)
667 vState = HandleError (String.Format ("Invalid text found. Text value = {0} ", cachedValue), true, prevState, RecoverFromInvalidText);
669 startElementDepth = -1;
// Computes the state after text-only content: the text-only derivative of vState
// followed by the text derivative for cachedValue. When cachedValue is whitespace
// only, the result is a choice between vState and that derivative.
// Does not assign vState in the visible lines.
// NOTE(review): the return statement is not visible - presumably `return ts`.
672 RdpPattern ValidateTextOnlyCore ()
674 RdpPattern ts = memo.TextOnlyDeriv (vState);
675 ts = TextDeriv (ts, cachedValue, reader);
676 if (Util.IsWhitespace (cachedValue))
677 ts = vState.Choice (ts);
// Memoization store through which this reader computes its derivatives.
681 MemoizationStore memo = new MemoizationStore ();
684 #region Memoization support
685 internal class MemoizationStore
// Cache tables, one per derivative kind, indexed by the source pattern.
// startOpen and startAtt hold a nested Hashtable per pattern that is indexed by name.
687 Hashtable startOpen = new Hashtable ();
688 Hashtable startClose = new Hashtable ();
689 Hashtable startAtt = new Hashtable ();
690 Hashtable endTag = new Hashtable ();
691 Hashtable endAtt = new Hashtable ();
692 Hashtable textOnly = new Hashtable ();
693 Hashtable mixedText = new Hashtable ();
694 Hashtable emptyText = new Hashtable ();
// text and text_value work as a pair in TextDeriv: text_value records the string
// a pattern was last derived with; text is read back when that same instance recurs.
695 Hashtable text = new Hashtable ();
696 Hashtable text_value = new Hashtable ();
// Namespace -> (local name -> XmlQualifiedName), used by GetQName.
697 Hashtable qnames = new Hashtable ();
// NOTE(review): the members of this enum are not visible in this view, and no visible
// code refers to it - confirm whether it is still used.
699 enum DerivativeType {
// Looks up the XmlQualifiedName for (local, ns) in the two-level qnames table
// (namespace first, then local name), creating the nested table and the name
// when needed.
// NOTE(review): the null checks, the stores back into the tables and the return are
// not visible in this view - presumably the usual get-or-create sequence. Confirm.
709 XmlQualifiedName GetQName (string local, string ns)
711 Hashtable nst = qnames [ns] as Hashtable;
713 nst = new Hashtable ();
716 XmlQualifiedName qn = nst [local] as XmlQualifiedName;
718 qn = new XmlQualifiedName (local, ns);
// Memoized start-tag-open derivative of p for the element name (local, ns).
// Looks in startOpen [p] under the qualified name from GetQName and computes
// p.StartTagOpenDeriv (local, ns, this) on a miss.
// NOTE(review): the null checks, the stores into the tables and the return are not
// visible in this view.
724 public RdpPattern StartTagOpenDeriv (RdpPattern p, string local, string ns)
726 Hashtable h = startOpen [p] as Hashtable;
728 h = new Hashtable ();
731 XmlQualifiedName qn = GetQName (local, ns);
732 RdpPattern m = h [qn] as RdpPattern;
734 m = p.StartTagOpenDeriv (local, ns, this);
// Memoized start-attribute derivative of p for the attribute name (local, ns).
// Looks in startAtt [p] under the qualified name from GetQName and computes
// p.StartAttDeriv (local, ns, this) on a miss.
// NOTE(review): the null checks, the stores into the tables and the return are not
// visible in this view.
740 public RdpPattern StartAttDeriv (RdpPattern p, string local, string ns)
742 Hashtable h = startAtt [p] as Hashtable;
744 h = new Hashtable ();
747 XmlQualifiedName qn = GetQName (local, ns);
748 RdpPattern m = h [qn] as RdpPattern;
750 m = p.StartAttDeriv (local, ns, this);
// Memoized start-tag-close derivative of p: looks in startClose and computes
// p.StartTagCloseDeriv (this).
// NOTE(review): the hit test, the store back into the table and the return are not
// visible in this view.
756 public RdpPattern StartTagCloseDeriv (RdpPattern p)
758 RdpPattern m = startClose [p] as RdpPattern;
762 m = p.StartTagCloseDeriv (this);
// Memoized end-tag derivative of p: looks in endTag and computes p.EndTagDeriv (this).
// NOTE(review): the hit test, the store back into the table and the return are not
// visible in this view.
767 public RdpPattern EndTagDeriv (RdpPattern p)
769 RdpPattern m = endTag [p] as RdpPattern;
773 m = p.EndTagDeriv (this);
// Memoized end-attribute derivative of p: looks in endAtt and computes p.EndAttDeriv (this).
// NOTE(review): the hit test, the store back into the table and the return are not
// visible in this view.
778 public RdpPattern EndAttDeriv (RdpPattern p)
780 RdpPattern m = endAtt [p] as RdpPattern;
784 m = p.EndAttDeriv (this);
// Memoized mixed-text derivative of p: looks in mixedText and computes
// p.MixedTextDeriv (this).
// NOTE(review): the hit test, the store back into the table and the return are not
// visible in this view.
789 public RdpPattern MixedTextDeriv (RdpPattern p)
791 RdpPattern m = mixedText [p] as RdpPattern;
795 m = p.MixedTextDeriv (this);
// Memoized text-only derivative of p: looks in textOnly and computes
// p.TextOnlyDeriv (this).
// NOTE(review): the hit test, the store back into the table and the return are not
// visible in this view.
800 public RdpPattern TextOnlyDeriv (RdpPattern p)
802 RdpPattern m = textOnly [p] as RdpPattern;
806 m = p.TextOnlyDeriv (this);
// Text derivative of p for `value`.
// Context-dependent patterns are never cached: they are derived directly with the
// reader context. For other patterns one entry per pattern is kept, and it is reused
// only when the very same string instance is passed again (reference comparison, not
// string equality).
// NOTE(review): the store into `text` and the return are not visible in this view.
811 public RdpPattern TextDeriv (RdpPattern p, string value, XmlReader context)
813 if (p.IsContextDependent)
814 return p.TextDeriv (value, context);
816 if (Object.ReferenceEquals (text_value [p], value))
817 return text [p] as RdpPattern;
818 RdpPattern m = p.TextDeriv (value, context, this);
819 text_value [p] = value;
// Memoized empty-text derivative of p: looks in emptyText and computes
// p.EmptyTextDeriv (this).
// NOTE(review): the hit test, the store back into the table and the return are not
// visible in this view.
824 public RdpPattern EmptyTextDeriv (RdpPattern p)
826 RdpPattern m = emptyText [p] as RdpPattern;
830 m = p.EmptyTextDeriv (this);