2009-02-23 Atsushi Enomoto <atsushi@ximian.com>
author Atsushi Eno <atsushieno@gmail.com>
Mon, 23 Feb 2009 07:01:05 +0000 (07:01 -0000)
committer Atsushi Eno <atsushieno@gmail.com>
Mon, 23 Feb 2009 07:01:05 +0000 (07:01 -0000)
* RelaxngValidatingReader.cs : implemented error recovery feature.
  Use the InvalidNodeFound event (of type RelaxngValidationEventHandler)
  to handle validation errors, in the same way as ValidationEventHandler
  in System.Xml.Schema.

* RdpPattern.cs : added "Anything" which is used in error recovery
  support.

* relaxngtest.cs : added --skip-error option to test error recovery.

svn path=/trunk/mcs/; revision=127703

mcs/class/Commons.Xml.Relaxng/Commons.Xml.Relaxng.Derivative/ChangeLog
mcs/class/Commons.Xml.Relaxng/Commons.Xml.Relaxng.Derivative/RdpPatterns.cs
mcs/class/Commons.Xml.Relaxng/Commons.Xml.Relaxng/ChangeLog
mcs/class/Commons.Xml.Relaxng/Commons.Xml.Relaxng/RelaxngValidatingReader.cs
mcs/class/Commons.Xml.Relaxng/Test/standalone_tests/ChangeLog
mcs/class/Commons.Xml.Relaxng/Test/standalone_tests/relaxngtest.cs

index 765517c372169c78fd5a6b4eb9573e9bdac59fad..795a3f32f84c0a68531f9248db45aa60548a1537 100644 (file)
@@ -1,3 +1,8 @@
+2009-02-23  Atsushi Enomoto <atsushi@ximian.com>
+
+       * RdpPattern.cs : added "Anything" which is used in error recovery
+         support.
+
 2007-12-14  Atsushi Enomoto <atsushi@ximian.com>
 
        * RdpPattern.cs : For ContainsText() (for interleave text/text 
index fb390d3b48339fd990aecd1104b158a94185f803..b65e59f33575c873b7d4274556f707350e622f19 100644 (file)
@@ -48,6 +48,16 @@ namespace Commons.Xml.Relaxng.Derivative
        // for now).
        public abstract class RdpPattern
        {
+               // Shared pattern used by the validating reader's error recovery as
+               // a stand-in for content the schema did not expect.
+               public static readonly RdpPattern Anything;
+
+               // Builds Anything as a choice between: Empty, a list wrapping an
+               // any-name attribute with text content, Text, and a list wrapping an
+               // any-name element. The element's children are Anything itself, so
+               // the element is created with null children first and patched
+               // afterwards.
+               static RdpPattern ()
+               {
+                       RdpAttribute wildcardAttribute = new RdpAttribute (RdpAnyName.Instance, RdpText.Instance);
+                       RdpPattern attributeList = new RdpList (wildcardAttribute);
+                       // Children is filled in below, once Anything exists.
+                       RdpElement wildcardElement = new RdpElement (RdpAnyName.Instance, null);
+                       Anything = new RdpChoice (
+                               RdpEmpty.Instance,
+                               new RdpChoice (
+                                       attributeList,
+                                       new RdpChoice (RdpText.Instance, new RdpList (wildcardElement))));
+                       wildcardElement.Children = Anything;
+               }
+
                internal bool nullableComputed;
                internal bool isNullable;
                Hashtable patternPool;
index 19642be10b61ad6d82a20234c769ed25ab29fdf9..5ca967410d4670c2ac0d758a20f3f82f1d5753e5 100644 (file)
@@ -1,3 +1,10 @@
+2009-02-23  Atsushi Enomoto <atsushi@ximian.com>
+
+       * RelaxngValidatingReader.cs : implemented error recovery feature.
+         Use the InvalidNodeFound event (of type RelaxngValidationEventHandler)
+         to handle validation errors, in the same way as ValidationEventHandler
+         in System.Xml.Schema.
+
 2009-02-19  Atsushi Enomoto <atsushi@ximian.com>
 
        * XsdDatatypeProvider.cs : detect grammar-level error in type usage
index 875d705c3b24edab6aa842fe7ab4d55db65e1e99..e645f4444df024cd706f353c4983519ae6547d53 100644 (file)
@@ -80,6 +80,22 @@ namespace Commons.Xml.Relaxng
                bool inContent;
                bool firstRead = true;
 
+               // Callback raised when a validation error is found. "source" is the
+               // reader reporting the error and "message" describes it. Returning
+               // true lets validation recover and continue; returning false makes
+               // the reader throw a validation error.
+               public delegate bool RelaxngValidationEventHandler (XmlReader source, string message);
+
+               // Ready-made handler that returns true for every reported error.
+               public static readonly RelaxngValidationEventHandler IgnoreError = delegate { return true; };
+
+               // Subscribing to this event enables error recovery; with no
+               // subscriber HandleError() always throws.
+               public event RelaxngValidationEventHandler InvalidNodeFound;
+
+               // Computes the pattern to continue with, given the state that was
+               // saved before the failing derivative step.
+               delegate RdpPattern RecoveryHandler (RdpPattern source);
+
+               // Reports "error" through InvalidNodeFound. When a handler exists
+               // and returns true, the result of recover (source) is returned as
+               // the new state; otherwise the error from CreateValidationError()
+               // is thrown. The "elements" argument is not used by this method.
+               RdpPattern HandleError (string error, bool elements, RdpPattern source, RecoveryHandler recover)
+               {
+                       bool recoverable = InvalidNodeFound != null && InvalidNodeFound (this, error);
+                       if (!recoverable)
+                               throw CreateValidationError (error, true);
+                       return recover (source);
+               }
+
                internal string CurrentStateXml {
                        get { return RdpUtil.DebugRdpPattern (vState, new Hashtable ()); }
                }
@@ -397,8 +413,10 @@ namespace Commons.Xml.Relaxng
                                prevState = vState;
                                vState = memo.StartTagOpenDeriv (vState,
                                        reader.LocalName, reader.NamespaceURI);
-                               if (vState.PatternType == RelaxngPatternType.NotAllowed)
-                                       throw CreateValidationError (String.Format ("Invalid start tag found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), true);
+                               if (vState.PatternType == RelaxngPatternType.NotAllowed) {
+                                       if (InvalidNodeFound != null)
+                                               vState = HandleError (String.Format ("Invalid start tag found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), true, prevState, RecoverFromInvalidStartTag);
+                               }
 
                                // AttsDeriv equals to for each AttDeriv
                                string elementNS = reader.NamespaceURI;
@@ -407,31 +425,26 @@ namespace Commons.Xml.Relaxng
                                                if (reader.NamespaceURI == "http://www.w3.org/2000/xmlns/")
                                                        continue;
 
+                                               RdpPattern savedState = vState;
                                                prevState = vState;
                                                string attrNS = reader.NamespaceURI;
 
-#if false // old code
-
-                                               vState = vState.AttDeriv (reader.LocalName, attrNS, reader.GetAttribute (reader.LocalName, attrNS), this);
-                                               if (vState == RdpNotAllowed.Instance)
-                                                       throw CreateValidationError (String.Format ("Invalid attribute found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), false);
-
-#else
-
-                                               prevState = vState;
                                                vState = memo.StartAttDeriv (vState, reader.LocalName, attrNS);
-                                               if (vState == RdpNotAllowed.Instance)
-                                                       throw CreateValidationError (String.Format ("Invalid attribute found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), false);
+                                               if (vState == RdpNotAllowed.Instance) {
+                                                       vState = HandleError (String.Format ("Invalid attribute occurence found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), false, savedState, p => p);
+                                                       continue; // the following steps are ignored.
+                                               }
                                                prevState = vState;
                                                vState = memo.TextOnlyDeriv (vState);
                                                vState = TextDeriv (vState, reader.Value, reader);
                                                if (Util.IsWhitespace (reader.Value))
                                                        vState = vState.Choice (prevState);
+                                               if (vState == RdpNotAllowed.Instance)
+                                                       vState = HandleError (String.Format ("Invalid attribute value is found. Value = '{0}'", reader.Value), false, prevState, RecoverFromInvalidText);
+                                               prevState = vState;
                                                vState = memo.EndAttDeriv (vState);
                                                if (vState == RdpNotAllowed.Instance)
-                                                       throw CreateValidationError (String.Format ("Invalid attribute value is found. Value = '{0}'", reader.Value), false);
-
-#endif
+                                                       vState = HandleError (String.Format ("Invalid attribute value is found. Value = '{0}'", reader.Value), false, prevState, RecoverFromInvalidEnd);
                                        } while (reader.MoveToNextAttribute ());
                                        MoveToElement ();
                                }
@@ -440,7 +453,7 @@ namespace Commons.Xml.Relaxng
                                prevState = vState;
                                vState = memo.StartTagCloseDeriv (vState);
                                if (vState.PatternType == RelaxngPatternType.NotAllowed)
-                                       throw CreateValidationError (String.Format ("Invalid start tag closing found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), false);
+                                       vState = HandleError (String.Format ("Invalid start tag closing found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), false, prevState, RecoverFromInvalidStartTagClose);
 
                                // if it is empty, then redirect to EndElement
                                if (reader.IsEmptyElement) {
@@ -455,7 +468,7 @@ namespace Commons.Xml.Relaxng
                                prevState = vState;
                                vState = memo.EndTagDeriv (vState);
                                if (vState.PatternType == RelaxngPatternType.NotAllowed)
-                                       throw CreateValidationError (String.Format ("Invalid end tag found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), true);
+                                       vState = HandleError (String.Format ("Invalid end tag found. LocalName = {0}, NS = {1}.", reader.LocalName, reader.NamespaceURI), true, prevState, RecoverFromInvalidEnd);
                                break;
                        case XmlNodeType.Whitespace:
                                if (inContent)
@@ -478,6 +491,122 @@ namespace Commons.Xml.Relaxng
                        return ret;
                }
 
+               #region error recovery
+               // Error recovery feature can be enabled by using
+               // InvalidNodeFound event of type RelaxngValidationEventHandler.
+               // 
+               // Other than startTagOpenDeriv, it is (again) based on
+               // James Clark's derivative algorithm.
+               // http://www.thaiopensource.com/relaxng/derivative.html
+               // For an invalid start tag, we just recover from it by using an
+               // xs:any-like pattern for the unexpected node occurrence.
+
+               // Returns a copy of p in which the leading content may be skipped:
+               //  - a single-content pattern becomes (Empty | p);
+               //  - a group gets (Empty | left) as its left side;
+               //  - choice, interleave and after are rebuilt from both sides,
+               //    each processed recursively;
+               //  - any other non-binary pattern is returned unchanged.
+               // Throws SystemException for a binary pattern of an unknown kind.
+               RdpPattern MakeGroupHeadOptional (RdpPattern p)
+               {
+                       if (p is RdpAbstractSingleContent)
+                               return new RdpChoice (RdpEmpty.Instance, p);
+
+                       RdpAbstractBinary binary = p as RdpAbstractBinary;
+                       if (binary == null)
+                               return p;
+
+                       if (binary is RdpGroup) {
+                               RdpPattern optionalHead = new RdpChoice (RdpEmpty.Instance, binary.LValue);
+                               return new RdpGroup (optionalHead, binary.RValue);
+                       }
+                       if (binary is RdpChoice) {
+                               RdpPattern left = MakeGroupHeadOptional (binary.LValue);
+                               RdpPattern right = MakeGroupHeadOptional (binary.RValue);
+                               return new RdpChoice (left, right);
+                       }
+                       if (binary is RdpInterleave) {
+                               RdpPattern left = MakeGroupHeadOptional (binary.LValue);
+                               RdpPattern right = MakeGroupHeadOptional (binary.RValue);
+                               return new RdpInterleave (left, right);
+                       }
+                       if (binary is RdpAfter) { // FIXME: is it correct?
+                               RdpPattern left = MakeGroupHeadOptional (binary.LValue);
+                               RdpPattern right = MakeGroupHeadOptional (binary.RValue);
+                               return new RdpAfter (left, right);
+                       }
+                       throw new SystemException ("INTERNAL ERROR: unexpected pattern: " + p.GetType ());
+               }
+
+               // Rebuilds p so that every RdpAfter reached through group, choice
+               // and interleave nodes has its left side replaced by Empty (its
+               // right side is kept as is). A single-content pattern becomes
+               // (Empty | p); other non-binary patterns are returned unchanged.
+               // Throws SystemException for a binary pattern of an unknown kind.
+               // NOTE(review): the single-content case is the same as in
+               // MakeGroupHeadOptional() - confirm that is intended here.
+               RdpPattern ReplaceAfterHeadWithEmpty (RdpPattern p)
+               {
+                       if (p is RdpAbstractSingleContent)
+                               return new RdpChoice (RdpEmpty.Instance, p);
+
+                       RdpAbstractBinary binary = p as RdpAbstractBinary;
+                       if (binary == null)
+                               return p;
+
+                       if (binary is RdpGroup) {
+                               RdpPattern left = ReplaceAfterHeadWithEmpty (binary.LValue);
+                               RdpPattern right = ReplaceAfterHeadWithEmpty (binary.RValue);
+                               return new RdpGroup (left, right);
+                       }
+                       if (binary is RdpChoice) {
+                               RdpPattern left = ReplaceAfterHeadWithEmpty (binary.LValue);
+                               RdpPattern right = ReplaceAfterHeadWithEmpty (binary.RValue);
+                               return new RdpChoice (left, right);
+                       }
+                       if (binary is RdpInterleave) {
+                               RdpPattern left = ReplaceAfterHeadWithEmpty (binary.LValue);
+                               RdpPattern right = ReplaceAfterHeadWithEmpty (binary.RValue);
+                               return new RdpInterleave (left, right);
+                       }
+                       if (binary is RdpAfter)
+                               return new RdpAfter (RdpEmpty.Instance, binary.RValue);
+                       throw new SystemException ("INTERNAL ERROR: unexpected pattern: " + p.GetType ());
+               }
+
+               // Walks the binary nodes of p and gathers the right side of every
+               // RdpAfter that is found. Returns Empty when nothing is found, the
+               // single tail when only one side yields one, and a choice of both
+               // otherwise. Non-binary patterns are not descended into, and a tail
+               // that is itself Empty counts as "nothing found".
+               RdpPattern CollectAfterTailAsChoice (RdpPattern p)
+               {
+                       RdpAbstractBinary binary = p as RdpAbstractBinary;
+                       if (binary == null)
+                               return RdpEmpty.Instance;
+                       if (binary is RdpAfter)
+                               return binary.RValue;
+
+                       RdpPattern leftTail = CollectAfterTailAsChoice (binary.LValue);
+                       RdpPattern rightTail = CollectAfterTailAsChoice (binary.RValue);
+                       if (leftTail == RdpEmpty.Instance)
+                               return rightTail;
+                       if (rightTail == RdpEmpty.Instance)
+                               return leftTail;
+                       return new RdpChoice (leftTail, rightTail);
+               }
+
+               // Rebuilds p with every RdpAttribute replaced by Empty. Lists,
+               // one-or-more and binary patterns (group, choice, interleave,
+               // after) are rebuilt from their recursively processed children;
+               // elements and all other patterns are returned unchanged.
+               // Throws SystemException for a binary pattern of an unknown kind.
+               RdpPattern ReplaceAttributesWithEmpty (RdpPattern p)
+               {
+                       if (p is RdpAttribute)
+                               return RdpEmpty.Instance;
+
+                       RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
+                       if (single != null) {
+                               if (single is RdpList)
+                                       return new RdpList (ReplaceAttributesWithEmpty (single.Child));
+                               if (single is RdpOneOrMore)
+                                       return new RdpOneOrMore (ReplaceAttributesWithEmpty (single.Child));
+                               if (single is RdpElement)
+                                       return single; // should not be expected to contain any attribute as RdpElement.
+                       }
+
+                       RdpAbstractBinary binary = p as RdpAbstractBinary;
+                       if (binary == null)
+                               return p;
+
+                       if (binary is RdpGroup) {
+                               RdpPattern left = ReplaceAttributesWithEmpty (binary.LValue);
+                               RdpPattern right = ReplaceAttributesWithEmpty (binary.RValue);
+                               return new RdpGroup (left, right);
+                       }
+                       if (binary is RdpChoice) {
+                               RdpPattern left = ReplaceAttributesWithEmpty (binary.LValue);
+                               RdpPattern right = ReplaceAttributesWithEmpty (binary.RValue);
+                               return new RdpChoice (left, right);
+                       }
+                       if (binary is RdpInterleave) {
+                               RdpPattern left = ReplaceAttributesWithEmpty (binary.LValue);
+                               RdpPattern right = ReplaceAttributesWithEmpty (binary.RValue);
+                               return new RdpInterleave (left, right);
+                       }
+                       if (binary is RdpAfter) { // FIXME: is it correct?
+                               RdpPattern left = ReplaceAttributesWithEmpty (binary.LValue);
+                               RdpPattern right = ReplaceAttributesWithEmpty (binary.RValue);
+                               return new RdpAfter (left, right);
+                       }
+                       throw new SystemException ("INTERNAL ERROR: unexpected pattern: " + p.GetType ());
+               }
+
+               // Recovery for a start tag that the state p (the state saved before
+               // the failing start-tag-open step) does not allow.
+               // First retries the start-tag-open derivative against p with its
+               // leading content made optional. If that still fails, the current
+               // element is treated as "anything" by returning (Anything | p).
+               RdpPattern RecoverFromInvalidStartTag (RdpPattern p)
+               {
+                       RdpPattern test1 = MakeGroupHeadOptional (p);
+                       test1 = memo.StartTagOpenDeriv (test1, reader.LocalName, reader.NamespaceURI);
+                       // A failed derivative is reported as a NotAllowed pattern (the
+                       // validating code checks PatternType for it), so a null check
+                       // alone never reaches the fallback below.
+                       if (test1 != null && test1.PatternType != RelaxngPatternType.NotAllowed)
+                               return test1;
+                       // FIXME: the JJC derivative algorithm suggests a more complicated recovery. We simply treat the current "extra" node as "anything".
+                       return new RdpChoice (RdpPattern.Anything, p);
+               }
+
+               // Recovery for invalid text (or an invalid attribute value): continues
+               // from p with the left side of its RdpAfter nodes replaced by Empty.
+               RdpPattern RecoverFromInvalidText (RdpPattern p)
+               {
+                       RdpPattern recovered = ReplaceAfterHeadWithEmpty (p);
+                       return recovered;
+               }
+
+               // Recovery for an invalid end (end tag or end of attribute):
+               // continues with the choice of the RdpAfter tails collected from p.
+               RdpPattern RecoverFromInvalidEnd (RdpPattern p)
+               {
+                       RdpPattern recovered = CollectAfterTailAsChoice (p);
+                       return recovered;
+               }
+
+               // Recovery for an invalid start tag close: continues from p with
+               // its attribute patterns replaced by Empty.
+               RdpPattern RecoverFromInvalidStartTagClose (RdpPattern p)
+               {
+                       RdpPattern recovered = ReplaceAttributesWithEmpty (p);
+                       return recovered;
+               }
+
+               #endregion
+
                RdpPattern TextDeriv (RdpPattern p, string value, XmlReader context)
                {
                        if (value.Length > 0 && p.IsTextValueDependent)
@@ -498,8 +627,15 @@ namespace Commons.Xml.Relaxng
                        case XmlNodeType.Element:
                                startElementDepth = -1;
                                if (!Util.IsWhitespace (cachedValue)) {
+                                       // HandleError() is not really useful here since it involves else condition...
                                        ts = memo.MixedTextDeriv (ts);
-                                       ts = TextDeriv (ts, cachedValue, reader);
+                                       if (InvalidNodeFound != null) {
+                                               InvalidNodeFound (reader, "Not allowed text node was found.");
+                                               ts = vState;
+                                               cachedValue = null;
+                                       }
+                                       else
+                                               ts = TextDeriv (ts, cachedValue, reader);
                                }
                                break;
                        default:
@@ -512,7 +648,7 @@ namespace Commons.Xml.Relaxng
                        vState = ts;
 
                        if (vState.PatternType == RelaxngPatternType.NotAllowed)
-                               throw CreateValidationError (String.Format ("Invalid text found. Text value = {0} ", cachedValue), true);
+                               vState = HandleError (String.Format ("Invalid text found. Text value = {0} ", cachedValue), true, prevState, RecoverFromInvalidText);
                        cachedValue = null;
                        return;
                }
@@ -528,7 +664,7 @@ namespace Commons.Xml.Relaxng
                        vState = ts;
 
                        if (vState.PatternType == RelaxngPatternType.NotAllowed)
-                               throw CreateValidationError (String.Format ("Invalid text found. Text value = {0} ", cachedValue), true);
+                               vState = HandleError (String.Format ("Invalid text found. Text value = {0} ", cachedValue), true, prevState, RecoverFromInvalidText);
                        cachedValue = null;
                        startElementDepth = -1;
                }
index baf84e696a8800aac3e0b782737582bf4f434de9..e7360696b55c327a4680d00c8832e9c5bf503238 100644 (file)
@@ -1,3 +1,7 @@
+2009-02-23  Atsushi Enomoto <atsushi@ximian.com>
+
+       * relaxngtest.cs : added --skip-error option to test error recovery.
+
 2005-04-11  Atsushi Enomoto <atsushi@ximian.com>
 
        * anglia-test-runner.cs : added some options.
index 5b7f3f50bb5d3d45bd45a8906137ef3f75c3a987..162b56035a75a507712e4631154f329476c3698b 100644 (file)
@@ -7,9 +7,13 @@ using Commons.Xml.Relaxng.Derivative;
 public class Test\r
 {\r
        static char SEP = Path.DirectorySeparatorChar;\r
+       // Error recovery is opt-in: it is only switched on by the --skip-error argument.\r
+       static bool skip_error = false;\r
 \r
-       public static void Main ()\r
+       public static void Main (string [] args)\r
        {\r
+               if (args.Length > 0 && args [0] == "--skip-error")\r
+                       skip_error = true;\r
+\r
 Console.WriteLine ("Started:  " + DateTime.Now.ToString ("yyyy-MM-dd HH:mm:ss.fff"));\r
                RunTest ();\r
 Console.WriteLine ("Finished: " + DateTime.Now.ToString ("yyyy-MM-dd HH:mm:ss.fff"));\r
@@ -17,20 +21,7 @@ Console.WriteLine ("Finished: " + DateTime.Now.ToString ("yyyy-MM-dd HH:mm:ss.ff
 \r
        static void RunTest ()\r
        {\r
-               foreach (DirectoryInfo di in\r
-                       new DirectoryInfo (@"relax-ng").GetDirectories ()) {\r
-\r
-/*\r
-if (di.Name == "056") // baseURI\r
-       continue;\r
-if (di.Name == "102") // invalid URI fragment\r
-       continue;\r
-if (di.Name == "208") // infinite loop!!\r
-       continue;\r
-if (di.Name == "210") // infinite loop!!\r
-       continue;\r
-*/\r
-\r
+               foreach (DirectoryInfo di in new DirectoryInfo (@"relax-ng").GetDirectories ()) {\r
                        XmlTextReader xtr = null;\r
                        FileInfo fi = new FileInfo (di.FullName + "/i.rng");\r
                        // Invalid grammar case:\r
@@ -69,14 +60,19 @@ if (di.Name == "210") // infinite loop!!
                        foreach (FileInfo inst in di.GetFiles ("*.xml")) {\r
                                try {\r
                                        RelaxngValidatingReader vr = new RelaxngValidatingReader (new XmlTextReader (inst.FullName), p);\r
+                                       if (skip_error)\r
+                                               vr.InvalidNodeFound += RelaxngValidatingReader.IgnoreError;\r
                                        while (!vr.EOF)\r
                                                vr.Read ();\r
-                                       if (inst.Name.IndexOf ("i.") >= 0)\r
+                                       if (inst.Name.IndexOf ("i.") >= 0 && !skip_error)\r
                                                Console.WriteLine ("Incorrectly validated instance: " + di.Name + "/" + inst.Name);\r
                                } catch (RelaxngException ex) {\r
+                                       string path = di.Name + "/" + inst.Name;\r
+                                       if (skip_error)\r
+                                               Console.WriteLine ("Failed to skip error : " + path + ex.Message);\r
                                        if (inst.Name.IndexOf ("i.") >= 0)\r
                                                continue;\r
-                                       Console.WriteLine ("Invalidated instance: " + di.Name + "/" + inst.Name + " : " + ex.Message);\r
+                                       Console.WriteLine ("Invalidated instance: " + path + " : " + ex.Message);\r
                                }\r
                        }\r
                }\r