2 // System.Xml.DTDReader
5 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 // (C)2003 Atsushi Enomoto
11 // When a parameter entity contains cp section, it should be closed
12 // within that declaration.
14 // Resolution to external entities from different BaseURI fails (it is
15 // the same as MS.NET 1.1, but should be fixed in the future).
19 // Permission is hereby granted, free of charge, to any person obtaining
20 // a copy of this software and associated documentation files (the
21 // "Software"), to deal in the Software without restriction, including
22 // without limitation the rights to use, copy, modify, merge, publish,
23 // distribute, sublicense, and/or sell copies of the Software, and to
24 // permit persons to whom the Software is furnished to do so, subject to
25 // the following conditions:
27 // The above copyright notice and this permission notice shall be
28 // included in all copies or substantial portions of the Software.
30 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
34 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
35 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
36 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 using System.Collections;
41 using System.Globalization;
45 using System.Xml.Schema;
// Reads a DTD and populates the DTDObjectModel it works on.
// Implements IXmlLineInfo by forwarding to the current parser input.
49 internal class DTDReader : IXmlLineInfo
// Input the reader is currently consuming.
51 private XmlParserInput currentInput;
// Stack whose depth GenerateDTDObjectModel watches to know when nested
// inputs are finished. NOTE(review): presumably holds suspended
// XmlParserInput objects - the push/pop code is not visible here.
52 private Stack parserInputStack;
// Scratch buffer for the name being read (see AppendNameChar,
// CheckNameCapacity and CreateNameString).
54 private char [] nameBuffer;
55 private int nameLength;
56 private int nameCapacity;
57 private const int initialNameCapacity = 256;
// Accumulates value text (e.g. entity replacement text) before it is
// turned into a string.
59 private StringBuilder valueBuffer;
// Start line/position given to the constructor; used as the origin of the
// XmlParserInput created for the internal subset.
61 private int currentLinkedNodeLineNumber;
62 private int currentLinkedNodeLinePosition;
64 // Parameter entity placeholder
// Must be 0 when a subset ends, otherwise "INCLUDE section is not ended
// correctly." is raised. NOTE(review): presumably the INCLUDE nesting depth.
65 private int dtdIncludeSect;
// Backing field of the Normalization property.
67 private bool normalization;
// True only while GenerateDTDObjectModel is parsing the internal subset.
69 private bool processingInternalSubset;
// Ids most recently read by ReadExternalID; copied into entity declarations.
71 string cachedPublicId;
72 string cachedSystemId;
// NOTE(review): presumably raised through HandleError for validity errors -
// HandleError itself is not visible here.
77 public event ValidationEventHandler ValidationEventHandler;
// Creates a reader for the given DTD object model. The start line number
// and position are stored and later handed to the XmlParserInput that
// GenerateDTDObjectModel creates for the internal subset.
// NOTE(review): part of the parameter list and body is not visible here.
82 public DTDReader (DTDObjectModel dtd,
84 int startLinePosition)
87 currentLinkedNodeLineNumber = startLineNumber;
88 currentLinkedNodeLinePosition = startLinePosition;
// Base URI reported by the input that is currently being read.
public string BaseURI {
	get {
		string uri = currentInput.BaseURI;
		return uri;
	}
}
// Gets or sets the normalization flag; ReadEntityValueDecl consults it
// before rejecting invalid characters, and it is copied to the
// XmlTextReader instances used to check entity contents.
public bool Normalization {
	get {
		return this.normalization;
	}
	set {
		this.normalization = value;
	}
}
// Line number reported by the input that is currently being read.
public int LineNumber {
	get {
		int line = currentInput.LineNumber;
		return line;
	}
}
// Line position reported by the input that is currently being read.
public int LinePosition {
	get {
		int column = currentInput.LinePosition;
		return column;
	}
}
// IXmlLineInfo member telling callers whether line information is available.
// NOTE(review): the body is not visible in this excerpt.
111 public bool HasLineInfo ()
// Builds (does not throw) the XmlException used for well-formedness
// errors, tagged with this reader's line information and base URI.
// Callers are expected to write "throw NotWFError (...)".
private XmlException NotWFError (string message)
{
	IXmlLineInfo lineInfo = this;
	XmlException error = new XmlException (lineInfo, BaseURI, message);
	return error;
}
// Buffer initialisation: a fresh input stack, a name buffer of
// initialNameCapacity chars and a 512-char value builder.
// NOTE(review): the enclosing method header is not visible in this excerpt.
125 parserInputStack = new Stack ();
127 nameBuffer = new char [initialNameCapacity];
129 nameCapacity = initialNameCapacity;
131 valueBuffer = new StringBuilder (512);
// Parses the DTD into the object model: first the internal subset (when
// DTD.InternalSubset is non-empty), then the external subset (when
// DTD.SystemId is non-empty and a resolver is set), then scans entity
// declarations and clears DTD.ExternalResources.
// Throws a not-well-formed XmlException when a subset ends while
// dtdIncludeSect is non-zero.
134 internal DTDObjectModel GenerateDTDObjectModel ()
// Stack depth on entry; the loops below keep going until the stack is
// back to this depth (plus one while the external subset is being read).
137 int originalParserDepth = parserInputStack.Count;
139 if (DTD.InternalSubset != null && DTD.InternalSubset.Length > 0) {
140 this.processingInternalSubset = true;
// The current input is swapped for a reader over the internal subset
// text and restored once that subset has been processed.
141 XmlParserInput original = currentInput;
143 currentInput = new XmlParserInput (
144 new StringReader (DTD.InternalSubset),
146 currentLinkedNodeLineNumber,
147 currentLinkedNodeLinePosition);
// A text declaration is rejected by ReadTextDeclaration once InitialState is false.
148 currentInput.InitialState = false;
150 more = ProcessDTDSubset ();
151 if (PeekChar () == -1 && parserInputStack.Count > 0)
153 } while (more || parserInputStack.Count > originalParserDepth);
154 if (dtdIncludeSect != 0)
155 throw NotWFError ("INCLUDE section is not ended correctly.");
157 currentInput = original;
158 this.processingInternalSubset = false;
160 if (DTD.SystemId != null && DTD.SystemId != String.Empty && DTD.Resolver != null) {
161 PushParserInput (DTD.SystemId);
163 more = ProcessDTDSubset ();
164 if (PeekChar () == -1 && parserInputStack.Count > 1)
166 } while (more || parserInputStack.Count > originalParserDepth + 1);
167 if (dtdIncludeSect != 0)
168 throw NotWFError ("INCLUDE section is not ended correctly.");
172 ArrayList sc = new ArrayList ();
174 // Entity recursion check.
// NOTE(review): only entities with a non-null NotationName are scanned
// here - confirm this condition is intended.
175 foreach (DTDEntityDeclaration ent in DTD.EntityDecls.Values) {
176 if (ent.NotationName != null) {
177 ent.ScanEntityValue (sc);
181 // release unnecessary memory usage
182 DTD.ExternalResources.Clear ();
187 // Read any one of following:
188 // elementdecl, AttlistDecl, EntityDecl, NotationDecl,
189 // PI, Comment, Parameter Entity, or doctype termination char(']')
191 // Returns true if it may have any more contents, or false if not.
// Throws a not-well-formed XmlException on unexpected end of stream, on
// unknown markup after '<', on an unbalanced end of a conditional section
// and on any other unexpected character.
// NOTE(review): the dispatching switch/case lines are not visible here.
192 private bool ProcessDTDSubset ()
195 int c2 = ReadChar ();
// Parameter entity reference: remember that the internal subset used one,
// then insert the entity value into the current input.
201 // It affects on entity references' well-formedness
202 if (this.processingInternalSubset)
203 DTD.InternalSubsetHasPEReference = true;
204 string peName = ReadName ();
206 string peValue = GetPEValue (peName);
207 if (peValue == String.Empty)
209 currentInput.InsertParameterEntityBuffer (peValue);
210 // int currentLine = currentInput.LineNumber;
211 // int currentColumn = currentInput.LinePosition;
// Keeps going while the inserted parameter entity text is still pending.
212 while (currentInput.HasPEBuffer)
215 // FIXME: Implement correct nest-level check.
216 // Don't depend on lineinfo (might not be supplied)
217 // if (currentInput.LineNumber != currentLine ||
218 // currentInput.LinePosition != currentColumn)
219 // throw NotWFError ("Incorrectly nested parameter entity.");
226 // Only read, no store.
227 ReadProcessingInstruction ();
230 CompileDeclaration ();
233 throw NotWFError ("Unexpected end of stream.");
235 throw NotWFError ("Syntax Error after '<' character: " + (char) c);
// A section end is only legal while dtdIncludeSect is non-zero.
239 if (dtdIncludeSect == 0)
240 throw NotWFError ("Unbalanced end of INCLUDE/IGNORE section.");
247 throw NotWFError (String.Format ("Syntax Error inside doctypedecl markup : {0}({1})", c2, (char) c2));
249 currentInput.InitialState = false;
// Handles one "<!..." markup declaration: ENTITY (general or parameter),
// ELEMENT, ATTLIST, NOTATION or a conditional section, and registers the
// declaration that was read on the DTD object model.
// Throws a not-well-formed XmlException on unrecognised markup.
// NOTE(review): the dispatching switch/case lines are not visible here.
253 private void CompileDeclaration ()
259 // Only read, no store.
267 if (!SkipWhitespace ())
269 "Whitespace is required after '<!ENTITY' in DTD entity declaration.");
// '%' after "<!ENTITY" introduces a parameter entity declaration.
271 if (PeekChar () == '%') {
273 if (!SkipWhitespace ()) {
277 // FIXME: Is this allowed? <!ENTITY % %name; ...>
278 // (i.e. Can PE name be replaced by another PE?)
280 if (XmlChar.IsNameChar (PeekChar ()))
281 ReadParameterEntityDecl ();
283 throw NotWFError ("expected name character");
// A general entity is only added when its name is not declared yet.
287 DTDEntityDeclaration ent = ReadEntityDecl ();
288 if (DTD.EntityDecls [ent.Name] == null)
289 DTD.EntityDecls.Add (ent.Name, ent);
293 DTDElementDeclaration el = ReadElementDecl ();
294 DTD.ElementDecls.Add (el.Name, el);
297 throw NotWFError ("Syntax Error after '<!E' (ELEMENT or ENTITY must be found)");
302 DTDAttListDeclaration atl = ReadAttListDecl ();
303 DTD.AttListDecls.Add (atl.Name, atl);
307 DTDNotationDeclaration not = ReadNotationDecl ();
308 DTD.NotationDecls.Add (not.Name, not);
311 // conditional sections
315 switch (ReadChar ()) {
318 ExpectAfterWhitespace ('[');
328 throw NotWFError ("Syntax Error after '<!' characters.");
// Skips the body of an IGNORE conditional section. A local counter starts
// at 1 and the loop runs until it reaches 0; the peeks for '!', '[', ']'
// and '>' look for nested section starts and section ends.
// NOTE(review): the lines that change the counter are not visible here.
// Throws a not-well-formed XmlException when the section is not closed.
332 private void ReadIgnoreSect ()
334 ExpectAfterWhitespace ('[');
335 int dtdIgnoreSect = 1;
337 while (dtdIgnoreSect > 0) {
338 switch (ReadChar ()) {
340 throw NotWFError ("Unexpected IGNORE section end.");
342 if (PeekChar () != '!')
345 if (PeekChar () != '[')
351 if (PeekChar () != ']')
354 if (PeekChar () != '>')
361 if (dtdIgnoreSect != 0)
362 throw NotWFError ("IGNORE section is not ended correctly.");
365 // The reader is positioned on the head of the name.
// Reads an element declaration (name followed by content spec) into a new
// DTDElementDeclaration flagged with whether it came from the internal
// subset. Throws a not-well-formed XmlException when the whitespace
// before or after the name is missing.
366 private DTDElementDeclaration ReadElementDecl ()
368 DTDElementDeclaration decl = new DTDElementDeclaration (DTD);
369 decl.IsInternalSubset = this.processingInternalSubset;
371 if (!SkipWhitespace ())
372 throw NotWFError ("Whitespace is required between '<!ELEMENT' and name in DTD element declaration.");
374 decl.Name = ReadName ();
375 if (!SkipWhitespace ())
376 throw NotWFError ("Whitespace is required between name and content in DTD element declaration.");
378 ReadContentSpec (decl);
380 // This expanding is only allowed as a non-validating parser.
386 // read 'children'(BNF) of contentspec
// Fills decl.ContentModel from the content spec text. A leading '#'
// means mixed content; otherwise the particles are read through ReadCP and
// '|' / ',' must be used consistently within the group.
// Throws a not-well-formed XmlException on inconsistent separators or
// when no content spec is found.
// NOTE(review): several branches of this method are not visible here.
387 private void ReadContentSpec (DTDElementDeclaration decl)
401 DTDContentModel model = decl.ContentModel;
404 if(PeekChar () == '#') {
405 // Mixed Contents. "#PCDATA" must appear first.
406 decl.IsMixedContent = true;
407 model.Occurence = DTDOccurence.ZeroOrMore;
408 model.OrderType = DTDContentOrderType.Or;
412 while(PeekChar () != ')') {
414 if (PeekChar () == '%') {
// Each named child becomes its own content model; AddContentModel
// reports names that are already present in the collection.
421 DTDContentModel elem = new DTDContentModel (DTD, decl.Name);
422 // elem.LineNumber = currentInput.LineNumber;
423 // elem.LinePosition = currentInput.LinePosition;
424 elem.ElementName = ReadName ();
425 this.AddContentModel (model.ChildModels, elem);
430 if (model.ChildModels.Count > 0)
432 else if (PeekChar () == '*')
435 // Non-Mixed Contents
436 model.ChildModels.Add (ReadCP (decl));
439 do { // copied from ReadCP() ...;-)
440 if (PeekChar () == '%') {
444 if(PeekChar ()=='|') {
446 if (model.OrderType == DTDContentOrderType.Seq)
447 throw NotWFError ("Inconsistent choice markup in sequence cp.");
448 model.OrderType = DTDContentOrderType.Or;
451 AddContentModel (model.ChildModels, ReadCP (decl));
454 else if(PeekChar () == ',')
457 if (model.OrderType == DTDContentOrderType.Or)
458 throw NotWFError ("Inconsistent sequence markup in choice cp.");
459 model.OrderType = DTDContentOrderType.Seq;
462 model.ChildModels.Add (ReadCP (decl));
// Occurrence indicator that follows the closing parenthesis.
474 model.Occurence = DTDOccurence.Optional;
478 model.Occurence = DTDOccurence.ZeroOrMore;
482 model.Occurence = DTDOccurence.OneOrMore;
491 throw NotWFError ("ContentSpec is missing.");
495 // Read 'cp' (BNF) of contentdecl (BNF)
// Reads one content particle: either a parenthesised group, whose members
// are read recursively, or a single element name; an occurrence indicator
// after it sets model.Occurence.
// Throws a not-well-formed XmlException when '|' and ',' are mixed in one
// group.
// NOTE(review): the return statement is not visible in this excerpt.
496 private DTDContentModel ReadCP (DTDElementDeclaration elem)
498 DTDContentModel model = null;
500 if(PeekChar () == '(') {
501 model = new DTDContentModel (DTD, elem.Name);
504 model.ChildModels.Add (ReadCP (elem));
507 if (PeekChar () == '%') {
511 if(PeekChar ()=='|') {
513 if (model.OrderType == DTDContentOrderType.Seq)
514 throw NotWFError ("Inconsistent choice markup in sequence cp.");
515 model.OrderType = DTDContentOrderType.Or;
518 AddContentModel (model.ChildModels, ReadCP (elem));
521 else if(PeekChar () == ',') {
523 if (model.OrderType == DTDContentOrderType.Or)
524 throw NotWFError ("Inconsistent sequence markup in choice cp.");
525 model.OrderType = DTDContentOrderType.Seq;
528 model.ChildModels.Add (ReadCP (elem));
535 ExpectAfterWhitespace (')');
// Not a group: the particle is a single element name.
539 model = new DTDContentModel (DTD, elem.Name);
540 model.ElementName = ReadName ();
543 switch(PeekChar ()) {
545 model.Occurence = DTDOccurence.Optional;
549 model.Occurence = DTDOccurence.ZeroOrMore;
553 model.Occurence = DTDOccurence.OneOrMore;
// Checks cm against the models already in cmc: when cm names an element
// that is already present, a validity error is reported via HandleError.
// NOTE(review): presumably cm is then added to cmc - that line is not
// visible in this excerpt.
560 private void AddContentModel (DTDContentModelCollection cmc, DTDContentModel cm)
562 if (cm.ElementName != null) {
563 for (int i = 0; i < cmc.Count; i++) {
564 if (cmc [i].ElementName == cm.ElementName) {
565 HandleError (new XmlSchemaException ("Element content must be unique inside mixed content model.",
578 // The reader is positioned on the first name char.
// Reads a parameter entity declaration. A value starting with 'S' or 'P'
// is treated as an external id and resolved through DTD.Resolver;
// otherwise the quoted literal is read and its references are resolved.
// The declaration is added to DTD.PEDecls only when the name is not
// declared yet.
// Throws a not-well-formed XmlException on missing whitespace, a missing
// quote, end of stream inside the value, or an invalid character.
579 private void ReadParameterEntityDecl ()
581 DTDParameterEntityDeclaration decl =
582 new DTDParameterEntityDeclaration (DTD);
583 decl.BaseURI = BaseURI;
585 decl.Name = ReadName ();
586 if (!SkipWhitespace ())
587 throw NotWFError ("Whitespace is required after name in DTD parameter entity declaration.");
589 if (PeekChar () == 'S' || PeekChar () == 'P') {
590 // read publicId/systemId
592 decl.PublicId = cachedPublicId;
593 decl.SystemId = cachedSystemId;
595 decl.Resolve (this.DTD.Resolver);
597 ResolveExternalEntityReplacementText (decl);
600 int quoteChar = ReadChar ();
601 if (quoteChar != '\'' && quoteChar != '"')
602 throw NotWFError ("quotation char was expected.");
609 throw NotWFError ("unexpected end of stream in entity value definition.");
// A quote character only ends the literal when it matches the opening
// one; the other kind of quote is appended as ordinary text.
611 if (quoteChar == '"')
614 AppendValueChar ('"');
617 if (quoteChar == '\'')
620 AppendValueChar ('\'');
623 if (XmlChar.IsInvalid (c))
624 throw NotWFError ("Invalid character was used to define parameter entity.");
629 decl.LiteralEntityValue = CreateValueString ();
631 ResolveInternalEntityReplacementText (decl);
633 ExpectAfterWhitespace ('>');
636 if (DTD.PEDecls [decl.Name] == null) {
637 DTD.PEDecls.Add (decl.Name, decl);
// Derives decl.ReplacementText from decl.LiteralEntityValue for an
// external entity. A literal that begins with "<?xml" is run through an
// XmlTextReader: general entities are re-serialised with ReadOuterXml,
// other entities take the reader's remaining text. Any other literal is
// used unchanged.
// NOTE(review): StartsWith is called without a StringComparison, so the
// prefix test is culture-sensitive - confirm that is acceptable.
641 private void ResolveExternalEntityReplacementText (DTDEntityBase decl)
643 if (decl.LiteralEntityValue.StartsWith ("<?xml")) {
644 XmlTextReader xtr = new XmlTextReader (decl.LiteralEntityValue, XmlNodeType.Element, null);
645 if (decl is DTDEntityDeclaration) {
646 // GE - also checked as valid contents
647 StringBuilder sb = new StringBuilder ();
648 xtr.Normalization = this.Normalization;
651 sb.Append (xtr.ReadOuterXml ());
652 decl.ReplacementText = sb.ToString ();
656 decl.ReplacementText = xtr.GetRemainder ().ReadToEnd ();
659 decl.ReplacementText = decl.LiteralEntityValue;
// Builds decl.ReplacementText from decl.LiteralEntityValue: character
// references are replaced by the referenced character, general entity
// references are copied through unexpanded, and parameter entity
// references are replaced by GetPEValue. For general entity declarations
// the result is additionally passed through an XmlTextReader.
// Throws XmlException on a reference without ';' or an invalid character.
662 private void ResolveInternalEntityReplacementText (DTDEntityBase decl)
664 string value = decl.LiteralEntityValue;
665 int len = value.Length;
667 for (int i = 0; i < len; i++) {
// A reference runs up to the next ';'.
674 end = value.IndexOf (';', i);
676 throw new XmlException (decl, decl.BaseURI, "Invalid reference markup.");
678 if (value [i] == '#') {
680 ch = GetCharacterReference (decl, value, ref i, end);
681 if (XmlChar.IsInvalid (ch))
682 throw NotWFError ("Invalid character was used to define parameter entity.");
685 name = value.Substring (i, end - i);
686 // don't expand "general" entity.
687 AppendValueChar ('&');
688 valueBuffer.Append (name);
689 AppendValueChar (';');
693 if (XmlChar.IsInvalid (ch))
694 throw new XmlException (decl, decl.BaseURI, "Invalid character was found in the entity declaration.");
695 AppendValueChar (ch);
699 end = value.IndexOf (';', i);
701 throw new XmlException (decl, decl.BaseURI, "Invalid reference markup.");
702 name = value.Substring (i, end - i);
703 valueBuffer.Append (GetPEValue (name));
707 AppendValueChar (ch);
711 decl.ReplacementText = CreateValueString ();
713 if (decl is DTDEntityDeclaration) {
714 // GE - also checked as valid contents
715 XmlTextReader xtr = new XmlTextReader (decl.ReplacementText, XmlNodeType.Element, null);
716 StringBuilder sb = new StringBuilder ();
717 xtr.Normalization = this.Normalization;
720 sb.Append (xtr.ReadOuterXml ());
721 decl.ReplacementText = sb.ToString ();
// Parses the number of a character reference found in value between
// index and end; a leading 'x' selects hexadecimal, otherwise decimal.
// Throws XmlException (located at li) when int.Parse reports a
// FormatException.
// NOTE(review): OverflowException from int.Parse is not caught, so an
// overlong digit string would surface as that exception - confirm whether
// it should be reported as an XmlException as well.
726 private int GetCharacterReference (DTDEntityBase li, string value, ref int index, int end)
729 if (value [index] == 'x') {
731 ret = int.Parse (value.Substring (index + 1, end - index - 1), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
732 } catch (FormatException) {
733 throw new XmlException (li, li.BaseURI, "Invalid number for a character reference.");
737 ret = int.Parse (value.Substring (index, end - index), CultureInfo.InvariantCulture);
738 } catch (FormatException) {
739 throw new XmlException (li, li.BaseURI, "Invalid number for a character reference.");
// Returns the replacement text of the parameter entity named peName.
// Throws a not-well-formed XmlException when the declaration found is
// flagged IsInternalSubset, or when no declaration exists and either the
// DTD is standalone or it has neither a SystemId nor a parameter entity
// reference in the internal subset. In the remaining "not found" case a
// validity error is reported through HandleError instead.
// NOTE(review): the value returned in that last case is not visible here.
746 private string GetPEValue (string peName)
748 DTDParameterEntityDeclaration peDecl =
749 DTD.PEDecls [peName] as DTDParameterEntityDeclaration;
750 if (peDecl != null) {
751 if (peDecl.IsInternalSubset)
752 throw NotWFError ("Parameter entity is not allowed in internal subset entity '" + peName + "'");
753 return peDecl.ReplacementText;
755 // See XML 1.0 section 4.1 for both WFC and VC.
756 if ((DTD.SystemId == null && !DTD.InternalSubsetHasPEReference) || DTD.IsStandalone)
757 throw NotWFError (String.Format ("Parameter entity '{0}' not found.",peName));
758 HandleError (new XmlSchemaException (
759 "Parameter entity " + peName + " not found.", null));
// Expands every parameter entity reference found at the current position
// by calling TryExpandPERefSpaceKeep while the next character is '%'.
// NOTE(review): the return statements are not visible in this excerpt.
763 private bool TryExpandPERef ()
765 if (PeekChar () != '%')
767 while (PeekChar () == '%') {
768 TryExpandPERefSpaceKeep ();
774 // Tries to expand parameter entities, but it should not skip spaces
// Throws a not-well-formed XmlException when a '%' reference is met while
// the internal subset is being processed.
// NOTE(review): the expansion and return lines are not visible here.
775 private bool TryExpandPERefSpaceKeep ()
777 if (PeekChar () == '%') {
778 if (this.processingInternalSubset)
779 throw NotWFError ("Parameter entity reference is not allowed inside internal subset.");
788 // reader is positioned after '%'
// Reads the parameter entity name and inserts that entity's replacement
// text into the current input. An undeclared entity is reported as a
// validity error through HandleError and nothing is inserted.
789 private void ExpandPERef ()
791 string peName = ReadName ();
793 DTDParameterEntityDeclaration peDecl =
794 DTD.PEDecls [peName] as DTDParameterEntityDeclaration;
795 if (peDecl == null) {
796 HandleError (new XmlSchemaException ("Parameter entity " + peName + " not found.", null));
797 return; // do nothing
799 // FIXME: These leading/trailing ' ' is anyways supplied inside this method!
800 // currentInput.InsertParameterEntityBuffer (" " + peDecl.ReplacementText + " ");
801 currentInput.InsertParameterEntityBuffer (peDecl.ReplacementText);
804 // The reader is positioned on the head of the name.
// Reads a general entity declaration. A value starting with 'S' or 'P'
// is an external id, optionally followed by an NDATA notation name:
// without a notation name the entity is resolved through DTD.Resolver,
// with one its literal value and replacement text are set to empty.
// Any other value is read as a quoted literal and resolved internally.
// Throws a not-well-formed XmlException on missing required whitespace.
805 private DTDEntityDeclaration ReadEntityDecl ()
807 DTDEntityDeclaration decl = new DTDEntityDeclaration (DTD);
808 decl.IsInternalSubset = this.processingInternalSubset;
810 decl.Name = ReadName ();
811 if (!SkipWhitespace ())
812 throw NotWFError ("Whitespace is required between name and content in DTD entity declaration.");
815 if (PeekChar () == 'S' || PeekChar () == 'P') {
818 decl.PublicId = cachedPublicId;
819 decl.SystemId = cachedSystemId;
820 if (SkipWhitespace ()) {
821 if (PeekChar () == 'N') {
824 if (!SkipWhitespace ())
825 throw NotWFError ("Whitespace is required after NDATA.");
826 decl.NotationName = ReadName (); // ndata_name
829 if (decl.NotationName == null) {
830 decl.Resolve (this.DTD.Resolver);
831 ResolveExternalEntityReplacementText (decl);
834 decl.LiteralEntityValue = String.Empty;
835 decl.ReplacementText = String.Empty;
840 ReadEntityValueDecl (decl);
841 ResolveInternalEntityReplacementText (decl);
844 // This expanding is only allowed as a non-validating parser.
// Reads the quoted literal of a general entity into
// decl.LiteralEntityValue. Parameter entity references inside the
// literal are replaced by GetPEValue, except for internal subset
// declarations, where they are a well-formedness error. Invalid
// characters are only rejected when normalization is on.
// Throws a not-well-formed XmlException on a missing quote or end of
// stream inside the literal.
850 private void ReadEntityValueDecl (DTDEntityDeclaration decl)
853 // quotation char will be finally removed on unescaping
854 int quoteChar = ReadChar ();
855 if (quoteChar != '\'' && quoteChar != '"')
856 throw NotWFError ("quotation char was expected.");
859 while (PeekChar () != quoteChar) {
860 int ch = ReadChar ();
863 string name = ReadName ();
865 if (decl.IsInternalSubset)
866 throw NotWFError (String.Format ("Parameter entity is not allowed in internal subset entity '{0}'", name));
867 valueBuffer.Append (GetPEValue (name));
870 throw NotWFError ("unexpected end of stream.");
872 if (this.normalization && XmlChar.IsInvalid (ch))
873 throw NotWFError ("Invalid character was found in the entity declaration.");
874 AppendValueChar (ch);
878 // string value = Dereference (CreateValueString (), false);
879 string value = CreateValueString ();
883 decl.LiteralEntityValue = value;
// Reads an ATTLIST declaration for one element. An existing
// DTDAttListDeclaration for the same element name is looked up first;
// attribute definitions are then read while the next character is a name
// character. A second ID-typed attribute is reported as a validity error
// through HandleError.
// Throws a not-well-formed XmlException on missing required whitespace.
886 private DTDAttListDeclaration ReadAttListDecl ()
888 TryExpandPERefSpaceKeep ();
889 if (!SkipWhitespace ())
890 throw NotWFError ("Whitespace is required between ATTLIST and name in DTD attlist declaration.");
892 string name = ReadName (); // target element name
893 DTDAttListDeclaration decl =
894 DTD.AttListDecls [name] as DTDAttListDeclaration;
896 decl = new DTDAttListDeclaration (DTD);
897 decl.IsInternalSubset = this.processingInternalSubset;
// Whitespace after the name may only be omitted for an empty declaration.
900 if (!SkipWhitespace ())
901 if (PeekChar () != '>')
902 throw NotWFError ("Whitespace is required between name and content in non-empty DTD attlist declaration.");
906 while (XmlChar.IsNameChar (PeekChar ())) {
907 DTDAttributeDefinition def = ReadAttributeDefinition ();
908 // There must not be two or more ID attributes.
909 if (def.Datatype.TokenizedType == XmlTokenizedType.ID) {
910 for (int i = 0; i < decl.Definitions.Count; i++) {
911 DTDAttributeDefinition d = decl [i];
912 if (d.Datatype.TokenizedType == XmlTokenizedType.ID) {
913 HandleError (new XmlSchemaException ("AttList declaration must not contain two or more ID attributes.",
914 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
919 if (decl [def.Name] == null)
925 // This expanding is only allowed as a non-validating parser.
// Reads one attribute definition: the attribute name, its type and its
// default declaration. The type keyword is recognised from its leading
// characters and mapped to the XmlSchemaDatatype of the matching name;
// NOTATION and enumerated types also collect their listed names/tokens.
// Throws a not-well-formed XmlException on missing required whitespace or
// an unrecognised type.
// NOTE(review): many case/Expect lines are not visible in this excerpt.
931 private DTDAttributeDefinition ReadAttributeDefinition ()
933 DTDAttributeDefinition def = new DTDAttributeDefinition (DTD);
934 def.IsInternalSubset = this.processingInternalSubset;
938 def.Name = ReadName ();
939 if (!SkipWhitespace ())
940 throw NotWFError ("Whitespace is required between name and content in DTD attribute definition.");
944 switch(PeekChar ()) {
947 def.Datatype = XmlSchemaDatatype.FromName ("normalizedString", XmlSchema.Namespace);
949 case 'I': // ID, IDREF, IDREFS
951 if(PeekChar () == 'R') {
953 if(PeekChar () == 'S') {
956 def.Datatype = XmlSchemaDatatype.FromName ("IDREFS", XmlSchema.Namespace);
959 def.Datatype = XmlSchemaDatatype.FromName ("IDREF", XmlSchema.Namespace);
962 def.Datatype = XmlSchemaDatatype.FromName ("ID", XmlSchema.Namespace);
964 case 'E': // ENTITY, ENTITIES
966 switch(ReadChar ()) {
968 def.Datatype = XmlSchemaDatatype.FromName ("ENTITY", XmlSchema.Namespace);
970 case 'I': // ENTITIES
972 def.Datatype = XmlSchemaDatatype.FromName ("ENTITIES", XmlSchema.Namespace);
976 case 'N': // NMTOKEN, NMTOKENS, NOTATION
978 switch(PeekChar ()) {
981 if(PeekChar ()=='S') { // NMTOKENS
983 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKENS", XmlSchema.Namespace);
986 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKEN", XmlSchema.Namespace);
990 def.Datatype = XmlSchemaDatatype.FromName ("NOTATION", XmlSchema.Namespace);
991 if (!SkipWhitespace ())
992 throw NotWFError ("Whitespace is required between name and content in DTD attribute definition.");
995 def.EnumeratedNotations.Add (ReadName ()); // notation name
997 while(PeekChar () == '|') {
1000 def.EnumeratedNotations.Add (ReadName ()); // notation name
1006 throw NotWFError ("attribute declaration syntax error.");
1009 default: // Enumerated Values
1010 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKEN", XmlSchema.Namespace);
// Enumerated values are stored in their NMTOKEN-normalized form.
1014 def.EnumeratedAttributeDeclaration.Add (
1015 def.Datatype.Normalize (ReadNmToken ())); // enum value
1017 while(PeekChar () == '|') {
1020 def.EnumeratedAttributeDeclaration.Add (
1021 def.Datatype.Normalize (ReadNmToken ())); // enum value
1027 TryExpandPERefSpaceKeep ();
1028 if (!SkipWhitespace ())
1029 throw NotWFError ("Whitespace is required between type and occurence in DTD attribute definition.");
1032 ReadAttributeDefaultValue (def);
// Reads the default declaration of an attribute and validates it.
// A '#' keyword sets def.OccurenceType to Required, Optional or Fixed (a
// fixed value follows the keyword); otherwise a plain default value is
// read. When a default value exists it is checked against the enumerated
// values / notations, parsed with the attribute's datatype and, for
// ENTITY / ENTITIES types, checked against the declared entities.
// Validity problems are reported through HandleError, not thrown.
// Throws a not-well-formed XmlException when whitespace after FIXED is
// missing.
1037 private void ReadAttributeDefaultValue (DTDAttributeDefinition def)
1039 if(PeekChar () == '#')
1045 Expect ("REQUIRED");
1046 def.OccurenceType = DTDAttributeOccurenceType.Required;
1050 def.OccurenceType = DTDAttributeOccurenceType.Optional;
1054 def.OccurenceType = DTDAttributeOccurenceType.Fixed;
1055 if (!SkipWhitespace ())
1056 throw NotWFError ("Whitespace is required between FIXED and actual value in DTD attribute definition.");
1057 def.UnresolvedDefaultValue = ReadDefaultAttribute ();
1061 // one of the enumerated value
1064 def.UnresolvedDefaultValue = ReadDefaultAttribute ();
1067 // VC: If default value exists, it should be valid.
1068 if (def.DefaultValue != null) {
1069 string normalized = def.Datatype.Normalize (def.DefaultValue);
1070 bool breakup = false;
1071 object parsed = null;
1073 // enumeration validity
1074 if (def.EnumeratedAttributeDeclaration.Count > 0) {
1075 if (!def.EnumeratedAttributeDeclaration.Contains (normalized)) {
1076 HandleError (new XmlSchemaException ("Default value is not one of the enumerated values.",
1077 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1081 if (def.EnumeratedNotations.Count > 0) {
1082 if (!def.EnumeratedNotations.Contains (normalized)) {
1083 HandleError (new XmlSchemaException ("Default value is not one of the enumerated notation values.",
1084 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1089 // type based validity
// NOTE(review): the message below mentions ENTITY although the parse is
// done with whatever datatype the attribute has - confirm the wording.
1092 parsed = def.Datatype.ParseValue (normalized, DTD.NameTable, null);
1093 } catch (Exception ex) { // FIXME: (wishlist) bad catch ;-(
1094 HandleError (new XmlSchemaException ("Invalid default value for ENTITY type.",
1095 def.LineNumber, def.LinePosition, null, def.BaseURI, ex));
1100 switch (def.Datatype.TokenizedType) {
1101 case XmlTokenizedType.ENTITY:
1102 if (DTD.EntityDecls [normalized] == null)
1103 HandleError (new XmlSchemaException ("Specified entity declaration used by default attribute value was not found.",
1104 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1106 case XmlTokenizedType.ENTITIES:
1107 string [] entities = parsed as string [];
1108 for (int i = 0; i < entities.Length; i++) {
1109 string entity = entities [i];
1110 if (DTD.EntityDecls [entity] == null)
1111 HandleError (new XmlSchemaException ("Specified entity declaration used by default attribute value was not found.",
1112 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1118 // Extra ID attribute validity check.
// Reported whenever an ID-typed attribute has any unresolved default value.
1119 if (def.Datatype != null && def.Datatype.TokenizedType == XmlTokenizedType.ID)
1120 if (def.UnresolvedDefaultValue != null)
1121 HandleError (new XmlSchemaException ("ID attribute must not have fixed value constraint.",
1122 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
// Reads a NOTATION declaration: the name (split into prefix and local
// name at the first ':' when namespaces is set), then a PUBLIC id with an
// optional system literal, or a SYSTEM literal.
// Throws a not-well-formed XmlException on missing required whitespace or
// when neither a public nor a system id is given.
// NOTE(review): "namespaces" is defined outside this excerpt.
1126 private DTDNotationDeclaration ReadNotationDecl()
1128 DTDNotationDeclaration decl = new DTDNotationDeclaration (DTD);
1129 if (!SkipWhitespace ())
1130 throw NotWFError ("Whitespace is required between NOTATION and name in DTD notation declaration.");
1132 decl.Name = ReadName (); // notation name
1134 if (namespaces) { // copy from SetProperties ;-)
1135 int indexOfColon = decl.Name.IndexOf (':');
1137 if (indexOfColon == -1) {
1138 decl.Prefix = String.Empty;
1139 decl.LocalName = decl.Name;
1141 decl.Prefix = decl.Name.Substring (0, indexOfColon);
1142 decl.LocalName = decl.Name.Substring (indexOfColon + 1);
1146 decl.Prefix = String.Empty;
1147 decl.LocalName = decl.Name;
1151 if(PeekChar () == 'P') {
1152 decl.PublicId = ReadPubidLiteral ();
// A system literal may follow the public id, but only after whitespace.
1153 bool wsSkipped = SkipWhitespace ();
1154 if (PeekChar () == '\'' || PeekChar () == '"') {
1156 throw NotWFError ("Whitespace is required between public id and system id.");
1157 decl.SystemId = ReadSystemLiteral (false);
1160 } else if(PeekChar () == 'S') {
1161 decl.SystemId = ReadSystemLiteral (true);
1164 if(decl.PublicId == null && decl.SystemId == null)
1165 throw NotWFError ("public or system declaration required for \"NOTATION\" declaration.");
1166 // This expanding is only allowed as a non-validating parser.
// Reads an external id and stores the result in cachedSystemId and
// cachedPublicId. In the PUBLIC form the public id is read first and
// whitespace is required before the system literal that follows.
// NOTE(review): the case labels are not visible in this excerpt.
1172 private void ReadExternalID () {
1173 switch (PeekChar ()) {
1175 cachedSystemId = ReadSystemLiteral (true);
1178 cachedPublicId = ReadPubidLiteral ();
1179 if (!SkipWhitespace ())
1180 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
1181 cachedSystemId = ReadSystemLiteral (false);
1186 // The reader is positioned on the first 'S' of "SYSTEM".
// Reads a quoted system literal and returns its content without the
// quotes. Throws a not-well-formed XmlException when whitespace after
// 'SYSTEM' is missing or the stream ends inside the literal.
// NOTE(review): presumably the SYSTEM keyword is only consumed when
// expectSYSTEM is true - those lines are not visible here.
1187 private string ReadSystemLiteral (bool expectSYSTEM)
1191 if (!SkipWhitespace ())
1192 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
1196 int quoteChar = ReadChar (); // apos or quot
1198 ClearValueBuffer ();
1199 while (c != quoteChar) {
1202 throw NotWFError ("Unexpected end of stream in ExternalID.");
1204 AppendValueChar (c);
1206 return CreateValueString (); //currentTag.ToString (startPos, currentTag.Length - 1 - startPos);
// Reads a quoted public id literal and returns its content. Every
// character other than the closing quote must satisfy
// XmlChar.IsPubidChar. Throws a not-well-formed XmlException when
// whitespace after 'PUBLIC' is missing, on end of stream, or on a
// character that is not allowed.
1209 private string ReadPubidLiteral()
1212 if (!SkipWhitespace ())
1213 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
1214 int quoteChar = ReadChar ();
1216 ClearValueBuffer ();
1217 while(c != quoteChar)
1220 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
1221 if(c != quoteChar && !XmlChar.IsPubidChar (c))
1222 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char) c));
1224 AppendValueChar (c);
1226 return CreateValueString (); //currentTag.ToString (startPos, currentTag.Length - 1 - startPos);
// Reads an XML Name starting at the current position (the reader must be
// on the first character of the name) and returns it.
internal string ReadName ()
{
	const bool asNmToken = false;
	return ReadNameOrNmToken (asNmToken);
}
// Reads an XML Nmtoken starting at the current position (the reader must
// be on the first character of the token) and returns it.
private string ReadNmToken ()
{
	const bool asNmToken = true;
	return ReadNameOrNmToken (asNmToken);
}
// Reads a Nmtoken (isNameToken true) or a Name (isNameToken false).
// The first character is checked with XmlChar.IsNameChar for a nmtoken
// and XmlChar.IsFirstNameChar for a name; it and all following name
// characters are appended to the name buffer, and the result comes from
// CreateNameString.
// Throws a not-well-formed XmlException on an illegal first character.
// NOTE(review): a line between the check and the first append is not
// visible here (presumably it resets the name buffer) - confirm.
1243 private string ReadNameOrNmToken(bool isNameToken)
1245 int ch = PeekChar ();
1247 if (!XmlChar.IsNameChar (ch))
1248 throw NotWFError (String.Format ("a nmtoken did not start with a legal character {0} ({1})", ch, (char) ch));
1251 if (!XmlChar.IsFirstNameChar (ch))
1252 throw NotWFError (String.Format ("a name did not start with a legal character {0} ({1})", ch, (char) ch));
1257 AppendNameChar (ReadChar ());
1259 while (XmlChar.IsNameChar (PeekChar ())) {
1260 AppendNameChar (ReadChar ());
1263 return CreateNameString ();
1266 // Read the next character and compare it against the
1267 // specified character.
// Throws a not-well-formed XmlException when the character read differs
// from expected; the message is formatted with the invariant culture.
1268 private void Expect (int expected)
1270 int ch = ReadChar ();
1272 if (ch != expected) {
1273 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1274 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
// Consumes the characters of expected one by one; each character is
// checked by the single-character Expect overload, which raises the
// not-well-formed error on the first mismatch.
private void Expect (string expected)
{
	foreach (char required in expected)
		Expect (required);
}
// Reads characters, passing over whitespace, and throws a not-well-formed
// XmlException naming the character found when it is not c.
// NOTE(review): the loop and comparison lines are not visible here.
1289 private void ExpectAfterWhitespace (char c)
1292 int i = ReadChar ();
1293 if (XmlChar.IsWhitespace (i))
1296 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0} but found {1} [{2}].", c, (char) i, i));
1301 // Does not consume the first non-whitespace character.
// "skipped" records whether the next character was whitespace on entry;
// the loop then runs while the peeked character is whitespace.
// NOTE(review): the loop body and return statement are not visible here.
1302 private bool SkipWhitespace ()
1304 bool skipped = XmlChar.IsWhitespace (PeekChar ());
1305 while (XmlChar.IsWhitespace (PeekChar ()))
// Forwards to PeekChar on the current input and returns its result.
// Callers in this class treat -1 as end of input.
private int PeekChar ()
{
	int next = currentInput.PeekChar ();
	return next;
}
// Forwards to ReadChar on the current input and returns its result.
private int ReadChar ()
{
	int consumed = currentInput.ReadChar ();
	return consumed;
}
// Converts a Unicode code point into its UTF-16 string form: one char for
// code points up to U+FFFF, a high/low surrogate pair above that.
//
// The previous arithmetic (ch / 0x10000 and ch % 0x10000) only produced a
// valid pair for code points whose low 16 bits were below 0x400, and it
// also sent U+FFFF down the surrogate path.
private string ExpandSurrogateChar (int ch)
{
	if (ch <= Char.MaxValue)
		return ((char) ch).ToString ();

	// UTF-16: subtract 0x10000, then the top 10 bits of the remaining
	// 20-bit value go into the high surrogate and the bottom 10 bits
	// into the low surrogate.
	int offset = ch - 0x10000;
	char [] tmp = new char [] {
		(char) (0xD800 + (offset >> 10)),
		(char) (0xDC00 + (offset & 0x3FF))
	};
	return new string (tmp);
}
1330 // The reader is positioned on the first character after
1331 // the leading '<!--'.
// Reads a comment without storing it. "--" inside the comment must be
// followed by '>', and every character is checked with XmlChar.IsInvalid;
// either violation throws a not-well-formed XmlException.
1332 private void ReadComment ()
// After a comment a text declaration is no longer accepted.
1334 currentInput.InitialState = false;
1336 while (PeekChar () != -1) {
1337 int ch = ReadChar ();
1339 if (ch == '-' && PeekChar () == '-') {
1342 if (PeekChar () != '>')
1343 throw NotWFError ("comments cannot contain '--'");
1349 if (XmlChar.IsInvalid (ch))
1350 throw NotWFError ("Not allowed character was found.");
1354 // The reader is positioned on the first character
1357 // It may be xml declaration or processing instruction.
// Reads the target name: exactly "xml" is handed to ReadTextDeclaration,
// any other casing of "xml" is a not-well-formed error, and any other
// target is read up to the closing "?>".
// Throws a not-well-formed XmlException when the target is followed by
// neither whitespace nor '?'.
1358 private void ReadProcessingInstruction ()
1360 string target = ReadName ();
1361 if (target == "xml") {
1362 ReadTextDeclaration ();
1364 } else if (String.Compare (target, "xml", true, CultureInfo.InvariantCulture) == 0)
1365 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
1367 currentInput.InitialState = false;
1369 if (!SkipWhitespace ())
1370 if (PeekChar () != '?')
1371 throw NotWFError ("Invalid processing instruction name was found.");
1373 while (PeekChar () != -1) {
1374 int ch = ReadChar ();
1376 if (ch == '?' && PeekChar () == '>') {
1383 // The reader is positioned after "<?xml "
// Reads a text declaration: an optional version (which must be "1.0")
// followed by a mandatory encoding declaration. It is only accepted
// while the current input is still in its initial state.
// Throws a not-well-formed XmlException when it appears later in the
// input, on a malformed or wrong version, or on a missing or malformed
// encoding declaration.
1384 private void ReadTextDeclaration ()
1386 if (!currentInput.InitialState)
1387 throw NotWFError ("Text declaration cannot appear in this state.");
1389 currentInput.InitialState = false;
1394 if (PeekChar () == 'v') {
1396 ExpectAfterWhitespace ('=');
1398 int quoteChar = ReadChar ();
// At most three characters are collected for the version number.
1399 char [] expect1_0 = new char [3];
1400 int versionLength = 0;
1401 switch (quoteChar) {
1404 while (PeekChar () != quoteChar) {
1405 if (PeekChar () == -1)
1406 throw NotWFError ("Invalid version declaration inside text declaration.");
1407 else if (versionLength == 3)
1408 throw NotWFError ("Invalid version number inside text declaration.");
1410 expect1_0 [versionLength] = (char) ReadChar ();
1412 if (versionLength == 3 && new String (expect1_0) != "1.0")
1413 throw NotWFError ("Invalid version number inside text declaration.");
1420 throw NotWFError ("Invalid version declaration inside text declaration.");
1424 if (PeekChar () == 'e') {
1425 Expect ("encoding");
1426 ExpectAfterWhitespace ('=');
1428 int quoteChar = ReadChar ();
1429 switch (quoteChar) {
1432 while (PeekChar () != quoteChar)
1433 if (ReadChar () == -1)
1434 throw NotWFError ("Invalid encoding declaration inside text declaration.");
1439 throw NotWFError ("Invalid encoding declaration inside text declaration.");
1441 // Encoding value should be checked inside XmlInputStream.
1444 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
1449 // Note that now this method behaves differently from
1450 // XmlTextReader's one. It calls AppendValueChar() internally.
// Parses a numeric character reference: hexadecimal digits when the next
// character is 'x', decimal digits otherwise. The accumulated value is
// validated with XmlChar.IsInvalid () and appended to the value buffer
// through AppendValueChar ().
// NOTE(review): this listing omits some source lines (the embedded line
// numbers skip); the declaration of value, the else branches, the format
// arguments and the return statement are not shown.
1451 private int ReadCharacterReference ()
1455 if (PeekChar () == 'x') {
// Hexadecimal form: read up to ';' or end of input, folding each digit into
// value (4 bits per digit).
// NOTE(review): no overflow guard is visible; a long digit run can overflow
// the accumulator -- confirm against the full source.
1458 while (PeekChar () != ';' && PeekChar () != -1) {
1459 int ch = ReadChar ();
1461 if (ch >= '0' && ch <= '9')
1462 value = (value << 4) + ch - '0';
1463 else if (ch >= 'A' && ch <= 'F')
1464 value = (value << 4) + ch - 'A' + 10;
1465 else if (ch >= 'a' && ch <= 'f')
1466 value = (value << 4) + ch - 'a' + 10;
1468 throw NotWFError (String.Format (
1469 CultureInfo.InvariantCulture,
1470 "invalid hexadecimal digit: {0} (#x{1:X})",
// Decimal form: same loop, multiplying the accumulator by 10 per digit.
1475 while (PeekChar () != ';' && PeekChar () != -1) {
1476 int ch = ReadChar ();
1478 if (ch >= '0' && ch <= '9')
1479 value = value * 10 + ch - '0';
1481 throw NotWFError (String.Format (
1482 CultureInfo.InvariantCulture,
1483 "invalid decimal digit: {0} (#x{1:X})",
1491 // There is no way to save surrogate pairs...
// Reject values that XmlChar.IsInvalid () flags, then store the character.
1492 if (XmlChar.IsInvalid (value))
1493 throw NotWFError ("Referenced character was not allowed in XML.");
1494 AppendValueChar (value);
// Appends one character, given as a Unicode code point, to the name buffer.
//
// ch: the code point to append. Values up to Char.MaxValue are stored as a
//     single UTF-16 code unit; larger values are stored as a surrogate pair.
//
// The buffer is grown through CheckNameCapacity () before every write, so
// the two halves of a surrogate pair each get their own capacity check.
private void AppendNameChar (int ch)
{
	CheckNameCapacity ();
	if (ch <= Char.MaxValue) {
		// Fits in one UTF-16 code unit (this includes U+FFFF itself).
		nameBuffer [nameLength++] = (char) ch;
		return;
	}

	// UTF-16 encoding of a supplementary code point: subtract 0x10000,
	// then the top 10 bits select the high surrogate and the low 10 bits
	// select the low surrogate. The previous division/modulo by 0x10000
	// produced wrong pairs for everything outside U+10000..U+103FF.
	int offset = ch - 0x10000;
	nameBuffer [nameLength++] = (char) (0xD800 + (offset >> 10));
	CheckNameCapacity ();
	nameBuffer [nameLength++] = (char) (0xDC00 + (offset & 0x3FF));
}
// Makes room for at least one more character in the name buffer.
// When the buffer is full (nameLength == nameCapacity) its capacity is
// doubled and the characters written so far are carried over.
private void CheckNameCapacity ()
{
	if (nameLength != nameCapacity)
		return;

	nameCapacity *= 2;
	char [] previous = nameBuffer;
	char [] grown = new char [nameCapacity];
	nameBuffer = grown;
	Array.Copy (previous, grown, nameLength);
}
// Builds the name accumulated so far: the first nameLength characters of
// nameBuffer are added to DTD.NameTable and the string it hands back is
// returned.
private string CreateNameString ()
{
	string name = DTD.NameTable.Add (nameBuffer, 0, nameLength);
	return name;
}
// Appends one character, given as an int, to the value buffer.
// Values below Char.MaxValue are appended as a single char; everything else
// is appended in the form produced by ExpandSurrogateChar ().
private void AppendValueChar (int ch)
{
	if (ch >= Char.MaxValue) {
		valueBuffer.Append (ExpandSurrogateChar (ch));
		return;
	}
	valueBuffer.Append ((char) ch);
}
// Returns the current contents of the value buffer as a string.
// The buffer itself is left untouched.
private string CreateValueString ()
{
	string text = valueBuffer.ToString ();
	return text;
}
// Empties the value buffer so that a new value can be accumulated.
private void ClearValueBuffer ()
{
	// Dropping every character leaves the builder with Length == 0.
	valueBuffer.Remove (0, valueBuffer.Length);
}
1543 // The reader is positioned on the quote character.
1544 // *Keeps the quote char* in the value so that get_QuoteChar() works correctly.
// Reads a quoted default attribute value and returns it as a string that
// still includes the opening and closing quote characters.
// Throws the error built by NotWFError () when the value is not quoted,
// contains '<', hits end of input, or references an entity that fails the
// check below.
// NOTE(review): this listing omits some source lines (the embedded line
// numbers skip); the branching on ch inside the loop is only partly shown.
1545 private string ReadDefaultAttribute ()
1547 ClearValueBuffer ();
1551 int quoteChar = ReadChar ();
// Only single or double quotes may delimit the value.
1553 if (quoteChar != '\'' && quoteChar != '\"')
1554 throw NotWFError ("an attribute value was not quoted");
// The opening quote is stored as part of the returned value.
1556 AppendValueChar (quoteChar);
// Consume characters until the matching quote character comes up.
1558 while (PeekChar () != quoteChar) {
1559 int ch = ReadChar ();
1564 throw NotWFError ("attribute values cannot contain '<'");
1566 throw NotWFError ("unexpected end of file in an attribute value");
1568 AppendValueChar (ch);
1569 if (PeekChar () == '#')
1571 // Check XML 1.0 section 3.1 WFC.
1572 string entName = ReadName ();
// Names that XmlChar.GetPredefinedEntity () does not recognise (negative
// result) are looked up among the DTD's entity declarations.
1574 if (XmlChar.GetPredefinedEntity (entName) < 0) {
1575 DTDEntityDeclaration entDecl =
1576 DTD == null ? null : DTD.EntityDecls [entName];
// An undeclared entity, or one with a SystemId, is an error when the DTD is
// standalone, or when it has no SystemId and its internal subset contains
// no parameter entity reference.
// NOTE(review): DTD is null-checked two lines up but dereferenced here
// without a check -- confirm DTD can never be null at this point.
1577 if (entDecl == null || entDecl.SystemId != null)
1578 // WFC: Entity Declared (see 4.1)
1579 if (DTD.IsStandalone || (DTD.SystemId == null && !DTD.InternalSubsetHasPEReference))
1580 throw NotWFError ("Reference to external entities is not allowed in attribute value.");
// The entity name and the terminating ';' are copied into the value as text.
1582 valueBuffer.Append (entName);
1583 AppendValueChar (';');
1586 AppendValueChar (ch);
// Consume the closing quote and store it as well.
1591 ReadChar (); // quoteChar
1592 AppendValueChar (quoteChar);
1594 return CreateValueString ();
// Switches parsing to the external entity identified by url.
// The URL is resolved against DTD.BaseURI through DTD.Resolver, the current
// input is pushed onto parserInputStack, and currentInput is replaced by a
// new XmlParserInput for the resolved entity.
// Throws the error built by NotWFError () when the resolved URL equals the
// BaseURI of an input already on the stack. A failure to open the entity is
// reported through HandleError () and an empty input is substituted.
// NOTE(review): this listing omits some source lines (the embedded line
// numbers skip); the declaration of baseUri and the try openers are not shown.
1597 private void PushParserInput (string url)
// Use DTD.BaseURI as the base only when it is non-empty; the catch below
// handles a UriFormatException from constructing it.
1601 if (DTD.BaseURI != null && DTD.BaseURI.Length > 0)
1602 baseUri = new Uri (DTD.BaseURI);
1603 } catch (UriFormatException) {
1606 Uri absUri = DTD.Resolver.ResolveUri (baseUri, url);
1607 string absPath = absUri != null ? absUri.ToString () : String.Empty;
// Inclusion-cycle guard over the inputs already on the stack. currentInput
// is pushed only after this loop, so it is not part of the comparison.
1609 foreach (XmlParserInput i in parserInputStack.ToArray ()) {
1610 if (i.BaseURI == absPath)
1611 throw NotWFError ("Nested inclusion is not allowed: " + url);
1613 parserInputStack.Push (currentInput);
// NOTE(review): the "as Stream" cast yields null for a non-Stream result and
// that null is passed on to XmlStreamReader -- confirm how that is handled.
1615 Stream s = DTD.Resolver.GetEntity (absUri, null, typeof (Stream)) as Stream;
1616 currentInput = new XmlParserInput (new XmlStreamReader (s), absPath);
1617 } catch (Exception ex) { // FIXME: (wishlist) Bad exception catch ;-(
// Report the failure with the position and base URI of the current input
// (zeros and an empty string when there is none), then continue with an
// empty input that carries the resolved path.
1618 int line = currentInput == null ? 0 : currentInput.LineNumber;
1619 int col = currentInput == null ? 0 : currentInput.LinePosition;
1620 string bu = (currentInput == null) ? String.Empty : currentInput.BaseURI;
1621 HandleError (new XmlSchemaException ("Specified external entity not found. Target URL is " + url + " .",
1622 line, col, null, bu, ex));
1623 currentInput = new XmlParserInput (new StringReader (String.Empty), absPath);
// Restores the parser input most recently pushed onto parserInputStack and
// makes it the current input again.
private void PopParserInput ()
{
	object restored = parserInputStack.Pop ();
	currentInput = restored as XmlParserInput;
}
1632 private void HandleError (XmlSchemaException ex)
1634 #if DTD_HANDLE_EVENTS
1635 if (this.ValidationEventHandler != null)
1636 ValidationEventHandler (this, new ValidationEventArgs (ex, ex.Message, XmlSeverityType.Error));