2 // System.Xml.DTDReader
5 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 // (C)2003 Atsushi Enomoto
11 // When a parameter entity contains cp section, it should be closed
12 // within that declaration.
14 // Resolution to external entities from different BaseURI fails (it is
15 // the same as MS.NET 1.1, but should be fixed in the future).
19 // Permission is hereby granted, free of charge, to any person obtaining
20 // a copy of this software and associated documentation files (the
21 // "Software"), to deal in the Software without restriction, including
22 // without limitation the rights to use, copy, modify, merge, publish,
23 // distribute, sublicense, and/or sell copies of the Software, and to
24 // permit persons to whom the Software is furnished to do so, subject to
25 // the following conditions:
27 // The above copyright notice and this permission notice shall be
28 // included in all copies or substantial portions of the Software.
30 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
34 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
35 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
36 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 using System.Collections;
41 using System.Globalization;
45 using System.Xml.Schema;
49 internal class DTDReader : IXmlLineInfo
// Parser input currently being read; PeekChar/ReadChar, BaseURI and the line-info properties delegate to it.
51 private XmlParserInput currentInput;
// Stack of parser inputs; its Count is compared against the entry depth while subsets are processed.
52 private Stack parserInputStack;
// Scratch buffer for the name being read, with its used length and allocated capacity.
54 private char [] nameBuffer;
55 private int nameLength;
56 private int nameCapacity;
57 private const int initialNameCapacity = 256;
// Scratch buffer for literal and entity values being read.
59 private StringBuilder valueBuffer;
// Line number/position recorded by the constructor; passed to the input created for the internal subset.
61 private int currentLinkedNodeLineNumber;
62 private int currentLinkedNodeLinePosition;
64 // Parameter entity placeholder
// Expected to be 0 once a subset has been processed; otherwise an unterminated INCLUDE section is reported.
65 private int dtdIncludeSect;
// Backing field of the Normalization property.
67 private bool normalization;
// Set to true only while GenerateDTDObjectModel processes the internal subset.
69 private bool processingInternalSubset;
// Public/system identifiers most recently stored by ReadExternalID.
71 string cachedPublicId;
72 string cachedSystemId;
// Validation event; the code that raises it is not visible in this listing.
77 public event ValidationEventHandler ValidationEventHandler;
// Constructor. Records the starting line number and position, which are later handed to the
// parser input created for the internal subset.
// NOTE(review): part of the parameter list and the remaining body lines are not visible in this listing.
82 public DTDReader (DTDObjectModel dtd,
84 int startLinePosition)
87 currentLinkedNodeLineNumber = startLineNumber;
88 currentLinkedNodeLinePosition = startLinePosition;
// Base URI reported by the parser input currently being read.
94 public string BaseURI {
95 get { return currentInput.BaseURI; }
// Gets or sets the normalization flag. Within this file it is forwarded to the XmlTextReader used
// for external entities and gates the invalid-character check in ReadEntityValueDecl.
98 public bool Normalization {
99 get { return normalization; }
100 set { normalization = value; }
// Line number reported by the parser input currently being read.
103 public int LineNumber {
104 get { return currentInput.LineNumber; }
// Line position reported by the parser input currently being read.
107 public int LinePosition {
108 get { return currentInput.LinePosition; }
111 public bool HasLineInfo ()
// Creates (but does not throw) an XmlException for a well-formedness error, attaching this
// reader's line information and BaseURI. Callers are expected to throw the returned exception.
118 private XmlException NotWFError (string message)
120 return new XmlException (this as IXmlLineInfo, BaseURI, message);
125 parserInputStack = new Stack ();
127 nameBuffer = new char [initialNameCapacity];
129 nameCapacity = initialNameCapacity;
131 valueBuffer = new StringBuilder (512);
// Populates the DTD object model. The internal subset (when non-empty) is read from a string
// input first; the external subset is read next when DTD.SystemId is non-empty and a resolver
// is available. Afterwards entity declarations are scanned and DTD.ExternalResources is cleared.
// Throws a well-formedness error if an INCLUDE section is still open after either subset.
// NOTE(review): loop headers, the statements guarded by the PeekChar () == -1 checks and the
// return statement are not visible in this listing.
134 internal DTDObjectModel GenerateDTDObjectModel ()
// Input stack depth on entry; the processing loops continue until the stack is back at this depth.
137 int originalParserDepth = parserInputStack.Count;
139 if (DTD.InternalSubset != null && DTD.InternalSubset.Length > 0) {
140 this.processingInternalSubset = true;
// Remember the current input so it can be restored after the internal subset.
141 XmlParserInput original = currentInput;
143 currentInput = new XmlParserInput (
144 new StringReader (DTD.InternalSubset),
146 currentLinkedNodeLineNumber,
147 currentLinkedNodeLinePosition);
// A text declaration is not accepted in the internal subset input.
148 currentInput.AllowTextDecl = false;
150 more = ProcessDTDSubset ();
151 if (PeekChar () == -1 && parserInputStack.Count > 0)
153 } while (more || parserInputStack.Count > originalParserDepth);
154 if (dtdIncludeSect != 0)
155 throw NotWFError ("INCLUDE section is not ended correctly.");
157 currentInput = original;
158 this.processingInternalSubset = false;
// External subset: only attempted with a non-empty system id and a resolver.
160 if (DTD.SystemId != null && DTD.SystemId != String.Empty && DTD.Resolver != null) {
161 PushParserInput (DTD.SystemId);
163 more = ProcessDTDSubset ();
164 if (PeekChar () == -1 && parserInputStack.Count > 1)
// The external subset input itself accounts for the extra "+ 1" level.
166 } while (more || parserInputStack.Count > originalParserDepth + 1);
167 if (dtdIncludeSect != 0)
168 throw NotWFError ("INCLUDE section is not ended correctly.");
172 ArrayList sc = new ArrayList ();
174 // Entity recursion check.
// NOTE(review): only entities that HAVE a notation name are scanned here; for a recursion check
// over parsed entities one would expect the opposite condition - confirm the intent.
175 foreach (DTDEntityDeclaration ent in DTD.EntityDecls.Values) {
176 if (ent.NotationName != null) {
177 ent.ScanEntityValue (sc);
181 // release unnecessary memory usage
182 DTD.ExternalResources.Clear ();
187 // Reads any one of the following:
188 // elementdecl, AttlistDecl, EntityDecl, NotationDecl,
189 // PI, Comment, Parameter Entity, or doctype termination char(']')
191 // Returns true if it may have any more contents, or false if not.
// Throws a well-formedness error on unexpected end of stream, on an unexpected character
// after '<', on an unbalanced section end, or on any other unexpected character.
// NOTE(review): the switch/case labels selecting the branches below are not visible in this listing.
192 private bool ProcessDTDSubset ()
195 int c2 = ReadChar ();
201 // It affects entity references' well-formedness
202 if (this.processingInternalSubset)
203 DTD.InternalSubsetHasPEReference = true;
204 string peName = ReadName ();
206 DTDParameterEntityDeclaration peDecl = GetPEDecl (peName);
// Make the parameter entity's content the source of subsequent reads, then loop while
// the input still reports a PE buffer.
209 currentInput.PushPEBuffer (peDecl);
210 // int currentLine = currentInput.LineNumber;
211 // int currentColumn = currentInput.LinePosition;
212 while (currentInput.HasPEBuffer)
215 // FIXME: Implement correct nest-level check.
216 // Don't depend on lineinfo (might not be supplied)
217 // if (currentInput.LineNumber != currentLine ||
218 // currentInput.LinePosition != currentColumn)
219 // throw NotWFError ("Incorrectly nested parameter entity.");
226 // Only read, no store.
227 ReadProcessingInstruction ();
230 CompileDeclaration ();
233 throw NotWFError ("Unexpected end of stream.");
235 throw NotWFError ("Syntax Error after '<' character: " + (char) c);
// A section end is only legal while at least one INCLUDE section is open.
239 if (dtdIncludeSect == 0)
240 throw NotWFError ("Unbalanced end of INCLUDE/IGNORE section.");
247 throw NotWFError (String.Format ("Syntax Error inside doctypedecl markup : {0}({1})", c2, (char) c2));
// Once any content has been processed, a text declaration may no longer appear.
249 currentInput.AllowTextDecl = false;
// Reads one markup declaration and stores the result in the matching DTD collection
// (EntityDecls, ElementDecls, AttListDecls, NotationDecls), or handles a conditional section.
// Throws a well-formedness error for unrecognised markup or missing mandatory whitespace.
// NOTE(review): the switch/case labels and keyword checks are not visible in this listing.
253 private void CompileDeclaration ()
259 // Only read, no store.
267 if (!SkipWhitespace ())
269 "Whitespace is required after '<!ENTITY' in DTD entity declaration.");
// '%' after "<!ENTITY" introduces a parameter entity declaration.
271 if (PeekChar () == '%') {
273 if (!SkipWhitespace ()) {
277 // FIXME: Is this allowed? <!ENTITY % %name; ...>
278 // (i.e. Can PE name be replaced by another PE?)
280 if (XmlChar.IsNameChar (PeekChar ()))
281 ReadParameterEntityDecl ();
283 throw NotWFError ("expected name character");
287 DTDEntityDeclaration ent = ReadEntityDecl ();
// Only the first declaration of a general entity name is kept.
288 if (DTD.EntityDecls [ent.Name] == null)
289 DTD.EntityDecls.Add (ent.Name, ent);
293 DTDElementDeclaration el = ReadElementDecl ();
294 DTD.ElementDecls.Add (el.Name, el);
297 throw NotWFError ("Syntax Error after '<!E' (ELEMENT or ENTITY must be found)");
302 DTDAttListDeclaration atl = ReadAttListDecl ();
303 DTD.AttListDecls.Add (atl.Name, atl);
307 DTDNotationDeclaration not = ReadNotationDecl ();
308 DTD.NotationDecls.Add (not.Name, not);
311 // conditional sections
315 switch (ReadChar ()) {
318 ExpectAfterWhitespace ('[');
328 throw NotWFError ("Syntax Error after '<!' characters.");
// Skips the body of an IGNORE conditional section. A local counter starts at 1 and the loop
// runs while it is positive.
// NOTE(review): the peeks for '!' / '[' and for ']' / '>' presumably recognise nested "<![" openers
// and "]]>" closers that adjust the counter; the case labels and counter updates are not visible
// in this listing - confirm.
// Throws a well-formedness error if the section ends unexpectedly or is not terminated correctly.
332 private void ReadIgnoreSect ()
334 ExpectAfterWhitespace ('[');
335 int dtdIgnoreSect = 1;
337 while (dtdIgnoreSect > 0) {
338 switch (ReadChar ()) {
340 throw NotWFError ("Unexpected IGNORE section end.");
342 if (PeekChar () != '!')
345 if (PeekChar () != '[')
351 if (PeekChar () != ']')
354 if (PeekChar () != '>')
361 if (dtdIgnoreSect != 0)
362 throw NotWFError ("IGNORE section is not ended correctly.");
365 // The reader is positioned on the head of the name.
// Reads an element declaration: mandatory whitespace, the element name, mandatory whitespace,
// then the content specification. The declaration records whether it came from the internal subset.
// Throws a well-formedness error when either mandatory whitespace is missing.
366 private DTDElementDeclaration ReadElementDecl ()
368 DTDElementDeclaration decl = new DTDElementDeclaration (DTD);
369 decl.IsInternalSubset = this.processingInternalSubset;
371 if (!SkipWhitespace ())
372 throw NotWFError ("Whitespace is required between '<!ELEMENT' and name in DTD element declaration.");
374 decl.Name = ReadName ();
375 if (!SkipWhitespace ())
376 throw NotWFError ("Whitespace is required between name and content in DTD element declaration.");
378 ReadContentSpec (decl);
380 // This expanding is only allowed as a non-validating parser.
386 // read 'children'(BNF) of contentspec
// Fills decl.ContentModel from the content specification. A leading '#' selects the mixed-content
// branch; otherwise content particles are read through ReadCP and combined as a choice ('|') or a
// sequence (','). Mixing the two separators in one group is a well-formedness error.
// NOTE(review): handling of the other content keywords and the occurrence case labels are not
// visible in this listing.
387 private void ReadContentSpec (DTDElementDeclaration decl)
401 DTDContentModel model = decl.ContentModel;
404 if(PeekChar () == '#') {
405 // Mixed Contents. "#PCDATA" must appear first.
406 decl.IsMixedContent = true;
407 model.Occurence = DTDOccurence.ZeroOrMore;
408 model.OrderType = DTDContentOrderType.Or;
412 while(PeekChar () != ')') {
414 if (PeekChar () == '%') {
// Each element name listed in the mixed model becomes a child model; AddContentModel
// reports duplicate names.
421 DTDContentModel elem = new DTDContentModel (DTD, decl.Name);
422 // elem.LineNumber = currentInput.LineNumber;
423 // elem.LinePosition = currentInput.LinePosition;
424 elem.ElementName = ReadName ();
425 this.AddContentModel (model.ChildModels, elem);
430 if (model.ChildModels.Count > 0)
432 else if (PeekChar () == '*')
435 // Non-Mixed Contents
436 model.ChildModels.Add (ReadCP (decl));
439 do { // copied from ReadCP() ...;-)
440 if (PeekChar () == '%') {
444 if(PeekChar ()=='|') {
// A '|' is not allowed once the group has been established as a sequence.
446 if (model.OrderType == DTDContentOrderType.Seq)
447 throw NotWFError ("Inconsistent choice markup in sequence cp.");
448 model.OrderType = DTDContentOrderType.Or;
451 AddContentModel (model.ChildModels, ReadCP (decl));
454 else if(PeekChar () == ',')
// A ',' is not allowed once the group has been established as a choice.
457 if (model.OrderType == DTDContentOrderType.Or)
458 throw NotWFError ("Inconsistent sequence markup in choice cp.");
459 model.OrderType = DTDContentOrderType.Seq;
462 model.ChildModels.Add (ReadCP (decl));
474 model.Occurence = DTDOccurence.Optional;
478 model.Occurence = DTDOccurence.ZeroOrMore;
482 model.Occurence = DTDOccurence.OneOrMore;
491 throw NotWFError ("ContentSpec is missing.");
495 // Read 'cp' (BNF) of contentdecl (BNF)
// Reads one content particle for the given element. A '(' starts a nested group whose members are
// read recursively and combined as a choice ('|') or sequence (','); otherwise the particle is a
// single element name. An occurrence value is then assigned from the following character.
// Mixing '|' and ',' within one group is a well-formedness error.
// NOTE(review): the occurrence case labels and the return statement are not visible in this listing.
496 private DTDContentModel ReadCP (DTDElementDeclaration elem)
498 DTDContentModel model = null;
500 if(PeekChar () == '(') {
501 model = new DTDContentModel (DTD, elem.Name);
504 model.ChildModels.Add (ReadCP (elem));
507 if (PeekChar () == '%') {
511 if(PeekChar ()=='|') {
513 if (model.OrderType == DTDContentOrderType.Seq)
514 throw NotWFError ("Inconsistent choice markup in sequence cp.");
515 model.OrderType = DTDContentOrderType.Or;
518 AddContentModel (model.ChildModels, ReadCP (elem));
521 else if(PeekChar () == ',') {
523 if (model.OrderType == DTDContentOrderType.Or)
524 throw NotWFError ("Inconsistent sequence markup in choice cp.");
525 model.OrderType = DTDContentOrderType.Seq;
528 model.ChildModels.Add (ReadCP (elem));
535 ExpectAfterWhitespace (')');
// Not a group: the particle is a plain element name.
539 model = new DTDContentModel (DTD, elem.Name);
540 model.ElementName = ReadName ();
543 switch(PeekChar ()) {
545 model.Occurence = DTDOccurence.Optional;
549 model.Occurence = DTDOccurence.ZeroOrMore;
553 model.Occurence = DTDOccurence.OneOrMore;
// Checks a named content model against the models already in the collection and reports a
// validity error through HandleError when the same element name is already present.
// NOTE(review): the remaining exception arguments and the statement that actually adds cm to
// cmc are not visible in this listing.
560 private void AddContentModel (DTDContentModelCollection cmc, DTDContentModel cm)
// Unnamed models (nested groups) are not subject to the uniqueness check.
562 if (cm.ElementName != null) {
563 for (int i = 0; i < cmc.Count; i++) {
564 if (cmc [i].ElementName == cm.ElementName) {
565 HandleError (new XmlSchemaException ("Element content must be unique inside mixed content model.",
578 // The reader is positioned on the first name char.
// Reads a parameter entity declaration. An external declaration (starting with 'S' or 'P') takes
// its public/system ids from the cached values and has its replacement text resolved externally;
// an internal one reads a quoted literal and resolves its replacement text in place.
// Only the first declaration of a given name is registered in DTD.PEDecls.
// Throws a well-formedness error for missing whitespace, a missing quote, end of stream inside
// the literal, or an invalid character.
// NOTE(review): the loop and case labels that read the literal are not visible in this listing.
579 private void ReadParameterEntityDecl ()
581 DTDParameterEntityDeclaration decl =
582 new DTDParameterEntityDeclaration (DTD);
583 decl.BaseURI = BaseURI;
584 decl.XmlResolver = DTD.Resolver;
586 decl.Name = ReadName ();
587 if (!SkipWhitespace ())
588 throw NotWFError ("Whitespace is required after name in DTD parameter entity declaration.");
590 if (PeekChar () == 'S' || PeekChar () == 'P') {
591 // read publicId/systemId
593 decl.PublicId = cachedPublicId;
594 decl.SystemId = cachedSystemId;
598 ResolveExternalEntityReplacementText (decl);
601 int quoteChar = ReadChar ();
602 if (quoteChar != '\'' && quoteChar != '"')
603 throw NotWFError ("quotation char was expected.");
610 throw NotWFError ("unexpected end of stream in entity value definition.");
612 if (quoteChar == '"')
615 AppendValueChar ('"');
618 if (quoteChar == '\'')
621 AppendValueChar ('\'');
624 if (XmlChar.IsInvalid (c))
625 throw NotWFError ("Invalid character was used to define parameter entity.");
630 decl.LiteralEntityValue = CreateValueString ();
632 ResolveInternalEntityReplacementText (decl);
634 ExpectAfterWhitespace ('>');
// A later redeclaration of the same parameter entity name is ignored.
637 if (DTD.PEDecls [decl.Name] == null) {
638 DTD.PEDecls.Add (decl.Name, decl);
// Computes decl.ReplacementText for an entity that may be external. With a non-empty SystemId the
// literal value is run through an XmlTextReader; for a general entity not yet registered in
// DTD.EntityDecls the content is re-read as XML, otherwise the reader's remainder is used verbatim.
// Without a SystemId the replacement text is simply the literal value.
// NOTE(review): the read loop, the else branches and any reader clean-up are not visible in this listing.
642 private void ResolveExternalEntityReplacementText (DTDEntityBase decl)
644 if (decl.SystemId != null && decl.SystemId.Length > 0) {
645 // FIXME: not always it should be read in Element context
646 XmlTextReader xtr = new XmlTextReader (decl.LiteralEntityValue, XmlNodeType.Element, null);
647 if (decl is DTDEntityDeclaration && DTD.EntityDecls [decl.Name] == null) {
648 // GE - also checked as valid contents
649 StringBuilder sb = new StringBuilder ();
650 xtr.Normalization = this.Normalization;
653 sb.Append (xtr.ReadOuterXml ());
654 decl.ReplacementText = sb.ToString ();
658 decl.ReplacementText = xtr.GetRemainder ().ReadToEnd ();
661 decl.ReplacementText = decl.LiteralEntityValue;
// Builds decl.ReplacementText from decl.LiteralEntityValue by walking the literal once:
// character references are decoded, general entity references are copied through unexpanded
// ("&name;"), and parameter entity references are replaced by their value via GetPEValue.
// Throws an XmlException for a reference without a terminating ';' or for an invalid character.
// NOTE(review): the switch/case labels and index adjustments are not visible in this listing.
664 private void ResolveInternalEntityReplacementText (DTDEntityBase decl)
666 string value = decl.LiteralEntityValue;
667 int len = value.Length;
669 for (int i = 0; i < len; i++) {
676 end = value.IndexOf (';', i);
678 throw new XmlException (decl, decl.BaseURI, "Invalid reference markup.");
680 if (value [i] == '#') {
682 ch = GetCharacterReference (decl, value, ref i, end);
683 if (XmlChar.IsInvalid (ch))
684 throw NotWFError ("Invalid character was used to define parameter entity.");
687 name = value.Substring (i, end - i);
688 // don't expand "general" entity.
689 AppendValueChar ('&');
690 valueBuffer.Append (name);
691 AppendValueChar (';');
695 if (XmlChar.IsInvalid (ch))
696 throw new XmlException (decl, decl.BaseURI, "Invalid character was found in the entity declaration.");
697 AppendValueChar (ch);
701 end = value.IndexOf (';', i);
703 throw new XmlException (decl, decl.BaseURI, "Invalid reference markup.");
704 name = value.Substring (i, end - i);
705 valueBuffer.Append (GetPEValue (name));
709 AppendValueChar (ch);
713 decl.ReplacementText = CreateValueString ();
// Parses the numeric part of a character reference found in 'value'. When the character at
// 'index' is 'x' the digits after it are parsed as hexadecimal, otherwise the text from 'index'
// up to 'end' is parsed as decimal. A malformed number is reported as an XmlException carrying
// the declaration's line info and base URI.
// NOTE(review): only FormatException is caught. int.Parse throws OverflowException for a value
// outside the Int32 range, which would escape as-is rather than as an XmlException - confirm
// whether that is intended.
// NOTE(review): the update of 'index' and the return statement are not visible in this listing.
718 private int GetCharacterReference (DTDEntityBase li, string value, ref int index, int end)
721 if (value [index] == 'x') {
723 ret = int.Parse (value.Substring (index + 1, end - index - 1), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
724 } catch (FormatException) {
725 throw new XmlException (li, li.BaseURI, "Invalid number for a character reference.");
729 ret = int.Parse (value.Substring (index, end - index), CultureInfo.InvariantCulture);
730 } catch (FormatException) {
731 throw new XmlException (li, li.BaseURI, "Invalid number for a character reference.");
// Returns the replacement text of the named parameter entity, or an empty
// string when GetPEDecl yields no declaration for that name.
// (Reconstructed from the listing; only the enclosing braces were absent.)
private string GetPEValue (string peName)
{
	DTDParameterEntityDeclaration declaration = GetPEDecl (peName);
	if (declaration == null)
		return String.Empty;
	return declaration.ReplacementText;
}
// Looks up a parameter entity declaration by name in DTD.PEDecls.
// A declaration flagged as coming from the internal subset is rejected with a well-formedness error.
// When no declaration exists, the reference is a well-formedness error if there is no system id and
// no PE reference was seen in the internal subset, or if the document is standalone; otherwise a
// validity error is reported through HandleError.
// NOTE(review): the return statements are not visible in this listing.
745 private DTDParameterEntityDeclaration GetPEDecl (string peName)
747 DTDParameterEntityDeclaration peDecl =
748 DTD.PEDecls [peName] as DTDParameterEntityDeclaration;
749 if (peDecl != null) {
750 if (peDecl.IsInternalSubset)
751 throw NotWFError ("Parameter entity is not allowed in internal subset entity '" + peName + "'");
754 // See XML 1.0 section 4.1 for both WFC and VC.
755 if ((DTD.SystemId == null && !DTD.InternalSubsetHasPEReference) || DTD.IsStandalone)
756 throw NotWFError (String.Format ("Parameter entity '{0}' not found.",peName));
757 HandleError (new XmlSchemaException (
758 "Parameter entity " + peName + " not found.", null));
// Expands parameter entity references for as long as the next character is '%', delegating each
// expansion to TryExpandPERefSpaceKeep.
// NOTE(review): the return values and any whitespace skipping inside the loop are not visible in
// this listing.
762 private bool TryExpandPERef ()
764 if (PeekChar () != '%')
766 while (PeekChar () == '%') {
767 TryExpandPERefSpaceKeep ();
773 // Tries to expand parameter entities, but it should not skip spaces
// A '%' encountered while the internal subset is being processed is a well-formedness error.
// NOTE(review): the expansion itself and the return values are not visible in this listing.
774 private bool TryExpandPERefSpaceKeep ()
776 if (PeekChar () == '%') {
777 if (this.processingInternalSubset)
778 throw NotWFError ("Parameter entity reference is not allowed inside internal subset.");
787 // reader is positioned after '%'
// Reads the parameter entity name and, when a declaration exists in DTD.PEDecls, pushes it onto
// the current input as a PE buffer. An undeclared name is reported as a validity error through
// HandleError and otherwise ignored.
788 private void ExpandPERef ()
790 string peName = ReadName ();
792 DTDParameterEntityDeclaration peDecl =
793 DTD.PEDecls [peName] as DTDParameterEntityDeclaration;
794 if (peDecl == null) {
795 HandleError (new XmlSchemaException ("Parameter entity " + peName + " not found.", null));
796 return; // do nothing
798 currentInput.PushPEBuffer (peDecl);
801 // The reader is positioned on the head of the name.
// Reads a general entity declaration. An external declaration (starting with 'S' or 'P') takes its
// public/system ids from the cached values and may be followed by an NDATA notation name; without
// a notation name its replacement text is resolved externally. An internal declaration reads the
// quoted entity value and resolves its replacement text in place.
// Throws a well-formedness error when mandatory whitespace is missing.
// NOTE(review): the condition selecting the empty literal/replacement assignment is not visible
// in this listing.
802 private DTDEntityDeclaration ReadEntityDecl ()
804 DTDEntityDeclaration decl = new DTDEntityDeclaration (DTD);
805 decl.BaseURI = BaseURI;
806 decl.XmlResolver = DTD.Resolver;
807 decl.IsInternalSubset = this.processingInternalSubset;
809 decl.Name = ReadName ();
810 if (!SkipWhitespace ())
811 throw NotWFError ("Whitespace is required between name and content in DTD entity declaration.");
814 if (PeekChar () == 'S' || PeekChar () == 'P') {
817 decl.PublicId = cachedPublicId;
818 decl.SystemId = cachedSystemId;
819 if (SkipWhitespace ()) {
820 if (PeekChar () == 'N') {
823 if (!SkipWhitespace ())
824 throw NotWFError ("Whitespace is required after NDATA.");
825 decl.NotationName = ReadName (); // ndata_name
828 if (decl.NotationName == null) {
830 ResolveExternalEntityReplacementText (decl);
833 decl.LiteralEntityValue = String.Empty;
834 decl.ReplacementText = String.Empty;
839 ReadEntityValueDecl (decl);
840 ResolveInternalEntityReplacementText (decl);
843 // This expanding is only allowed as a non-validating parser.
// Reads a quoted entity value into decl.LiteralEntityValue. Characters are accumulated until the
// matching quote; a parameter entity reference inside the value is replaced through GetPEValue,
// and is a well-formedness error when the declaration comes from the internal subset.
// Invalid characters are rejected only when normalization is enabled.
// NOTE(review): the switch/case labels inside the loop are not visible in this listing.
849 private void ReadEntityValueDecl (DTDEntityDeclaration decl)
852 // quotation char will be finally removed on unescaping
853 int quoteChar = ReadChar ();
854 if (quoteChar != '\'' && quoteChar != '"')
855 throw NotWFError ("quotation char was expected.");
858 while (PeekChar () != quoteChar) {
859 int ch = ReadChar ();
862 string name = ReadName ();
864 if (decl.IsInternalSubset)
865 throw NotWFError (String.Format ("Parameter entity is not allowed in internal subset entity '{0}'", name));
866 valueBuffer.Append (GetPEValue (name));
869 throw NotWFError ("unexpected end of stream.");
871 if (this.normalization && XmlChar.IsInvalid (ch))
872 throw NotWFError ("Invalid character was found in the entity declaration.");
873 AppendValueChar (ch);
877 // string value = Dereference (CreateValueString (), false);
878 string value = CreateValueString ();
882 decl.LiteralEntityValue = value;
// Reads an ATTLIST declaration for a target element. An existing declaration for the same element
// name is looked up in DTD.AttListDecls; a new one is created otherwise. Attribute definitions
// are read while the next character is a name character.
// A second ID-typed attribute on the same element is reported as a validity error via HandleError.
// Throws a well-formedness error when mandatory whitespace is missing.
// NOTE(review): the null check before creating the declaration and the statement that stores
// each definition are not visible in this listing.
885 private DTDAttListDeclaration ReadAttListDecl ()
887 TryExpandPERefSpaceKeep ();
888 if (!SkipWhitespace ())
889 throw NotWFError ("Whitespace is required between ATTLIST and name in DTD attlist declaration.");
891 string name = ReadName (); // target element name
892 DTDAttListDeclaration decl =
893 DTD.AttListDecls [name] as DTDAttListDeclaration;
895 decl = new DTDAttListDeclaration (DTD);
896 decl.IsInternalSubset = this.processingInternalSubset;
// Whitespace after the name may only be omitted when the declaration ends immediately.
899 if (!SkipWhitespace ())
900 if (PeekChar () != '>')
901 throw NotWFError ("Whitespace is required between name and content in non-empty DTD attlist declaration.");
905 while (XmlChar.IsNameChar (PeekChar ())) {
906 DTDAttributeDefinition def = ReadAttributeDefinition ();
907 // There must not be two or more ID attributes.
908 if (def.Datatype.TokenizedType == XmlTokenizedType.ID) {
909 for (int i = 0; i < decl.Definitions.Count; i++) {
910 DTDAttributeDefinition d = decl [i];
911 if (d.Datatype.TokenizedType == XmlTokenizedType.ID) {
912 HandleError (new XmlSchemaException ("AttList declaration must not contain two or more ID attributes.",
913 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
918 if (decl [def.Name] == null)
924 // This expanding is only allowed as a non-validating parser.
// Reads one attribute definition: the attribute name, its type and its default declaration.
// The type keyword is recognised character by character and mapped to an XML Schema datatype
// obtained by name (ID/IDREF/IDREFS, ENTITY/ENTITIES, NMTOKEN/NMTOKENS, NOTATION, or NMTOKEN for
// enumerated values). Enumerated notation names and enumerated values are collected on the definition.
// Throws a well-formedness error for missing whitespace or an unrecognised type.
// NOTE(review): most case labels and keyword Expect calls are not visible in this listing.
930 private DTDAttributeDefinition ReadAttributeDefinition ()
932 DTDAttributeDefinition def = new DTDAttributeDefinition (DTD);
933 def.IsInternalSubset = this.processingInternalSubset;
937 def.Name = ReadName ();
938 if (!SkipWhitespace ())
939 throw NotWFError ("Whitespace is required between name and content in DTD attribute definition.");
943 switch(PeekChar ()) {
946 def.Datatype = XmlSchemaDatatype.FromName ("normalizedString", XmlSchema.Namespace);
948 case 'I': // ID, IDREF, IDREFS
950 if(PeekChar () == 'R') {
952 if(PeekChar () == 'S') {
955 def.Datatype = XmlSchemaDatatype.FromName ("IDREFS", XmlSchema.Namespace);
958 def.Datatype = XmlSchemaDatatype.FromName ("IDREF", XmlSchema.Namespace);
961 def.Datatype = XmlSchemaDatatype.FromName ("ID", XmlSchema.Namespace);
963 case 'E': // ENTITY, ENTITIES
965 switch(ReadChar ()) {
967 def.Datatype = XmlSchemaDatatype.FromName ("ENTITY", XmlSchema.Namespace);
969 case 'I': // ENTITIES
971 def.Datatype = XmlSchemaDatatype.FromName ("ENTITIES", XmlSchema.Namespace);
975 case 'N': // NMTOKEN, NMTOKENS, NOTATION
977 switch(PeekChar ()) {
980 if(PeekChar ()=='S') { // NMTOKENS
982 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKENS", XmlSchema.Namespace);
985 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKEN", XmlSchema.Namespace);
989 def.Datatype = XmlSchemaDatatype.FromName ("NOTATION", XmlSchema.Namespace);
990 if (!SkipWhitespace ())
991 throw NotWFError ("Whitespace is required between name and content in DTD attribute definition.");
994 def.EnumeratedNotations.Add (ReadName ()); // notation name
996 while(PeekChar () == '|') {
999 def.EnumeratedNotations.Add (ReadName ()); // notation name
1005 throw NotWFError ("attribute declaration syntax error.");
1008 default: // Enumerated Values
1009 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKEN", XmlSchema.Namespace);
// Enumerated values are stored in normalized form.
1013 def.EnumeratedAttributeDeclaration.Add (
1014 def.Datatype.Normalize (ReadNmToken ())); // enum value
1016 while(PeekChar () == '|') {
1019 def.EnumeratedAttributeDeclaration.Add (
1020 def.Datatype.Normalize (ReadNmToken ())); // enum value
1026 TryExpandPERefSpaceKeep ();
1027 if (!SkipWhitespace ())
1028 throw NotWFError ("Whitespace is required between type and occurence in DTD attribute definition.");
1031 ReadAttributeDefaultValue (def);
// Reads the default declaration of an attribute definition (a '#' keyword form or a literal
// default) and sets def.OccurenceType / def.UnresolvedDefaultValue accordingly. When a default
// value exists it is then validated: membership in the enumerated values or notations, parsing
// by the attribute's datatype, and existence of referenced entities for ENTITY/ENTITIES types.
// Validation failures are reported through HandleError rather than thrown.
// NOTE(review): the keyword case labels and the uses of 'breakup' are not visible in this listing.
1036 private void ReadAttributeDefaultValue (DTDAttributeDefinition def)
1038 if(PeekChar () == '#')
1044 Expect ("REQUIRED");
1045 def.OccurenceType = DTDAttributeOccurenceType.Required;
1049 def.OccurenceType = DTDAttributeOccurenceType.Optional;
1053 def.OccurenceType = DTDAttributeOccurenceType.Fixed;
1054 if (!SkipWhitespace ())
1055 throw NotWFError ("Whitespace is required between FIXED and actual value in DTD attribute definition.");
1056 def.UnresolvedDefaultValue = ReadDefaultAttribute ();
1060 // one of the enumerated value
1063 def.UnresolvedDefaultValue = ReadDefaultAttribute ();
1066 // VC: If default value exists, it should be valid.
1067 if (def.DefaultValue != null) {
1068 string normalized = def.Datatype.Normalize (def.DefaultValue);
1069 bool breakup = false;
1070 object parsed = null;
1072 // enumeration validity
1073 if (def.EnumeratedAttributeDeclaration.Count > 0) {
1074 if (!def.EnumeratedAttributeDeclaration.Contains (normalized)) {
1075 HandleError (new XmlSchemaException ("Default value is not one of the enumerated values.",
1076 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1080 if (def.EnumeratedNotations.Count > 0) {
1081 if (!def.EnumeratedNotations.Contains (normalized)) {
1082 HandleError (new XmlSchemaException ("Default value is not one of the enumerated notation values.",
1083 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1088 // type based validity
// NOTE(review): the message below names the ENTITY type although the parse appears to run for
// whatever datatype the attribute has - confirm whether the wording is intended.
1091 parsed = def.Datatype.ParseValue (normalized, DTD.NameTable, null);
1092 } catch (Exception ex) { // FIXME: (wishlist) bad catch ;-(
1093 HandleError (new XmlSchemaException ("Invalid default value for ENTITY type.",
1094 def.LineNumber, def.LinePosition, null, def.BaseURI, ex));
1099 switch (def.Datatype.TokenizedType) {
1100 case XmlTokenizedType.ENTITY:
1101 if (DTD.EntityDecls [normalized] == null)
1102 HandleError (new XmlSchemaException ("Specified entity declaration used by default attribute value was not found.",
1103 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1105 case XmlTokenizedType.ENTITIES:
// NOTE(review): 'parsed' stays null if ParseValue threw above; whether this branch can still be
// reached in that case depends on lines not visible here - confirm.
1106 string [] entities = parsed as string [];
1107 for (int i = 0; i < entities.Length; i++) {
1108 string entity = entities [i];
1109 if (DTD.EntityDecls [entity] == null)
1110 HandleError (new XmlSchemaException ("Specified entity declaration used by default attribute value was not found.",
1111 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1117 // Extra ID attribute validity check.
1118 if (def.Datatype != null && def.Datatype.TokenizedType == XmlTokenizedType.ID)
1119 if (def.UnresolvedDefaultValue != null)
1120 HandleError (new XmlSchemaException ("ID attribute must not have fixed value constraint.",
1121 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
// Reads a NOTATION declaration: the notation name (split into prefix and local name at the first
// ':' when namespace handling is on), followed by a PUBLIC id with optional system id, or a
// SYSTEM id. At least one of the two identifiers is required.
// Throws a well-formedness error for missing whitespace or when neither identifier is present.
// NOTE(review): 'namespaces' is not declared in the visible part of this file.
1125 private DTDNotationDeclaration ReadNotationDecl()
1127 DTDNotationDeclaration decl = new DTDNotationDeclaration (DTD);
1128 if (!SkipWhitespace ())
1129 throw NotWFError ("Whitespace is required between NOTATION and name in DTD notation declaration.");
1131 decl.Name = ReadName (); // notation name
1133 if (namespaces) { // copy from SetProperties ;-)
1134 int indexOfColon = decl.Name.IndexOf (':');
1136 if (indexOfColon == -1) {
1137 decl.Prefix = String.Empty;
1138 decl.LocalName = decl.Name;
1140 decl.Prefix = decl.Name.Substring (0, indexOfColon);
1141 decl.LocalName = decl.Name.Substring (indexOfColon + 1);
1145 decl.Prefix = String.Empty;
1146 decl.LocalName = decl.Name;
1150 if(PeekChar () == 'P') {
1151 decl.PublicId = ReadPubidLiteral ();
1152 bool wsSkipped = SkipWhitespace ();
// A quote here means a system literal follows the public id.
1153 if (PeekChar () == '\'' || PeekChar () == '"') {
1155 throw NotWFError ("Whitespace is required between public id and system id.");
1156 decl.SystemId = ReadSystemLiteral (false);
1159 } else if(PeekChar () == 'S') {
1160 decl.SystemId = ReadSystemLiteral (true);
1163 if(decl.PublicId == null && decl.SystemId == null)
1164 throw NotWFError ("public or system declaration required for \"NOTATION\" declaration.");
1165 // This expanding is only allowed as a non-validating parser.
// Reads an external identifier and stores the result in cachedPublicId / cachedSystemId.
// One branch reads a SYSTEM literal including its keyword; the other reads a PUBLIC literal,
// requires whitespace, then reads the system literal without a keyword.
// NOTE(review): the case labels and any reset of the cached values are not visible in this listing.
1171 private void ReadExternalID () {
1172 switch (PeekChar ()) {
1174 cachedSystemId = ReadSystemLiteral (true);
1177 cachedPublicId = ReadPubidLiteral ();
1178 if (!SkipWhitespace ())
1179 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
1180 cachedSystemId = ReadSystemLiteral (false);
1185 // The reader is positioned on the first 'S' of "SYSTEM".
// Reads a quoted system literal and returns its content. Characters are accumulated in the value
// buffer until the opening quote character is seen again.
// Throws a well-formedness error on missing whitespace after the keyword or on end of stream.
// NOTE(review): the use of 'expectSYSTEM' and the character-reading statement inside the loop are
// not visible in this listing.
1186 private string ReadSystemLiteral (bool expectSYSTEM)
1190 if (!SkipWhitespace ())
1191 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
1195 int quoteChar = ReadChar (); // apos or quot
1197 ClearValueBuffer ();
1198 while (c != quoteChar) {
1201 throw NotWFError ("Unexpected end of stream in ExternalID.");
1203 AppendValueChar (c);
1205 return CreateValueString (); //currentTag.ToString (startPos, currentTag.Length - 1 - startPos);
// Reads a quoted public identifier literal and returns its content. Every character other than
// the closing quote must satisfy XmlChar.IsPubidChar.
// Throws a well-formedness error on missing whitespace after the keyword, on end of stream, or
// on a character that is not allowed in a public id.
// NOTE(review): the keyword check and the character-reading statement inside the loop are not
// visible in this listing.
1208 private string ReadPubidLiteral()
1211 if (!SkipWhitespace ())
1212 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
1213 int quoteChar = ReadChar ();
1215 ClearValueBuffer ();
1216 while(c != quoteChar)
1219 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
1220 if(c != quoteChar && !XmlChar.IsPubidChar (c))
1221 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char) c));
1223 AppendValueChar (c);
1225 return CreateValueString (); //currentTag.ToString (startPos, currentTag.Length - 1 - startPos);
1228 // The reader is positioned on the first character
// Reads a Name: delegates to ReadNameOrNmToken with the name-start rule enforced.
1230 internal string ReadName ()
1232 return ReadNameOrNmToken(false);
1235 // The reader is positioned on the first character
// Reads an Nmtoken: delegates to ReadNameOrNmToken, where any name character may come first.
1237 private string ReadNmToken ()
1239 return ReadNameOrNmToken(true);
// Reads a Name or an Nmtoken from the input and returns it as a string obtained from the DTD's
// name table. The first character is validated with IsNameChar or with IsFirstNameChar; the
// remaining characters are consumed while they are name characters.
// Throws a well-formedness error when the first character is not legal.
// NOTE(review): the branch on 'isNameToken' and the reset of the name buffer length are not
// visible in this listing; callers pass true for an Nmtoken and false for a Name.
1242 private string ReadNameOrNmToken(bool isNameToken)
1244 int ch = PeekChar ();
1246 if (!XmlChar.IsNameChar (ch))
1247 throw NotWFError (String.Format ("a nmtoken did not start with a legal character {0} ({1})", ch, (char) ch));
1250 if (!XmlChar.IsFirstNameChar (ch))
1251 throw NotWFError (String.Format ("a name did not start with a legal character {0} ({1})", ch, (char) ch));
1256 AppendNameChar (ReadChar ());
1258 while (XmlChar.IsNameChar (PeekChar ())) {
1259 AppendNameChar (ReadChar ());
1262 return CreateNameString ();
1265 // Reads the next character and compares it against the
1266 // specified character.
// Throws a well-formedness error describing both characters when they differ.
// NOTE(review): the format arguments of the error message are not visible in this listing.
1267 private void Expect (int expected)
1269 int ch = ReadChar ();
1271 if (ch != expected) {
1272 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1273 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
// Consumes the input one character at a time and requires it to spell out
// 'expected' exactly; each character is verified by Expect (int), which
// raises the well-formedness error on the first mismatch.
// (Reconstructed from the listing; only the enclosing braces were absent.)
private void Expect (string expected)
{
	foreach (char required in expected)
		Expect (required);
}
// Reads characters, passing over whitespace, and requires the first non-whitespace character to
// be 'c'. Throws a well-formedness error naming the expected and the found character otherwise.
// NOTE(review): the loop construct and the comparison against 'c' are not visible in this listing.
1288 private void ExpectAfterWhitespace (char c)
1291 int i = ReadChar ();
1292 if (XmlChar.IsWhitespace (i))
1295 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0} but found {1} [{2}].", c, (char) i, i));
1300 // Does not consume the first non-whitespace character.
// Loops while the next character is whitespace; 'skipped' records whether the very first peeked
// character was whitespace.
// NOTE(review): the loop body and the return statement are not visible in this listing; callers
// treat the result as "at least one whitespace character was present".
1301 private bool SkipWhitespace ()
1303 bool skipped = XmlChar.IsWhitespace (PeekChar ());
1304 while (XmlChar.IsWhitespace (PeekChar ()))
// Returns the next character of the current input without consuming it; callers compare the
// result with -1 to detect end of input.
1309 private int PeekChar ()
1311 return currentInput.PeekChar ();
// Consumes and returns the next character of the current input.
1314 private int ReadChar ()
1316 return currentInput.ReadChar ();
1319 // The reader is positioned on the first character after
1320 // the leading '<!--'.
// Reads and discards a comment. A "--" sequence must be followed by '>', otherwise a
// well-formedness error is raised; invalid XML characters are rejected as well.
// Seeing a comment also disables a subsequent text declaration on the current input.
// NOTE(review): the statements that consume "-->" and end the loop are not visible in this listing.
1321 private void ReadComment ()
1323 currentInput.AllowTextDecl = false;
1325 while (PeekChar () != -1) {
1326 int ch = ReadChar ();
1328 if (ch == '-' && PeekChar () == '-') {
1331 if (PeekChar () != '>')
1332 throw NotWFError ("comments cannot contain '--'");
1338 if (XmlChar.IsInvalid (ch))
1339 throw NotWFError ("Not allowed character was found.");
1343 // The reader is positioned on the first character
1346 // It may be xml declaration or processing instruction.
// Reads a processing instruction. A target of exactly "xml" is handled as a text declaration;
// any other casing of "xml" is a well-formedness error. For ordinary targets the content is read
// up to the closing "?>" and a later text declaration is disallowed on the current input.
// NOTE(review): the statements executed once "?>" is found are not visible in this listing.
1347 private void ReadProcessingInstruction ()
1349 string target = ReadName ();
1350 if (target == "xml") {
1351 ReadTextDeclaration ();
1353 } else if (String.Compare (target, "xml", true, CultureInfo.InvariantCulture) == 0)
1354 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
1356 currentInput.AllowTextDecl = false;
// The target must be followed by whitespace unless the instruction ends immediately.
1358 if (!SkipWhitespace ())
1359 if (PeekChar () != '?')
1360 throw NotWFError ("Invalid processing instruction name was found.");
1362 while (PeekChar () != -1) {
1363 int ch = ReadChar ();
1365 if (ch == '?' && PeekChar () == '>') {
1372 // The reader is positioned after "<?xml "
// Reads a text declaration. It is only accepted while the current input still allows one, and
// that permission is revoked as soon as reading starts. An optional version pseudo-attribute is
// read into a three-character buffer and compared with "1.0"; the encoding pseudo-attribute is
// mandatory and its quoted value is skipped here.
// Throws a well-formedness error for a misplaced declaration, a malformed version or encoding,
// or a missing encoding.
// NOTE(review): the quote case labels, the version length increment and the closing "?>"
// handling are not visible in this listing.
1373 private void ReadTextDeclaration ()
1375 if (!currentInput.AllowTextDecl)
1376 throw NotWFError ("Text declaration cannot appear in this state.");
1378 currentInput.AllowTextDecl = false;
1383 if (PeekChar () == 'v') {
1385 ExpectAfterWhitespace ('=');
1387 int quoteChar = ReadChar ();
1388 char [] expect1_0 = new char [3];
1389 int versionLength = 0;
1390 switch (quoteChar) {
1393 while (PeekChar () != quoteChar) {
1394 if (PeekChar () == -1)
1395 throw NotWFError ("Invalid version declaration inside text declaration.");
1396 else if (versionLength == 3)
1397 throw NotWFError ("Invalid version number inside text declaration.");
1399 expect1_0 [versionLength] = (char) ReadChar ();
1401 if (versionLength == 3 && new String (expect1_0) != "1.0")
1402 throw NotWFError ("Invalid version number inside text declaration.");
1409 throw NotWFError ("Invalid version declaration inside text declaration.");
1413 if (PeekChar () == 'e') {
1414 Expect ("encoding");
1415 ExpectAfterWhitespace ('=');
1417 int quoteChar = ReadChar ();
1418 switch (quoteChar) {
1421 while (PeekChar () != quoteChar)
1422 if (ReadChar () == -1)
1423 throw NotWFError ("Invalid encoding declaration inside text declaration.");
1428 throw NotWFError ("Invalid encoding declaration inside text declaration.");
1430 // Encoding value should be checked inside XmlInputStream.
1433 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
1438 // Note that now this method behaves differently from
1439 // XmlTextReader's one. It calls AppendValueChar() internally.
// Reads a numeric character reference from the input. A leading 'x' selects hexadecimal digits,
// otherwise decimal digits are read; digits are accumulated until ';' or end of input.
// The resulting code point is validated and appended to the value buffer.
// Throws a well-formedness error for a non-digit character or a disallowed code point.
// NOTE(review): the declaration of 'value', the error-message arguments and the return statement
// are not visible in this listing. If 'value' is a 32-bit int, a long run of digits can overflow
// before the IsInvalid check is reached - confirm.
1440 private int ReadCharacterReference ()
1444 if (PeekChar () == 'x') {
1447 while (PeekChar () != ';' && PeekChar () != -1) {
1448 int ch = ReadChar ();
1450 if (ch >= '0' && ch <= '9')
1451 value = (value << 4) + ch - '0';
1452 else if (ch >= 'A' && ch <= 'F')
1453 value = (value << 4) + ch - 'A' + 10;
1454 else if (ch >= 'a' && ch <= 'f')
1455 value = (value << 4) + ch - 'a' + 10;
1457 throw NotWFError (String.Format (
1458 CultureInfo.InvariantCulture,
1459 "invalid hexadecimal digit: {0} (#x{1:X})",
1464 while (PeekChar () != ';' && PeekChar () != -1) {
1465 int ch = ReadChar ();
1467 if (ch >= '0' && ch <= '9')
1468 value = value * 10 + ch - '0';
1470 throw NotWFError (String.Format (
1471 CultureInfo.InvariantCulture,
1472 "invalid decimal digit: {0} (#x{1:X})",
1480 // There is no way to save surrogate pairs...
1481 if (XmlChar.IsInvalid (value))
1482 throw NotWFError ("Referenced character was not allowed in XML.");
1483 AppendValueChar (value);
// Appends the character with code point ch to nameBuffer, growing the buffer
// through CheckNameCapacity () before every slot that is written.
// Code points up to U+FFFF occupy one char; anything above is stored as a
// UTF-16 surrogate pair using the RFC 2781 split that AppendValueChar applies
// (the previous arithmetic produced wrong pairs, e.g. D800 E000 for U+10400).
private void AppendNameChar (int ch)
{
	CheckNameCapacity ();
	if (ch <= Char.MaxValue) {
		nameBuffer [nameLength++] = (char) ch;
		return;
	}

	// Supplementary code point: remove the 0x10000 offset, then place the
	// upper 10 bits in the high surrogate and the lower 10 bits in the low one.
	int utag = ch - 0x10000;
	nameBuffer [nameLength++] = (char) ((utag >> 10) + 0xD800);
	// The second code unit needs its own capacity check.
	CheckNameCapacity ();
	nameBuffer [nameLength++] = (char) ((utag & 0x3FF) + 0xDC00);
}
// Makes room for one more character in nameBuffer: when the buffer is exactly
// full (nameLength == nameCapacity) its capacity is doubled and the first
// nameLength characters are carried over to the new array.
private void CheckNameCapacity ()
{
	// Still at least one free slot (or the counters differ): nothing to do.
	if (nameLength != nameCapacity)
		return;

	nameCapacity *= 2;
	char [] previous = nameBuffer;
	nameBuffer = new char [nameCapacity];
	Array.Copy (previous, nameBuffer, nameLength);
}
// Adds the first nameLength characters of nameBuffer to DTD.NameTable and
// returns the string that the name table hands back.
private string CreateNameString ()
{
	string name = DTD.NameTable.Add (nameBuffer, 0, nameLength);
	return name;
}
// Appends the character with code point ch to valueBuffer, either as a single
// char or as a UTF-16 surrogate pair, or throws an XmlException.
// NOTE(review): the conditions that select between the three outcomes below
// are on lines not shown in this listing.
1514 private void AppendValueChar (int ch)
1516 //See http://www.faqs.org/rfcs/rfc2781.html for used algorithm
// Single-char outcome: ch is stored as one char.
1518 valueBuffer.Append ((char) ch);
// Error outcome: reported with the reader's current LineNumber / LinePosition.
1522 throw new XmlException ("The numeric entity value is too large", null, LineNumber, LinePosition);
// Surrogate-pair outcome: after removing the 0x10000 offset, the bits above the
// low 10 go into the first char (base 0xD800) and the low 10 bits into the
// second char (base 0xDC00).
1525 int utag = ch - 0x10000;
1526 valueBuffer.Append((char) ((utag >> 10) + 0xD800));
1527 valueBuffer.Append((char) ((utag & 0x3FF) + 0xDC00));
// Returns the text accumulated so far in valueBuffer.
private string CreateValueString ()
{
	string text = valueBuffer.ToString ();
	return text;
}
// Empties valueBuffer so that a new value can be accumulated from scratch.
private void ClearValueBuffer ()
{
	valueBuffer.Remove (0, valueBuffer.Length);
}
// Reads a quoted default attribute value and returns it as a string that still
// includes the opening and closing quote characters.
// NOTE(review): this listing is not contiguous (the tests that lead to the two
// error branches inside the loop, and the branch keywords around the entity
// handling, are not shown); the comments describe only the visible statements.
1541 // The reader is positioned on the quote character.
1542 // *Keeps quote char* to value to get_QuoteChar() correctly.
1543 private string ReadDefaultAttribute ()
// Start from an empty value buffer.
1545 ClearValueBuffer ();
1549 int quoteChar = ReadChar ();
// Only ' and " are accepted as delimiters.
1551 if (quoteChar != '\'' && quoteChar != '\"')
1552 throw NotWFError ("an attribute value was not quoted");
// The opening quote is stored as the first character of the value.
1554 AppendValueChar (quoteChar);
// Consume characters until the matching closing quote is next.
1556 while (PeekChar () != quoteChar) {
1557 int ch = ReadChar ();
1562 throw NotWFError ("attribute values cannot contain '<'");
1564 throw NotWFError ("unexpected end of file in an attribute value");
// NOTE(review): presumably the reference case - ch is stored and a following
// '#' marks a character reference; the statement guarded by this test is not shown.
1566 AppendValueChar (ch);
1567 if (PeekChar () == '#')
1569 // Check XML 1.0 section 3.1 WFC.
1570 string entName = ReadName ();
// Names for which XmlChar.GetPredefinedEntity returns a negative value are
// looked up among the DTD's entity declarations.
1572 if (XmlChar.GetPredefinedEntity (entName) < 0) {
1573 DTDEntityDeclaration entDecl =
1574 DTD == null ? null : DTD.EntityDecls [entName];
// An undeclared entity, or one that has a SystemId, is subject to the check below.
// NOTE(review): DTD is null-checked in the lookup above but dereferenced
// without a check in the condition below - confirm DTD cannot be null here.
1575 if (entDecl == null || entDecl.SystemId != null)
1576 // WFC: Entity Declared (see 4.1)
1577 if (DTD.IsStandalone || (DTD.SystemId == null && !DTD.InternalSubsetHasPEReference))
1578 throw NotWFError ("Reference to external entities is not allowed in attribute value.");
// The entity name and the terminating ';' are copied into the value.
1580 valueBuffer.Append (entName);
1581 AppendValueChar (';');
// NOTE(review): presumably the default case - any other character is copied as is.
1584 AppendValueChar (ch);
// Consume the closing quote and store it as the last character of the value.
1589 ReadChar (); // quoteChar
1590 AppendValueChar (quoteChar);
1592 return CreateValueString ();
// Switches reading to the external entity named by url: the URL is resolved
// against DTD.BaseURI, the current input is pushed on parserInputStack, and a
// new XmlParserInput over the resolved stream becomes currentInput.
// If the entity cannot be opened, the failure is passed to HandleError and an
// empty input is installed instead.
// NOTE(review): this listing is not contiguous (the declarations of baseUri
// and s and the opening try lines are not shown).
1595 private void PushParserInput (string url)
// Build the base Uri only when DTD.BaseURI is a non-empty string; a
// UriFormatException from the constructor is caught (handler body not shown).
1599 if (DTD.BaseURI != null && DTD.BaseURI.Length > 0)
1600 baseUri = new Uri (DTD.BaseURI);
1601 } catch (UriFormatException) {
// A null or empty url resolves to the base Uri itself.
1604 Uri absUri = url != null && url.Length > 0 ?
1605 DTD.Resolver.ResolveUri (baseUri, url) : baseUri;
1606 string absPath = absUri != null ? absUri.ToString () : String.Empty;
// Refuse an entity whose resolved path equals the BaseURI of an input already
// on the stack. The check runs before the current input is pushed, so only
// inputs already on the stack are compared.
1608 foreach (XmlParserInput i in parserInputStack.ToArray ()) {
1609 if (i.BaseURI == absPath)
1610 throw NotWFError ("Nested inclusion is not allowed: " + url);
1612 parserInputStack.Push (currentInput);
// Ask the resolver for the entity as a Stream and wrap it as the new input.
1615 s = DTD.Resolver.GetEntity (absUri, null, typeof (Stream)) as Stream;
1616 currentInput = new XmlParserInput (new XmlStreamReader (s), absPath);
1617 } catch (Exception ex) { // FIXME: (wishlist) Bad exception catch ;-(
// Report the failure with the position and BaseURI of currentInput (zeros and
// an empty string when it is null), wrapping the original exception.
1620 int line = currentInput == null ? 0 : currentInput.LineNumber;
1621 int col = currentInput == null ? 0 : currentInput.LinePosition;
1622 string bu = (currentInput == null) ? String.Empty : currentInput.BaseURI;
1623 HandleError (new XmlSchemaException ("Specified external entity not found. Target URL is " + url + " .",
1624 line, col, null, bu, ex));
// Continue with an empty input in place of the missing entity.
1625 currentInput = new XmlParserInput (new StringReader (String.Empty), absPath);
// Closes the input currently being read and resumes the one most recently
// pushed on parserInputStack.
private void PopParserInput ()
{
	currentInput.Close ();

	object resumed = parserInputStack.Pop ();
	currentInput = resumed as XmlParserInput;
}
1635 private void HandleError (XmlSchemaException ex)
1637 #if DTD_HANDLE_EVENTS
1638 if (this.ValidationEventHandler != null)
1639 ValidationEventHandler (this, new ValidationEventArgs (ex, ex.Message, XmlSeverityType.Error));