2 // System.Xml.DTDReader
5 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 // (C)2003 Atsushi Enomoto
11 // When a parameter entity contains cp section, it should be closed
12 // within that declaration.
14 // Resolution to external entities from different BaseURI fails (it is
15 // the same as MS.NET 1.1, but should be fixed in the future).
19 // Permission is hereby granted, free of charge, to any person obtaining
20 // a copy of this software and associated documentation files (the
21 // "Software"), to deal in the Software without restriction, including
22 // without limitation the rights to use, copy, modify, merge, publish,
23 // distribute, sublicense, and/or sell copies of the Software, and to
24 // permit persons to whom the Software is furnished to do so, subject to
25 // the following conditions:
27 // The above copyright notice and this permission notice shall be
28 // included in all copies or substantial portions of the Software.
30 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
34 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
35 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
36 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 using System.Collections;
41 using System.Globalization;
46 using XmlSchemaException = System.Xml.XmlException;
48 using System.Xml.Schema;
53 internal class DTDReader : IXmlLineInfo
// Input currently being read; PeekChar () and ReadChar () delegate to it.
55 private XmlParserInput currentInput;
// Stack of parser inputs; GenerateDTDObjectModel () compares its Count with the depth at entry.
56 private Stack parserInputStack;
// Growable character buffer used to accumulate a name (see AppendNameChar / CheckNameCapacity).
58 private char [] nameBuffer;
// Number of characters currently stored in nameBuffer.
59 private int nameLength;
// Current allocated size of nameBuffer.
60 private int nameCapacity;
// Initial size of nameBuffer.
61 private const int initialNameCapacity = 256;
// Accumulates literal/entity values one character at a time (see AppendValueChar).
63 private StringBuilder valueBuffer;
// Line number and position supplied to the constructor; passed to the XmlParserInput
// that is created for the internal subset.
65 private int currentLinkedNodeLineNumber;
66 private int currentLinkedNodeLinePosition;
68 // Parameter entity placeholder
// Compared against zero after each subset is parsed; appears to count INCLUDE
// sections that are still open — TODO confirm against the full source.
69 private int dtdIncludeSect;
// Backing field of the Normalization property.
71 private bool normalization;
// True while the internal subset is being parsed.
73 private bool processingInternalSubset;
// Public/system identifiers most recently read by ReadExternalID ().
75 string cachedPublicId;
76 string cachedSystemId;
// NOTE(review): presumably raised through HandleError () for non-fatal validity
// errors; HandleError is not visible in this excerpt — confirm.
81 public event ValidationEventHandler ValidationEventHandler;
// Creates a reader for the given DTD object model and records the line number and
// position at which the DTD starts.
// NOTE(review): the line declaring the startLineNumber parameter and the rest of
// the constructor body are absent from this excerpt.
86 public DTDReader (DTDObjectModel dtd,
88 int startLinePosition)
91 currentLinkedNodeLineNumber = startLineNumber;
92 currentLinkedNodeLinePosition = startLinePosition;
// Base URI of the input that is currently being read.
98 public string BaseURI {
99 get { return currentInput.BaseURI; }
// Gets or sets the normalization flag. When set, ReadEntityValueDecl () rejects
// invalid characters and the flag is forwarded to the XmlTextReader used for
// external general entities.
102 public bool Normalization {
103 get { return normalization; }
104 set { normalization = value; }
// Line number reported by the input that is currently being read.
107 public int LineNumber {
108 get { return currentInput.LineNumber; }
// Line position reported by the input that is currently being read.
111 public int LinePosition {
112 get { return currentInput.LinePosition; }
// IXmlLineInfo member reporting whether line information is available.
// NOTE(review): the body is absent from this excerpt, so the returned value is not shown.
115 public bool HasLineInfo ()
// Builds (but does not throw) an XmlException that carries this reader's current
// line information and base URI together with the given message. Callers write
// "throw NotWFError (...)" to report a well-formedness error.
private XmlException NotWFError (string message)
{
	IXmlLineInfo lineInfo = this as IXmlLineInfo;
	string baseUri = BaseURI;
	return new XmlException (lineInfo, baseUri, message);
}
// Allocates the input stack and the name/value buffers.
// NOTE(review): the declaration line of the member containing these statements is
// absent from this excerpt.
129 parserInputStack = new Stack ();
// The name buffer starts at initialNameCapacity and is grown by CheckNameCapacity ().
131 nameBuffer = new char [initialNameCapacity];
133 nameCapacity = initialNameCapacity;
135 valueBuffer = new StringBuilder (512);
// Drives DTD parsing: processes the internal subset when DTD.InternalSubset is
// non-empty, then the external subset when a system id and a resolver are both
// available, and finally scans entity declarations and clears DTD.ExternalResources.
// Throws a not-well-formed error if an INCLUDE section is left open by either subset.
// NOTE(review): some lines of this method are absent from this excerpt; the comments
// describe the visible statements only.
138 internal DTDObjectModel GenerateDTDObjectModel ()
// Input-stack depth on entry; the parse loops below run until the stack is back at it.
141 int originalParserDepth = parserInputStack.Count;
143 if (DTD.InternalSubset != null && DTD.InternalSubset.Length > 0) {
144 this.processingInternalSubset = true;
// Saved so the previous input can be restored once the internal subset is done.
145 XmlParserInput original = currentInput;
147 currentInput = new XmlParserInput (
148 new StringReader (DTD.InternalSubset),
150 currentLinkedNodeLineNumber,
151 currentLinkedNodeLinePosition);
// A text declaration is not accepted in the internal subset.
152 currentInput.AllowTextDecl = false;
154 more = ProcessDTDSubset ();
155 if (PeekChar () == -1 && parserInputStack.Count > 0)
157 } while (more || parserInputStack.Count > originalParserDepth);
158 if (dtdIncludeSect != 0)
159 throw NotWFError ("INCLUDE section is not ended correctly.");
161 currentInput = original;
162 this.processingInternalSubset = false;
// External subset: requires a non-empty system id and a resolver.
164 if (DTD.SystemId != null && DTD.SystemId != String.Empty && DTD.Resolver != null) {
165 PushParserInput (DTD.SystemId);
167 more = ProcessDTDSubset ();
168 if (PeekChar () == -1 && parserInputStack.Count > 1)
// Presumably the "+ 1" accounts for the input pushed by PushParserInput above — TODO confirm.
170 } while (more || parserInputStack.Count > originalParserDepth + 1);
171 if (dtdIncludeSect != 0)
172 throw NotWFError ("INCLUDE section is not ended correctly.");
176 ArrayList sc = new ArrayList ();
178 // Entity recursion check.
// NOTE(review): only entities whose NotationName is non-null are scanned here —
// confirm that this condition is intended.
179 foreach (DTDEntityDeclaration ent in DTD.EntityDecls.Values) {
180 if (ent.NotationName != null) {
181 ent.ScanEntityValue (sc);
185 // release unnecessary memory usage
186 DTD.ExternalResources.Clear ();
191 // Reads any one of the following:
192 // elementdecl, AttlistDecl, EntityDecl, NotationDecl,
193 // PI, Comment, Parameter Entity, or doctype termination char(']')
195 // Returns true if there may be more content, or false if not.
// NOTE(review): the switch/case skeleton of this method is absent from this
// excerpt; comments describe the visible statements only.
196 private bool ProcessDTDSubset ()
199 int c2 = ReadChar ();
205 // It affects entity references' well-formedness
206 if (this.processingInternalSubset)
207 DTD.InternalSubsetHasPEReference = true;
208 string peName = ReadName ();
// GetPEDecl () reports an undeclared parameter entity either as a fatal error or
// through HandleError ().
210 DTDParameterEntityDeclaration peDecl = GetPEDecl (peName);
// The entity is pushed onto the current input and the loop below runs until
// that buffer has been consumed.
213 currentInput.PushPEBuffer (peDecl);
214 // int currentLine = currentInput.LineNumber;
215 // int currentColumn = currentInput.LinePosition;
216 while (currentInput.HasPEBuffer)
219 // FIXME: Implement correct nest-level check.
220 // Don't depend on lineinfo (might not be supplied)
221 // if (currentInput.LineNumber != currentLine ||
222 // currentInput.LinePosition != currentColumn)
223 // throw NotWFError ("Incorrectly nested parameter entity.");
230 // Only read, no store.
231 ReadProcessingInstruction ();
234 CompileDeclaration ();
237 throw NotWFError ("Unexpected end of stream.");
239 throw NotWFError ("Syntax Error after '<' character: " + (char) c);
// A section terminator with no open section is an error.
243 if (dtdIncludeSect == 0)
244 throw NotWFError ("Unbalanced end of INCLUDE/IGNORE section.");
251 throw NotWFError (String.Format ("Syntax Error inside doctypedecl markup : {0}({1})", c2, (char) c2));
// Once any markup has been processed, a text declaration is no longer accepted.
253 currentInput.AllowTextDecl = false;
// Parses one markup declaration and stores the result in the matching DTD
// collection (EntityDecls, ElementDecls, AttListDecls, NotationDecls); parameter
// entity declarations and conditional sections are handled here as well.
// NOTE(review): the switch/case skeleton is absent from this excerpt; comments
// describe the visible statements only.
257 private void CompileDeclaration ()
263 // Only read, no store.
271 if (!SkipWhitespace ())
273 "Whitespace is required after '<!ENTITY' in DTD entity declaration.");
// A '%' after "<!ENTITY" introduces a parameter entity declaration.
275 if (PeekChar () == '%') {
277 if (!SkipWhitespace ()) {
281 // FIXME: Is this allowed? <!ENTITY % %name; ...>
282 // (i.e. Can PE name be replaced by another PE?)
284 if (XmlChar.IsNameChar (PeekChar ()))
285 ReadParameterEntityDecl ();
287 throw NotWFError ("expected name character");
291 DTDEntityDeclaration ent = ReadEntityDecl ();
// The first declaration of a general entity wins; later ones are not added.
292 if (DTD.EntityDecls [ent.Name] == null)
293 DTD.EntityDecls.Add (ent.Name, ent);
297 DTDElementDeclaration el = ReadElementDecl ();
298 DTD.ElementDecls.Add (el.Name, el);
301 throw NotWFError ("Syntax Error after '<!E' (ELEMENT or ENTITY must be found)");
306 DTDAttListDeclaration atl = ReadAttListDecl ();
307 DTD.AttListDecls.Add (atl.Name, atl);
311 DTDNotationDeclaration not = ReadNotationDecl ();
312 DTD.NotationDecls.Add (not.Name, not);
315 // conditional sections
319 switch (ReadChar ()) {
322 ExpectAfterWhitespace ('[');
332 throw NotWFError ("Syntax Error after '<!' characters.");
// Consumes an IGNORE conditional section, using a local depth counter for nesting.
// NOTE(review): the case labels and counter updates are absent from this excerpt.
336 private void ReadIgnoreSect ()
338 ExpectAfterWhitespace ('[');
// Number of sections still open; the loop runs until it drops to zero.
339 int dtdIgnoreSect = 1;
341 while (dtdIgnoreSect > 0) {
342 switch (ReadChar ()) {
344 throw NotWFError ("Unexpected IGNORE section end.");
// Look-ahead checks, presumably for the "<![" opener and the "]]>" terminator —
// TODO confirm against the full source.
346 if (PeekChar () != '!')
349 if (PeekChar () != '[')
355 if (PeekChar () != ']')
358 if (PeekChar () != '>')
365 if (dtdIgnoreSect != 0)
366 throw NotWFError ("IGNORE section is not ended correctly.");
369 // The reader is positioned on the head of the name.
// Reads an element declaration: whitespace, the element name, whitespace, then
// the content specification (via ReadContentSpec). The declaration records
// whether it came from the internal subset.
// NOTE(review): the end of the method (including the return) is absent from this excerpt.
370 private DTDElementDeclaration ReadElementDecl ()
372 DTDElementDeclaration decl = new DTDElementDeclaration (DTD);
373 decl.IsInternalSubset = this.processingInternalSubset;
375 if (!SkipWhitespace ())
376 throw NotWFError ("Whitespace is required between '<!ELEMENT' and name in DTD element declaration.");
378 decl.Name = ReadName ();
379 if (!SkipWhitespace ())
380 throw NotWFError ("Whitespace is required between name and content in DTD element declaration.");
382 ReadContentSpec (decl);
384 // This expanding is only allowed as a non-validating parser.
390 // read 'children'(BNF) of contentspec
// Fills decl.ContentModel from the content specification at the current position.
// The visible code covers the mixed-content form and the non-mixed form; the
// non-mixed form is built from ReadCP () results joined by '|' or ','.
// NOTE(review): many lines (the leading switch, character consumption, occurrence
// handling) are absent from this excerpt; comments describe visible statements only.
391 private void ReadContentSpec (DTDElementDeclaration decl)
405 DTDContentModel model = decl.ContentModel;
408 if(PeekChar () == '#') {
409 // Mixed Contents. "#PCDATA" must appear first.
410 decl.IsMixedContent = true;
411 model.Occurence = DTDOccurence.ZeroOrMore;
412 model.OrderType = DTDContentOrderType.Or;
416 while(PeekChar () != ')') {
418 if (PeekChar () == '%') {
425 DTDContentModel elem = new DTDContentModel (DTD, decl.Name);
426 // elem.LineNumber = currentInput.LineNumber;
427 // elem.LinePosition = currentInput.LinePosition;
428 elem.ElementName = ReadName ();
// AddContentModel () reports a duplicate element name through HandleError.
429 this.AddContentModel (model.ChildModels, elem);
434 if (model.ChildModels.Count > 0)
436 else if (PeekChar () == '*')
439 // Non-Mixed Contents
440 model.ChildModels.Add (ReadCP (decl));
443 do { // copied from ReadCP() ...;-)
444 if (PeekChar () == '%') {
448 if(PeekChar ()=='|') {
// A group may not mix '|' and ',' separators.
450 if (model.OrderType == DTDContentOrderType.Seq)
451 throw NotWFError ("Inconsistent choice markup in sequence cp.");
452 model.OrderType = DTDContentOrderType.Or;
455 AddContentModel (model.ChildModels, ReadCP (decl));
458 else if(PeekChar () == ',')
461 if (model.OrderType == DTDContentOrderType.Or)
462 throw NotWFError ("Inconsistent sequence markup in choice cp.");
463 model.OrderType = DTDContentOrderType.Seq;
466 model.ChildModels.Add (ReadCP (decl));
478 model.Occurence = DTDOccurence.Optional;
482 model.Occurence = DTDOccurence.ZeroOrMore;
486 model.Occurence = DTDOccurence.OneOrMore;
495 throw NotWFError ("ContentSpec is missing.");
499 // Read 'cp' (BNF) of contentdecl (BNF)
// Reads one content particle: either a parenthesized group (parsed recursively,
// with children joined by '|' or ',') or a single element name, and then sets
// an occurrence on the resulting model.
// NOTE(review): several lines (character consumption, else branches, the return)
// are absent from this excerpt; comments describe visible statements only.
500 private DTDContentModel ReadCP (DTDElementDeclaration elem)
502 DTDContentModel model = null;
504 if(PeekChar () == '(') {
505 model = new DTDContentModel (DTD, elem.Name);
508 model.ChildModels.Add (ReadCP (elem));
511 if (PeekChar () == '%') {
515 if(PeekChar ()=='|') {
// A group may not mix '|' and ',' separators.
517 if (model.OrderType == DTDContentOrderType.Seq)
518 throw NotWFError ("Inconsistent choice markup in sequence cp.");
519 model.OrderType = DTDContentOrderType.Or;
522 AddContentModel (model.ChildModels, ReadCP (elem));
525 else if(PeekChar () == ',') {
527 if (model.OrderType == DTDContentOrderType.Or)
528 throw NotWFError ("Inconsistent sequence markup in choice cp.");
529 model.OrderType = DTDContentOrderType.Seq;
532 model.ChildModels.Add (ReadCP (elem));
539 ExpectAfterWhitespace (')');
// Not a group: the particle is a single element name.
543 model = new DTDContentModel (DTD, elem.Name);
544 model.ElementName = ReadName ();
547 switch(PeekChar ()) {
549 model.Occurence = DTDOccurence.Optional;
553 model.Occurence = DTDOccurence.ZeroOrMore;
557 model.Occurence = DTDOccurence.OneOrMore;
// Checks cm against the models already in cmc: when cm names an element that an
// existing entry also names, a validity error is reported through HandleError.
// NOTE(review): the rest of the method (remaining exception arguments and the
// actual add to the collection) is absent from this excerpt.
564 private void AddContentModel (DTDContentModelCollection cmc, DTDContentModel cm)
566 if (cm.ElementName != null) {
567 for (int i = 0; i < cmc.Count; i++) {
568 if (cmc [i].ElementName == cm.ElementName) {
569 HandleError (new XmlSchemaException ("Element content must be unique inside mixed content model.",
582 // The reader is positioned on the first name char.
// Reads a parameter entity declaration. An external entity (starting with 'S' or
// 'P') gets its public/system ids from the cached values and is resolved through
// ResolveExternalEntityReplacementText; otherwise a quoted literal is read into
// the value buffer and resolved through ResolveInternalEntityReplacementText.
// The declaration is stored in DTD.PEDecls only if the name is not yet declared.
// NOTE(review): several lines (the ReadExternalID call, the literal loop header and
// its case labels) are absent from this excerpt.
583 private void ReadParameterEntityDecl ()
585 DTDParameterEntityDeclaration decl =
586 new DTDParameterEntityDeclaration (DTD);
587 decl.BaseURI = BaseURI;
588 decl.XmlResolver = DTD.Resolver;
590 decl.Name = ReadName ();
591 if (!SkipWhitespace ())
592 throw NotWFError ("Whitespace is required after name in DTD parameter entity declaration.");
594 if (PeekChar () == 'S' || PeekChar () == 'P') {
595 // read publicId/systemId
597 decl.PublicId = cachedPublicId;
598 decl.SystemId = cachedSystemId;
602 ResolveExternalEntityReplacementText (decl);
605 int quoteChar = ReadChar ();
606 if (quoteChar != '\'' && quoteChar != '"')
607 throw NotWFError ("quotation char was expected.");
614 throw NotWFError ("unexpected end of stream in entity value definition.");
// A quote character that is not the opening delimiter is kept as literal text.
616 if (quoteChar == '"')
619 AppendValueChar ('"');
622 if (quoteChar == '\'')
625 AppendValueChar ('\'');
628 if (XmlChar.IsInvalid (c))
629 throw NotWFError ("Invalid character was used to define parameter entity.");
634 decl.LiteralEntityValue = CreateValueString ();
636 ResolveInternalEntityReplacementText (decl);
638 ExpectAfterWhitespace ('>');
// The first declaration of a parameter entity wins; later ones are ignored.
641 if (DTD.PEDecls [decl.Name] == null) {
642 DTD.PEDecls.Add (decl.Name, decl);
// Computes decl.ReplacementText for an entity. When the declaration has a
// non-empty system id, its literal value is read through an XmlTextReader with
// the text declaration skipped; otherwise the literal value is used as is.
// NOTE(review): several lines (the Resolve call, the read loop, else branches) are
// absent from this excerpt.
646 private void ResolveExternalEntityReplacementText (DTDEntityBase decl)
648 if (decl.SystemId != null && decl.SystemId.Length > 0) {
649 // FIXME: it should not always be read in Element context
650 XmlTextReader xtr = new XmlTextReader (decl.LiteralEntityValue, XmlNodeType.Element, null);
651 xtr.SkipTextDeclaration ();
652 if (decl is DTDEntityDeclaration && DTD.EntityDecls [decl.Name] == null) {
653 // GE - also checked as valid contents
654 StringBuilder sb = new StringBuilder ();
655 xtr.Normalization = this.Normalization;
658 sb.Append (xtr.ReadOuterXml ());
659 decl.ReplacementText = sb.ToString ();
// Other cases take the remaining text unparsed.
663 decl.ReplacementText = xtr.GetRemainder ().ReadToEnd ();
666 decl.ReplacementText = decl.LiteralEntityValue;
// Builds decl.ReplacementText from decl.LiteralEntityValue: character references
// are replaced by their character, general entity references are copied through
// unexpanded, and parameter entity references are replaced by their value.
// NOTE(review): the switch/case skeleton and index updates are absent from this
// excerpt; comments describe visible statements only.
669 private void ResolveInternalEntityReplacementText (DTDEntityBase decl)
671 string value = decl.LiteralEntityValue;
672 int len = value.Length;
674 for (int i = 0; i < len; i++) {
681 end = value.IndexOf (';', i);
683 throw new XmlException (decl, decl.BaseURI, "Invalid reference markup.");
// '#' marks a character reference.
685 if (value [i] == '#') {
687 ch = GetCharacterReference (decl, value, ref i, end);
688 if (XmlChar.IsInvalid (ch))
689 throw NotWFError ("Invalid character was used to define parameter entity.");
692 name = value.Substring (i, end - i);
693 if (!XmlChar.IsName (name))
694 throw NotWFError (String.Format ("'{0}' is not a valid entity reference name.", name));
695 // don't expand "general" entity.
696 AppendValueChar ('&');
697 valueBuffer.Append (name);
698 AppendValueChar (';');
702 if (XmlChar.IsInvalid (ch))
703 throw new XmlException (decl, decl.BaseURI, "Invalid character was found in the entity declaration.");
704 AppendValueChar (ch);
708 end = value.IndexOf (';', i);
710 throw new XmlException (decl, decl.BaseURI, "Invalid reference markup.");
711 name = value.Substring (i, end - i);
// GetPEValue () yields an empty string when the entity is unknown.
712 valueBuffer.Append (GetPEValue (name));
716 AppendValueChar (ch);
720 decl.ReplacementText = CreateValueString ();
// Parses the number of a character reference located in value between index and
// end. A leading 'x' selects hexadecimal, otherwise the digits are decimal.
// A FormatException from int.Parse is rethrown as an XmlException for entity li.
// NOTE(review): only FormatException is caught in the visible code, so an
// OverflowException from a very long digit string would propagate unchanged;
// the decimal int.Parse call uses the default number style, which also accepts a
// sign and surrounding whitespace — confirm whether either matters to callers.
// NOTE(review): the lines that update index and return are absent from this excerpt.
725 private int GetCharacterReference (DTDEntityBase li, string value, ref int index, int end)
728 if (value [index] == 'x') {
730 ret = int.Parse (value.Substring (index + 1, end - index - 1), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
731 } catch (FormatException) {
732 throw new XmlException (li, li.BaseURI, "Invalid number for a character reference.");
736 ret = int.Parse (value.Substring (index, end - index), CultureInfo.InvariantCulture);
737 } catch (FormatException) {
738 throw new XmlException (li, li.BaseURI, "Invalid number for a character reference.");
// Returns the replacement text of the parameter entity named peName, or an empty
// string when GetPEDecl () yields no declaration for it.
private string GetPEValue (string peName)
{
	DTDParameterEntityDeclaration declaration = GetPEDecl (peName);
	if (declaration == null)
		return String.Empty;
	return declaration.ReplacementText;
}
// Looks up the parameter entity declaration named peName in DTD.PEDecls.
// A declaration flagged IsInternalSubset is rejected with a not-well-formed error.
// When no declaration is found, the visible code throws a not-well-formed error if
// the DTD has no system id and the internal subset contained no PE reference, or
// if the document is standalone; otherwise the problem is reported via HandleError.
// NOTE(review): the return statements are absent from this excerpt.
752 private DTDParameterEntityDeclaration GetPEDecl (string peName)
754 DTDParameterEntityDeclaration peDecl =
755 DTD.PEDecls [peName] as DTDParameterEntityDeclaration;
756 if (peDecl != null) {
757 if (peDecl.IsInternalSubset)
758 throw NotWFError ("Parameter entity is not allowed in internal subset entity '" + peName + "'");
761 // See XML 1.0 section 4.1 for both WFC and VC.
762 if ((DTD.SystemId == null && !DTD.InternalSubsetHasPEReference) || DTD.IsStandalone)
763 throw NotWFError (String.Format ("Parameter entity '{0}' not found.",peName));
764 HandleError (new XmlSchemaException (
765 "Parameter entity " + peName + " not found.", null));
// Expands every consecutive parameter entity reference ('%') at the current
// position by calling TryExpandPERefSpaceKeep () repeatedly.
// NOTE(review): the return statements are absent from this excerpt; presumably the
// result tells whether any reference was found — TODO confirm.
769 private bool TryExpandPERef ()
771 if (PeekChar () != '%')
773 while (PeekChar () == '%') {
774 TryExpandPERefSpaceKeep ();
780 // Tries to expand parameter entities, but it should not skip spaces
// A '%' met while the internal subset is being processed is a not-well-formed error.
// NOTE(review): the expansion itself and the return statements are absent from
// this excerpt.
781 private bool TryExpandPERefSpaceKeep ()
783 if (PeekChar () == '%') {
784 if (this.processingInternalSubset)
785 throw NotWFError ("Parameter entity reference is not allowed inside internal subset.");
794 // reader is positioned after '%'
// Reads the entity name and pushes the matching declaration from DTD.PEDecls onto
// the current input. An unknown name is reported through HandleError and nothing
// is pushed.
// NOTE(review): at least one line between the name read and the lookup is absent
// from this excerpt.
795 private void ExpandPERef ()
797 string peName = ReadName ();
799 DTDParameterEntityDeclaration peDecl =
800 DTD.PEDecls [peName] as DTDParameterEntityDeclaration;
801 if (peDecl == null) {
802 HandleError (new XmlSchemaException ("Parameter entity " + peName + " not found.", null));
803 return; // do nothing
805 currentInput.PushPEBuffer (peDecl);
808 // The reader is positioned on the head of the name.
// Reads a general entity declaration. An external entity (starting with 'S' or
// 'P') takes its ids from the cached values and may carry an NDATA notation name.
// Without a notation name it is resolved as external text; with one, its literal
// and replacement text are set to empty. Any other start is read as a quoted
// internal value.
// NOTE(review): several lines (the ReadExternalID call, "NDATA" matching, else
// branches, the return) are absent from this excerpt.
809 private DTDEntityDeclaration ReadEntityDecl ()
811 DTDEntityDeclaration decl = new DTDEntityDeclaration (DTD);
812 decl.BaseURI = BaseURI;
813 decl.XmlResolver = DTD.Resolver;
814 decl.IsInternalSubset = this.processingInternalSubset;
816 decl.Name = ReadName ();
817 if (!SkipWhitespace ())
818 throw NotWFError ("Whitespace is required between name and content in DTD entity declaration.");
821 if (PeekChar () == 'S' || PeekChar () == 'P') {
824 decl.PublicId = cachedPublicId;
825 decl.SystemId = cachedSystemId;
826 if (SkipWhitespace ()) {
827 if (PeekChar () == 'N') {
830 if (!SkipWhitespace ())
831 throw NotWFError ("Whitespace is required after NDATA.");
832 decl.NotationName = ReadName (); // ndata_name
835 if (decl.NotationName == null) {
837 ResolveExternalEntityReplacementText (decl);
// Entity with a notation name: no text is associated with it.
840 decl.LiteralEntityValue = String.Empty;
841 decl.ReplacementText = String.Empty;
846 ReadEntityValueDecl (decl);
847 ResolveInternalEntityReplacementText (decl);
850 // This expanding is only allowed as a non-validating parser.
// Reads a quoted entity value into decl.LiteralEntityValue. Parameter entity
// references inside the literal are replaced by their value, except that they are
// a not-well-formed error for a declaration from the internal subset.
// NOTE(review): the switch/case skeleton inside the loop is absent from this excerpt.
856 private void ReadEntityValueDecl (DTDEntityDeclaration decl)
859 // quotation char will be finally removed on unescaping
860 int quoteChar = ReadChar ();
861 if (quoteChar != '\'' && quoteChar != '"')
862 throw NotWFError ("quotation char was expected.");
865 while (PeekChar () != quoteChar) {
866 int ch = ReadChar ();
869 string name = ReadName ();
871 if (decl.IsInternalSubset)
872 throw NotWFError (String.Format ("Parameter entity is not allowed in internal subset entity '{0}'", name));
873 valueBuffer.Append (GetPEValue (name));
876 throw NotWFError ("unexpected end of stream.");
// Character validity is enforced only when normalization is enabled.
878 if (this.normalization && XmlChar.IsInvalid (ch))
879 throw NotWFError ("Invalid character was found in the entity declaration.");
880 AppendValueChar (ch);
884 // string value = Dereference (CreateValueString (), false);
885 string value = CreateValueString ();
889 decl.LiteralEntityValue = value;
// Reads an ATTLIST declaration for one element. An existing declaration for the
// same element name is looked up in DTD.AttListDecls, and attribute definitions
// are read while a name character follows. A second ID-typed attribute is
// reported through HandleError.
// NOTE(review): several lines (the null check around the new declaration, adding
// definitions, the return) are absent from this excerpt.
892 private DTDAttListDeclaration ReadAttListDecl ()
894 TryExpandPERefSpaceKeep ();
895 if (!SkipWhitespace ())
896 throw NotWFError ("Whitespace is required between ATTLIST and name in DTD attlist declaration.");
898 string name = ReadName (); // target element name
899 DTDAttListDeclaration decl =
900 DTD.AttListDecls [name] as DTDAttListDeclaration;
902 decl = new DTDAttListDeclaration (DTD);
903 decl.IsInternalSubset = this.processingInternalSubset;
// Whitespace may be omitted only when the declaration ends immediately.
906 if (!SkipWhitespace ())
907 if (PeekChar () != '>')
908 throw NotWFError ("Whitespace is required between name and content in non-empty DTD attlist declaration.");
912 while (XmlChar.IsNameChar (PeekChar ())) {
913 DTDAttributeDefinition def = ReadAttributeDefinition ();
914 // There must not be two or more ID attributes.
915 if (def.Datatype.TokenizedType == XmlTokenizedType.ID) {
916 for (int i = 0; i < decl.Definitions.Count; i++) {
917 DTDAttributeDefinition d = decl [i];
918 if (d.Datatype.TokenizedType == XmlTokenizedType.ID) {
919 HandleError (new XmlSchemaException ("AttList declaration must not contain two or more ID attributes.",
920 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
925 if (decl [def.Name] == null)
931 // This expanding is only allowed as a non-validating parser.
// Reads one attribute definition: name, whitespace, attribute type, whitespace,
// then the default declaration (via ReadAttributeDefaultValue). The type is
// chosen from the first characters of the type keyword; anything else is treated
// as an enumeration of name tokens.
// NOTE(review): many lines (case labels, Expect calls, delimiters, the return) are
// absent from this excerpt; comments describe visible statements only.
937 private DTDAttributeDefinition ReadAttributeDefinition ()
// NOTE(review): this throw presumably sits inside a conditional-compilation branch
// that is not visible here; otherwise the statements below would be unreachable — confirm.
940 throw new NotImplementedException ();
942 DTDAttributeDefinition def = new DTDAttributeDefinition (DTD);
943 def.IsInternalSubset = this.processingInternalSubset;
947 def.Name = ReadName ();
948 if (!SkipWhitespace ())
949 throw NotWFError ("Whitespace is required between name and content in DTD attribute definition.");
953 switch(PeekChar ()) {
956 def.Datatype = XmlSchemaDatatype.FromName ("normalizedString", XmlSchema.Namespace);
958 case 'I': // ID, IDREF, IDREFS
960 if(PeekChar () == 'R') {
962 if(PeekChar () == 'S') {
965 def.Datatype = XmlSchemaDatatype.FromName ("IDREFS", XmlSchema.Namespace);
968 def.Datatype = XmlSchemaDatatype.FromName ("IDREF", XmlSchema.Namespace);
971 def.Datatype = XmlSchemaDatatype.FromName ("ID", XmlSchema.Namespace);
973 case 'E': // ENTITY, ENTITIES
975 switch(ReadChar ()) {
977 def.Datatype = XmlSchemaDatatype.FromName ("ENTITY", XmlSchema.Namespace);
979 case 'I': // ENTITIES
981 def.Datatype = XmlSchemaDatatype.FromName ("ENTITIES", XmlSchema.Namespace);
985 case 'N': // NMTOKEN, NMTOKENS, NOTATION
987 switch(PeekChar ()) {
990 if(PeekChar ()=='S') { // NMTOKENS
992 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKENS", XmlSchema.Namespace);
995 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKEN", XmlSchema.Namespace);
999 def.Datatype = XmlSchemaDatatype.FromName ("NOTATION", XmlSchema.Namespace);
1000 TryExpandPERefSpaceKeep ();
1001 if (!SkipWhitespace ())
1002 throw NotWFError ("Whitespace is required after notation name in DTD attribute definition.");
// Notation names are collected into def.EnumeratedNotations, separated by '|'.
1006 def.EnumeratedNotations.Add (ReadName ()); // notation name
1009 while(PeekChar () == '|') {
1013 def.EnumeratedNotations.Add (ReadName ()); // notation name
1020 throw NotWFError ("attribute declaration syntax error.");
1023 default: // Enumerated Values
1024 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKEN", XmlSchema.Namespace);
// Each enumerated value is normalized by the datatype before it is stored.
1029 def.EnumeratedAttributeDeclaration.Add (
1030 def.Datatype.Normalize (ReadNmToken ())); // enum value
1032 while(PeekChar () == '|') {
1036 def.EnumeratedAttributeDeclaration.Add (
1037 def.Datatype.Normalize (ReadNmToken ())); // enum value
1044 TryExpandPERefSpaceKeep ();
1045 if (!SkipWhitespace ())
1046 throw NotWFError ("Whitespace is required between type and occurence in DTD attribute definition.");
1049 ReadAttributeDefaultValue (def);
// Reads the default declaration of an attribute (a '#' keyword form or a plain
// default value) and then validates any default value: membership in the
// enumerated values or notations, parseability by the datatype, existence of
// referenced entities, and the rule that an ID attribute has no default value.
// Validation failures are reported through HandleError rather than thrown.
// NOTE(review): the keyword switch and several braces are absent from this excerpt.
1055 private void ReadAttributeDefaultValue (DTDAttributeDefinition def)
1057 if(PeekChar () == '#')
1063 Expect ("REQUIRED");
1064 def.OccurenceType = DTDAttributeOccurenceType.Required;
1068 def.OccurenceType = DTDAttributeOccurenceType.Optional;
1072 def.OccurenceType = DTDAttributeOccurenceType.Fixed;
1073 if (!SkipWhitespace ())
1074 throw NotWFError ("Whitespace is required between FIXED and actual value in DTD attribute definition.");
1075 def.UnresolvedDefaultValue = ReadDefaultAttribute ();
1079 // one of the enumerated value
1082 def.UnresolvedDefaultValue = ReadDefaultAttribute ();
1085 // VC: If default value exists, it should be valid.
1086 if (def.DefaultValue != null) {
1087 string normalized = def.Datatype.Normalize (def.DefaultValue);
1088 bool breakup = false;
1089 object parsed = null;
1091 // enumeration validity
1092 if (def.EnumeratedAttributeDeclaration.Count > 0) {
1093 if (!def.EnumeratedAttributeDeclaration.Contains (normalized)) {
1094 HandleError (new XmlSchemaException ("Default value is not one of the enumerated values.",
1095 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1099 if (def.EnumeratedNotations.Count > 0) {
1100 if (!def.EnumeratedNotations.Contains (normalized)) {
1101 HandleError (new XmlSchemaException ("Default value is not one of the enumerated notation values.",
1102 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1107 // type based validity
1110 parsed = def.Datatype.ParseValue (normalized, DTD.NameTable, null);
1111 } catch (Exception ex) { // FIXME: (wishlist) bad catch ;-(
// NOTE(review): this message names the ENTITY type although the catch covers
// every datatype — confirm whether the wording is intended.
1112 HandleError (new XmlSchemaException ("Invalid default value for ENTITY type.",
1113 def.LineNumber, def.LinePosition, null, def.BaseURI, ex));
1118 switch (def.Datatype.TokenizedType) {
1119 case XmlTokenizedType.ENTITY:
1120 if (DTD.EntityDecls [normalized] == null)
1121 HandleError (new XmlSchemaException ("Specified entity declaration used by default attribute value was not found.",
1122 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1124 case XmlTokenizedType.ENTITIES:
// NOTE(review): if parsed is not a string [] the cast below yields null and the
// loop would dereference it — confirm that ParseValue always returns string [] here.
1125 string [] entities = parsed as string [];
1126 for (int i = 0; i < entities.Length; i++) {
1127 string entity = entities [i];
1128 if (DTD.EntityDecls [entity] == null)
1129 HandleError (new XmlSchemaException ("Specified entity declaration used by default attribute value was not found.",
1130 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1136 // Extra ID attribute validity check.
1137 if (def.Datatype != null && def.Datatype.TokenizedType == XmlTokenizedType.ID)
1138 if (def.UnresolvedDefaultValue != null)
1139 HandleError (new XmlSchemaException ("ID attribute must not have fixed value constraint.",
1140 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
// Reads a NOTATION declaration: the notation name (split into prefix and local
// name at the first ':' when namespaces are enabled), followed by a public id
// with optional system id, or by a system id alone. At least one id is required.
// NOTE(review): several lines (else branches, the wsSkipped check, the closing
// '>' handling, the return) are absent from this excerpt.
1144 private DTDNotationDeclaration ReadNotationDecl()
1146 DTDNotationDeclaration decl = new DTDNotationDeclaration (DTD);
1147 if (!SkipWhitespace ())
1148 throw NotWFError ("Whitespace is required between NOTATION and name in DTD notation declaration.");
1150 decl.Name = ReadName (); // notation name
1152 if (namespaces) { // copy from SetProperties ;-)
1153 int indexOfColon = decl.Name.IndexOf (':');
1155 if (indexOfColon == -1) {
1156 decl.Prefix = String.Empty;
1157 decl.LocalName = decl.Name;
1159 decl.Prefix = decl.Name.Substring (0, indexOfColon);
1160 decl.LocalName = decl.Name.Substring (indexOfColon + 1);
1164 decl.Prefix = String.Empty;
1165 decl.LocalName = decl.Name;
1169 if(PeekChar () == 'P') {
1170 decl.PublicId = ReadPubidLiteral ();
1171 bool wsSkipped = SkipWhitespace ();
// A quote after the public id starts an optional system literal.
1172 if (PeekChar () == '\'' || PeekChar () == '"') {
1174 throw NotWFError ("Whitespace is required between public id and system id.");
1175 decl.SystemId = ReadSystemLiteral (false);
1178 } else if(PeekChar () == 'S') {
1179 decl.SystemId = ReadSystemLiteral (true);
1182 if(decl.PublicId == null && decl.SystemId == null)
1183 throw NotWFError ("public or system declaration required for \"NOTATION\" declaration.");
1184 // This expanding is only allowed as a non-validating parser.
// Reads an external id and stores the result in cachedPublicId / cachedSystemId.
// One branch reads a system literal preceded by the SYSTEM keyword; the other reads
// a public id literal, required whitespace, and then a bare system literal.
// NOTE(review): the case labels and any resetting of the cached fields are absent
// from this excerpt.
1190 private void ReadExternalID () {
1191 switch (PeekChar ()) {
1193 cachedSystemId = ReadSystemLiteral (true);
1196 cachedPublicId = ReadPubidLiteral ();
1197 if (!SkipWhitespace ())
1198 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
1199 cachedSystemId = ReadSystemLiteral (false);
1204 // The reader is positioned on the first 'S' of "SYSTEM".
// Reads a quoted system literal and returns its content without the quotes.
// expectSYSTEM presumably controls whether the SYSTEM keyword is consumed first
// (ReadExternalID passes true only on its keyword branch) — TODO confirm.
// NOTE(review): the keyword handling and the read inside the loop are absent from
// this excerpt.
1205 private string ReadSystemLiteral (bool expectSYSTEM)
1209 if (!SkipWhitespace ())
1210 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
1214 int quoteChar = ReadChar (); // apos or quot
1216 ClearValueBuffer ();
// Characters are collected until the opening quote character is met again.
1217 while (c != quoteChar) {
1220 throw NotWFError ("Unexpected end of stream in ExternalID.");
1222 AppendValueChar (c);
1224 return CreateValueString (); //currentTag.ToString (startPos, currentTag.Length - 1 - startPos);
// Reads a quoted public id literal and returns its content without the quotes.
// Every character other than the closing quote must satisfy XmlChar.IsPubidChar.
// NOTE(review): the PUBLIC keyword handling and the read inside the loop are absent
// from this excerpt.
1227 private string ReadPubidLiteral()
1230 if (!SkipWhitespace ())
1231 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
1232 int quoteChar = ReadChar ();
1234 ClearValueBuffer ();
// Characters are collected until the opening quote character is met again.
1235 while(c != quoteChar)
1238 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
1239 if(c != quoteChar && !XmlChar.IsPubidChar (c))
1240 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char) c));
1242 AppendValueChar (c);
1244 return CreateValueString (); //currentTag.ToString (startPos, currentTag.Length - 1 - startPos);
1247 // The reader is positioned on the first character
// Reads a name at the current position by delegating to ReadNameOrNmToken in
// its "name" (not name-token) mode.
internal string ReadName ()
{
	const bool asNmToken = false;
	return ReadNameOrNmToken (asNmToken);
}
1254 // The reader is positioned on the first character
// Reads a name token at the current position by delegating to
// ReadNameOrNmToken in its name-token mode.
private string ReadNmToken ()
{
	const bool asNmToken = true;
	return ReadNameOrNmToken (asNmToken);
}
// Reads a name or a name token, accumulating characters in the name buffer and
// returning the string produced by CreateNameString ().
// The first character is validated with IsNameChar for a name token and with
// IsFirstNameChar for a name (judging by the two error messages); the following
// characters are consumed while they are name characters.
// NOTE(review): the branch on isNameToken and any reset of the name buffer are
// absent from this excerpt.
1261 private string ReadNameOrNmToken(bool isNameToken)
1263 int ch = PeekChar ();
1265 if (!XmlChar.IsNameChar (ch))
1266 throw NotWFError (String.Format ("a nmtoken did not start with a legal character {0} ({1})", ch, (char) ch));
1269 if (!XmlChar.IsFirstNameChar (ch))
1270 throw NotWFError (String.Format ("a name did not start with a legal character {0} ({1})", ch, (char) ch));
1275 AppendNameChar (ReadChar ());
1277 while (XmlChar.IsNameChar (PeekChar ())) {
1278 AppendNameChar (ReadChar ());
1281 return CreateNameString ();
1284 // Read the next character and compare it against the
1285 // specified character.
// Throws a not-well-formed error describing both characters when they differ.
// NOTE(review): the format arguments of the message are absent from this excerpt.
1286 private void Expect (int expected)
1288 int ch = ReadChar ();
1290 if (ch != expected) {
1291 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1292 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
// Consumes the given literal from the input one character at a time; the
// single-character Expect overload raises the error on the first mismatch.
private void Expect (string expected)
{
	foreach (char required in expected)
		Expect (required);
}
// Reads characters, passing over whitespace, and requires the first
// non-whitespace character to be c; otherwise a not-well-formed error is thrown.
// NOTE(review): the loop header and the comparison against c are absent from this
// excerpt.
1307 private void ExpectAfterWhitespace (char c)
1310 int i = ReadChar ();
1311 if (XmlChar.IsWhitespace (i))
1314 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0} but found {1} [{2}].", c, (char) i, i));
1319 // Does not consume the first non-whitespace character.
// Records whether the next character is whitespace, then loops while whitespace
// is ahead. Callers treat the result as "at least one whitespace was present".
// NOTE(review): the loop body and the return statement are absent from this excerpt.
1320 private bool SkipWhitespace ()
1322 bool skipped = XmlChar.IsWhitespace (PeekChar ());
1323 while (XmlChar.IsWhitespace (PeekChar ()))
// Returns the current input's PeekChar () result. Callers in this class compare
// it with -1 to detect the end of the input.
private int PeekChar ()
{
	int next = currentInput.PeekChar ();
	return next;
}
// Returns the current input's ReadChar () result, i.e. reads the next character
// from whichever input is active.
private int ReadChar ()
{
	int consumed = currentInput.ReadChar ();
	return consumed;
}
1338 // The reader is positioned on the first character after
1339 // the leading '<!--'.
// Consumes a comment without storing it. "--" must be followed by '>' and every
// character must be valid; a violation throws a not-well-formed error.
// NOTE(review): the lines that consume the terminator and leave the loop, and any
// end-of-stream handling after it, are absent from this excerpt.
1340 private void ReadComment ()
// Once a comment has been seen, a text declaration is no longer accepted.
1342 currentInput.AllowTextDecl = false;
1344 while (PeekChar () != -1) {
1345 int ch = ReadChar ();
1347 if (ch == '-' && PeekChar () == '-') {
1350 if (PeekChar () != '>')
1351 throw NotWFError ("comments cannot contain '--'");
1357 if (XmlChar.IsInvalid (ch))
1358 throw NotWFError ("Not allowed character was found.");
1362 // The reader is positioned on the first character
1365 // It may be xml declaration or processing instruction.
// Reads the target name. The exact target "xml" is handled as a text declaration;
// any other casing of "xml" is rejected; every other target is consumed up to the
// closing "?>" without being stored.
// NOTE(review): the lines after the text declaration call and inside the final
// loop are absent from this excerpt.
1366 private void ReadProcessingInstruction ()
1368 string target = ReadName ();
1369 if (target == "xml") {
1370 ReadTextDeclaration ();
1372 } else if (CultureInfo.InvariantCulture.CompareInfo.Compare (target, "xml", CompareOptions.IgnoreCase) == 0)
1373 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
// Once a processing instruction has been seen, a text declaration is no longer accepted.
1375 currentInput.AllowTextDecl = false;
// Whitespace after the target may be omitted only when "?" follows immediately.
1377 if (!SkipWhitespace ())
1378 if (PeekChar () != '?')
1379 throw NotWFError ("Invalid processing instruction name was found.");
1381 while (PeekChar () != -1) {
1382 int ch = ReadChar ();
1384 if (ch == '?' && PeekChar () == '>') {
1391 // The reader is positioned after "<?xml "
// Parses a text declaration: an optional version pseudo-attribute followed by a
// mandatory encoding pseudo-attribute. It is rejected when the current input no
// longer allows a text declaration.
// NOTE(review): the case labels, the version counter increment and the closing
// "?>" handling are absent from this excerpt.
1392 private void ReadTextDeclaration ()
1394 if (!currentInput.AllowTextDecl)
1395 throw NotWFError ("Text declaration cannot appear in this state.");
// Only one text declaration is accepted per input.
1397 currentInput.AllowTextDecl = false;
1402 if (PeekChar () == 'v') {
1404 ExpectAfterWhitespace ('=');
1406 int quoteChar = ReadChar ();
// Holds at most three characters of the version value.
1407 char [] expect1_0 = new char [3];
1408 int versionLength = 0;
1409 switch (quoteChar) {
1412 while (PeekChar () != quoteChar) {
1413 if (PeekChar () == -1)
1414 throw NotWFError ("Invalid version declaration inside text declaration.");
1415 else if (versionLength == 3)
1416 throw NotWFError ("Invalid version number inside text declaration.");
1418 expect1_0 [versionLength] = (char) ReadChar ();
// NOTE(review): as visible here, the "1.0" comparison applies only when exactly
// three characters were read, so a shorter value would pass this check — confirm
// against the full source.
1420 if (versionLength == 3 && new String (expect1_0) != "1.0")
1421 throw NotWFError ("Invalid version number inside text declaration.");
1428 throw NotWFError ("Invalid version declaration inside text declaration.");
1432 if (PeekChar () == 'e') {
1433 Expect ("encoding");
1434 ExpectAfterWhitespace ('=');
1436 int quoteChar = ReadChar ();
1437 switch (quoteChar) {
// The encoding value is skipped here, not interpreted.
1440 while (PeekChar () != quoteChar)
1441 if (ReadChar () == -1)
1442 throw NotWFError ("Invalid encoding declaration inside text declaration.");
1447 throw NotWFError ("Invalid encoding declaration inside text declaration.");
1449 // Encoding value should be checked inside XmlInputStream.
1452 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
1457 // Note that now this method behaves differently from
1458 // XmlTextReader's one. It calls AppendValueChar() internally.
// Reads the digits of a numeric character reference up to ';'
// (hexadecimal when the next character is 'x', decimal otherwise),
// rejects characters that XmlChar.IsInvalid() reports as invalid, and
// appends the resulting character to the value buffer.
1460 private int ReadCharacterReference ()
// Hexadecimal form.
1464 if (PeekChar () == 'x') {
1467 while (PeekChar () != ';' && PeekChar () != -1) {
1468 int ch = ReadChar ();
// NOTE(review): no overflow guard is visible on this accumulation; a
// very long digit run could wrap the int before the validity check
// below - confirm.
1470 if (ch >= '0' && ch <= '9')
1471 value = (value << 4) + ch - '0';
1472 else if (ch >= 'A' && ch <= 'F')
1473 value = (value << 4) + ch - 'A' + 10;
1474 else if (ch >= 'a' && ch <= 'f')
1475 value = (value << 4) + ch - 'a' + 10;
1477 throw NotWFError (String.Format (
1478 CultureInfo.InvariantCulture,
1479 "invalid hexadecimal digit: {0} (#x{1:X})",
// Decimal form.
1484 while (PeekChar () != ';' && PeekChar () != -1) {
1485 int ch = ReadChar ();
1487 if (ch >= '0' && ch <= '9')
1488 value = value * 10 + ch - '0';
1490 throw NotWFError (String.Format (
1491 CultureInfo.InvariantCulture,
1492 "invalid decimal digit: {0} (#x{1:X})",
1500 // There is no way to save surrogate pairs...
// NOTE(review): AppendValueChar() in this class does emit a surrogate
// pair computed from (ch - 0x10000), so the remark above may be
// outdated - confirm.
1501 if (XmlChar.IsInvalid (value))
1502 throw NotWFError ("Referenced character was not allowed in XML.");
1503 AppendValueChar (value);
// Appends one character (given as a code point) to the name buffer,
// growing the buffer as needed.
//
// Values that fit in a single UTF-16 code unit are stored as-is.
// Larger values are stored as a surrogate pair computed with the
// RFC 2781 algorithm, the same one AppendValueChar() uses:
//   high = 0xD800 + ((ch - 0x10000) >> 10)
//   low  = 0xDC00 + ((ch - 0x10000) & 0x3FF)
// The previous arithmetic (ch / 0x10000 and ch % 0x10000) produced an
// invalid pair for most supplementary characters, e.g. U+10400 became
// D800 E000 instead of D801 DC00.
private void AppendNameChar (int ch)
{
	CheckNameCapacity ();

	// "<=" so that 0xFFFF is stored directly instead of being pushed
	// through the surrogate computation.
	if (ch <= Char.MaxValue) {
		nameBuffer [nameLength++] = (char) ch;
		return;
	}

	int utag = ch - 0x10000;
	nameBuffer [nameLength++] = (char) ((utag >> 10) + 0xD800);
	// The low surrogate needs its own slot; make sure it exists.
	CheckNameCapacity ();
	nameBuffer [nameLength++] = (char) ((utag & 0x3FF) + 0xDC00);
}
// Makes room for at least one more character in the name buffer.
// When the buffer is exactly full its capacity is doubled and the
// characters written so far are carried over to the new array.
private void CheckNameCapacity ()
{
	// Still a free slot: nothing to do.
	if (nameLength != nameCapacity)
		return;

	nameCapacity *= 2;
	char [] grown = new char [nameCapacity];
	Array.Copy (nameBuffer, grown, nameLength);
	nameBuffer = grown;
}
// Adds the first nameLength characters of the name buffer to
// DTD.NameTable and returns the string it hands back.
private string CreateNameString ()
{
	string name = DTD.NameTable.Add (nameBuffer, 0, nameLength);
	return name;
}
// Appends one character, given as an integer code, to the value buffer.
// Three outcomes are visible: the value is appended as a single char,
// an XmlException is thrown because the value is too large, or the
// value is appended as a UTF-16 surrogate pair.
// NOTE(review): presumably the single-char path is taken below 0x10000
// and the exception above 0x10FFFF - confirm against the conditions.
1535 private void AppendValueChar (int ch)
1537 //See http://www.faqs.org/rfcs/rfc2781.html for used algorithm
1539 valueBuffer.Append ((char) ch);
1543 throw new XmlException ("The numeric entity value is too large", null, LineNumber, LinePosition);
// Surrogate pair: the upper bits of the offset from 0x10000 select the
// high surrogate, the lower 10 bits select the low surrogate.
1546 int utag = ch - 0x10000;
1547 valueBuffer.Append((char) ((utag >> 10) + 0xD800));
1548 valueBuffer.Append((char) ((utag & 0x3FF) + 0xDC00));
// Returns the current contents of the value buffer as a string.
// The buffer itself is left untouched.
private string CreateValueString ()
{
	string text = valueBuffer.ToString ();
	return text;
}
// Empties the value buffer so that the next value starts from scratch.
private void ClearValueBuffer ()
{
	valueBuffer.Remove (0, valueBuffer.Length);
}
1562 // The reader is positioned on the quote character.
1563 // *Keeps quote char* to value to get_QuoteChar() correctly.
// Reads a quoted default attribute value and returns it including both
// quote characters. Entity references are validated and copied into the
// result by name, followed by ';'. A not-well-formed error (NotWFError)
// is thrown for an unquoted value, a '<' inside the value, end of
// input, or a disallowed entity reference.
1564 private string ReadDefaultAttribute ()
1566 ClearValueBuffer ();
1570 int quoteChar = ReadChar ();
1572 if (quoteChar != '\'' && quoteChar != '\"')
1573 throw NotWFError ("an attribute value was not quoted");
// The opening quote is stored as the first character of the value.
1575 AppendValueChar (quoteChar);
// Consume characters until the matching closing quote is next.
1577 while (PeekChar () != quoteChar) {
1578 int ch = ReadChar ();
1583 throw NotWFError ("attribute values cannot contain '<'");
1585 throw NotWFError ("unexpected end of file in an attribute value");
1587 AppendValueChar (ch);
1588 if (PeekChar () == '#')
1590 // Check XML 1.0 section 3.1 WFC.
1591 string entName = ReadName ();
// Names for which GetPredefinedEntity() returns a negative value are
// looked up among the entities declared in the DTD.
1593 if (XmlChar.GetPredefinedEntity (entName) < 0) {
1594 DTDEntityDeclaration entDecl =
1595 DTD == null ? null : DTD.EntityDecls [entName];
1596 if (entDecl == null || entDecl.SystemId != null)
1597 // WFC: Entity Declared (see 4.1)
// NOTE(review): DTD is null-checked two lines above but dereferenced
// here without a guard; if DTD can really be null this would throw a
// NullReferenceException - confirm.
1598 if (DTD.IsStandalone || (DTD.SystemId == null && !DTD.InternalSubsetHasPEReference))
1599 throw NotWFError ("Reference to external entities is not allowed in attribute value.");
// Copy the entity name and the terminating ';' into the value.
1601 valueBuffer.Append (entName);
1602 AppendValueChar (';');
1605 AppendValueChar (ch);
// Consume the closing quote and keep it in the value as well.
1610 ReadChar (); // quoteChar
1611 AppendValueChar (quoteChar);
1613 return CreateValueString ();
// Switches parsing to the external entity identified by url. The url is
// resolved against DTD.BaseURI through DTD.Resolver, the current input
// is pushed onto parserInputStack, and the entity content becomes the
// new currentInput. If obtaining the entity throws, the failure is
// reported through HandleError() and an empty input is used instead.
1616 private void PushParserInput (string url)
// Build the base URI from DTD.BaseURI when it is non-empty.
1620 if (DTD.BaseURI != null && DTD.BaseURI.Length > 0)
1621 baseUri = new Uri (DTD.BaseURI);
1622 } catch (UriFormatException) {
// An empty or null url resolves to the base URI itself.
1625 Uri absUri = url != null && url.Length > 0 ?
1626 DTD.Resolver.ResolveUri (baseUri, url) : baseUri;
1627 string absPath = absUri != null ? absUri.ToString () : String.Empty;
// Reject inclusion of an entity that is already on the input stack.
// NOTE(review): only stacked inputs are compared here; currentInput is
// pushed after the loop, so whether an entity that includes itself is
// caught at the first level is not evident from this code - confirm.
1629 foreach (XmlParserInput i in parserInputStack.ToArray ()) {
1630 if (i.BaseURI == absPath)
1631 throw NotWFError ("Nested inclusion is not allowed: " + url);
1633 parserInputStack.Push (currentInput);
// Copy the resolved entity stream into a MemoryStream in chunks of up
// to 4096 bytes and read the new input from that copy.
// NOTE(review): confirm that the source stream is closed and that the
// MemoryStream position is reset before it is read.
1635 MemoryStream ms = new MemoryStream ();
1637 s = DTD.Resolver.GetEntity (absUri, null, typeof (Stream)) as Stream;
1639 byte [] buf = new byte [4096];
1641 size = s.Read (buf, 0, buf.Length);
1642 ms.Write (buf, 0, size);
1646 currentInput = new XmlParserInput (new XmlStreamReader (ms), absPath);
1647 } catch (Exception ex) { // FIXME: (wishlist) Bad exception catch ;-(
// Report the failure with the position and base URI of the current
// input when there is one, then continue with an empty replacement
// input for absPath.
1650 int line = currentInput == null ? 0 : currentInput.LineNumber;
1651 int col = currentInput == null ? 0 : currentInput.LinePosition;
1652 string bu = (currentInput == null) ? String.Empty : currentInput.BaseURI;
1653 HandleError (new XmlSchemaException ("Specified external entity not found. Target URL is " + url + " .",
1654 line, col, null, bu, ex));
1655 currentInput = new XmlParserInput (new StringReader (String.Empty), absPath);
// Closes the input that has just been consumed and resumes the one
// that was saved on parserInputStack by PushParserInput().
private void PopParserInput ()
{
	XmlParserInput finished = currentInput;
	finished.Close ();

	object resumed = parserInputStack.Pop ();
	currentInput = resumed as XmlParserInput;
}
1665 private void HandleError (XmlSchemaException ex)
1667 #if DTD_HANDLE_EVENTS
1668 if (this.ValidationEventHandler != null)
1669 ValidationEventHandler (this, new ValidationEventArgs (ex, ex.Message, XmlSeverityType.Error));