2 // System.Xml.DTDReader
5 // Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
7 // (C)2003 Atsushi Enomoto
11 // When a parameter entity contains cp section, it should be closed
12 // within that declaration.
14 // Resolution to external entities from different BaseURI fails (it is
15 // the same as MS.NET 1.1, but should be fixed in the future).
19 // Permission is hereby granted, free of charge, to any person obtaining
20 // a copy of this software and associated documentation files (the
21 // "Software"), to deal in the Software without restriction, including
22 // without limitation the rights to use, copy, modify, merge, publish,
23 // distribute, sublicense, and/or sell copies of the Software, and to
24 // permit persons to whom the Software is furnished to do so, subject to
25 // the following conditions:
27 // The above copyright notice and this permission notice shall be
28 // included in all copies or substantial portions of the Software.
30 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
34 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
35 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
36 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 using System.Collections;
41 using System.Globalization;
45 using System.Xml.Schema;
49 internal class DTDReader : IXmlLineInfo
51 private XmlParserInput currentInput;
52 private Stack parserInputStack;
54 private char [] nameBuffer;
55 private int nameLength;
56 private int nameCapacity;
57 private const int initialNameCapacity = 256;
59 private StringBuilder valueBuffer;
61 private int currentLinkedNodeLineNumber;
62 private int currentLinkedNodeLinePosition;
64 // Parameter entity placeholder
65 private int dtdIncludeSect;
67 private bool normalization;
69 private bool processingInternalSubset;
71 string cachedPublicId;
72 string cachedSystemId;
77 public event ValidationEventHandler ValidationEventHandler;
82 public DTDReader (DTDObjectModel dtd,
84 int startLinePosition)
87 currentLinkedNodeLineNumber = startLineNumber;
88 currentLinkedNodeLinePosition = startLinePosition;
// Base URI of the parser input that is currently being read.
public string BaseURI
{
	get
	{
		string uri = currentInput.BaseURI;
		return uri;
	}
}
// Gets or sets the normalization flag stored in the 'normalization' field.
public bool Normalization
{
	get
	{
		return this.normalization;
	}
	set
	{
		this.normalization = value;
	}
}
// IXmlLineInfo: line number as reported by the current parser input.
public int LineNumber
{
	get
	{
		int line = currentInput.LineNumber;
		return line;
	}
}
// IXmlLineInfo: line position as reported by the current parser input.
public int LinePosition
{
	get
	{
		int column = currentInput.LinePosition;
		return column;
	}
}
111 public bool HasLineInfo ()
// Creates (does not throw) the XmlException used to report a
// well-formedness violation. The exception is constructed from this
// reader as the line-info source, the current BaseURI and 'message';
// callers are expected to write 'throw NotWFError (...)'.
private XmlException NotWFError (string message)
{
	IXmlLineInfo lineInfo = this as IXmlLineInfo;
	string baseUri = BaseURI;
	return new XmlException (lineInfo, baseUri, message);
}
125 parserInputStack = new Stack ();
127 nameBuffer = new char [initialNameCapacity];
129 nameCapacity = initialNameCapacity;
131 valueBuffer = new StringBuilder (512);
134 internal DTDObjectModel GenerateDTDObjectModel ()
137 int originalParserDepth = parserInputStack.Count;
139 if (DTD.InternalSubset != null && DTD.InternalSubset.Length > 0) {
140 this.processingInternalSubset = true;
141 XmlParserInput original = currentInput;
143 currentInput = new XmlParserInput (
144 new StringReader (DTD.InternalSubset),
146 currentLinkedNodeLineNumber,
147 currentLinkedNodeLinePosition);
148 currentInput.AllowTextDecl = false;
150 more = ProcessDTDSubset ();
151 if (PeekChar () == -1 && parserInputStack.Count > 0)
153 } while (more || parserInputStack.Count > originalParserDepth);
154 if (dtdIncludeSect != 0)
155 throw NotWFError ("INCLUDE section is not ended correctly.");
157 currentInput = original;
158 this.processingInternalSubset = false;
160 if (DTD.SystemId != null && DTD.SystemId != String.Empty && DTD.Resolver != null) {
161 PushParserInput (DTD.SystemId);
163 more = ProcessDTDSubset ();
164 if (PeekChar () == -1 && parserInputStack.Count > 1)
166 } while (more || parserInputStack.Count > originalParserDepth + 1);
167 if (dtdIncludeSect != 0)
168 throw NotWFError ("INCLUDE section is not ended correctly.");
172 ArrayList sc = new ArrayList ();
174 // Entity recursion check.
175 foreach (DTDEntityDeclaration ent in DTD.EntityDecls.Values) {
176 if (ent.NotationName != null) {
177 ent.ScanEntityValue (sc);
181 // release unnecessary memory usage
182 DTD.ExternalResources.Clear ();
187 // Read any one of following:
188 // elementdecl, AttlistDecl, EntityDecl, NotationDecl,
189 // PI, Comment, Parameter Entity, or doctype termination char(']')
191 // Returns true if it may have any more contents, or false if not.
192 private bool ProcessDTDSubset ()
195 int c2 = ReadChar ();
201 // It affects on entity references' well-formedness
202 if (this.processingInternalSubset)
203 DTD.InternalSubsetHasPEReference = true;
204 string peName = ReadName ();
206 DTDParameterEntityDeclaration peDecl = GetPEDecl (peName);
209 currentInput.PushPEBuffer (peDecl);
210 // int currentLine = currentInput.LineNumber;
211 // int currentColumn = currentInput.LinePosition;
212 while (currentInput.HasPEBuffer)
215 // FIXME: Implement correct nest-level check.
216 // Don't depend on lineinfo (might not be supplied)
217 // if (currentInput.LineNumber != currentLine ||
218 // currentInput.LinePosition != currentColumn)
219 // throw NotWFError ("Incorrectly nested parameter entity.");
226 // Only read, no store.
227 ReadProcessingInstruction ();
230 CompileDeclaration ();
233 throw NotWFError ("Unexpected end of stream.");
235 throw NotWFError ("Syntax Error after '<' character: " + (char) c);
239 if (dtdIncludeSect == 0)
240 throw NotWFError ("Unbalanced end of INCLUDE/IGNORE section.");
247 throw NotWFError (String.Format ("Syntax Error inside doctypedecl markup : {0}({1})", c2, (char) c2));
249 currentInput.AllowTextDecl = false;
253 private void CompileDeclaration ()
259 // Only read, no store.
267 if (!SkipWhitespace ())
269 "Whitespace is required after '<!ENTITY' in DTD entity declaration.");
271 if (PeekChar () == '%') {
273 if (!SkipWhitespace ()) {
277 // FIXME: Is this allowed? <!ENTITY % %name; ...>
278 // (i.e. Can PE name be replaced by another PE?)
280 if (XmlChar.IsNameChar (PeekChar ()))
281 ReadParameterEntityDecl ();
283 throw NotWFError ("expected name character");
287 DTDEntityDeclaration ent = ReadEntityDecl ();
288 if (DTD.EntityDecls [ent.Name] == null)
289 DTD.EntityDecls.Add (ent.Name, ent);
293 DTDElementDeclaration el = ReadElementDecl ();
294 DTD.ElementDecls.Add (el.Name, el);
297 throw NotWFError ("Syntax Error after '<!E' (ELEMENT or ENTITY must be found)");
302 DTDAttListDeclaration atl = ReadAttListDecl ();
303 DTD.AttListDecls.Add (atl.Name, atl);
307 DTDNotationDeclaration not = ReadNotationDecl ();
308 DTD.NotationDecls.Add (not.Name, not);
311 // conditional sections
315 switch (ReadChar ()) {
318 ExpectAfterWhitespace ('[');
328 throw NotWFError ("Syntax Error after '<!' characters.");
332 private void ReadIgnoreSect ()
334 ExpectAfterWhitespace ('[');
335 int dtdIgnoreSect = 1;
337 while (dtdIgnoreSect > 0) {
338 switch (ReadChar ()) {
340 throw NotWFError ("Unexpected IGNORE section end.");
342 if (PeekChar () != '!')
345 if (PeekChar () != '[')
351 if (PeekChar () != ']')
354 if (PeekChar () != '>')
361 if (dtdIgnoreSect != 0)
362 throw NotWFError ("IGNORE section is not ended correctly.");
365 // The reader is positioned on the head of the name.
366 private DTDElementDeclaration ReadElementDecl ()
368 DTDElementDeclaration decl = new DTDElementDeclaration (DTD);
369 decl.IsInternalSubset = this.processingInternalSubset;
371 if (!SkipWhitespace ())
372 throw NotWFError ("Whitespace is required between '<!ELEMENT' and name in DTD element declaration.");
374 decl.Name = ReadName ();
375 if (!SkipWhitespace ())
376 throw NotWFError ("Whitespace is required between name and content in DTD element declaration.");
378 ReadContentSpec (decl);
380 // This expanding is only allowed as a non-validating parser.
386 // read 'children'(BNF) of contentspec
387 private void ReadContentSpec (DTDElementDeclaration decl)
401 DTDContentModel model = decl.ContentModel;
404 if(PeekChar () == '#') {
405 // Mixed Contents. "#PCDATA" must appear first.
406 decl.IsMixedContent = true;
407 model.Occurence = DTDOccurence.ZeroOrMore;
408 model.OrderType = DTDContentOrderType.Or;
412 while(PeekChar () != ')') {
414 if (PeekChar () == '%') {
421 DTDContentModel elem = new DTDContentModel (DTD, decl.Name);
422 // elem.LineNumber = currentInput.LineNumber;
423 // elem.LinePosition = currentInput.LinePosition;
424 elem.ElementName = ReadName ();
425 this.AddContentModel (model.ChildModels, elem);
430 if (model.ChildModels.Count > 0)
432 else if (PeekChar () == '*')
435 // Non-Mixed Contents
436 model.ChildModels.Add (ReadCP (decl));
439 do { // copied from ReadCP() ...;-)
440 if (PeekChar () == '%') {
444 if(PeekChar ()=='|') {
446 if (model.OrderType == DTDContentOrderType.Seq)
447 throw NotWFError ("Inconsistent choice markup in sequence cp.");
448 model.OrderType = DTDContentOrderType.Or;
451 AddContentModel (model.ChildModels, ReadCP (decl));
454 else if(PeekChar () == ',')
457 if (model.OrderType == DTDContentOrderType.Or)
458 throw NotWFError ("Inconsistent sequence markup in choice cp.");
459 model.OrderType = DTDContentOrderType.Seq;
462 model.ChildModels.Add (ReadCP (decl));
474 model.Occurence = DTDOccurence.Optional;
478 model.Occurence = DTDOccurence.ZeroOrMore;
482 model.Occurence = DTDOccurence.OneOrMore;
491 throw NotWFError ("ContentSpec is missing.");
495 // Read 'cp' (BNF) of contentdecl (BNF)
496 private DTDContentModel ReadCP (DTDElementDeclaration elem)
498 DTDContentModel model = null;
500 if(PeekChar () == '(') {
501 model = new DTDContentModel (DTD, elem.Name);
504 model.ChildModels.Add (ReadCP (elem));
507 if (PeekChar () == '%') {
511 if(PeekChar ()=='|') {
513 if (model.OrderType == DTDContentOrderType.Seq)
514 throw NotWFError ("Inconsistent choice markup in sequence cp.");
515 model.OrderType = DTDContentOrderType.Or;
518 AddContentModel (model.ChildModels, ReadCP (elem));
521 else if(PeekChar () == ',') {
523 if (model.OrderType == DTDContentOrderType.Or)
524 throw NotWFError ("Inconsistent sequence markup in choice cp.");
525 model.OrderType = DTDContentOrderType.Seq;
528 model.ChildModels.Add (ReadCP (elem));
535 ExpectAfterWhitespace (')');
539 model = new DTDContentModel (DTD, elem.Name);
540 model.ElementName = ReadName ();
543 switch(PeekChar ()) {
545 model.Occurence = DTDOccurence.Optional;
549 model.Occurence = DTDOccurence.ZeroOrMore;
553 model.Occurence = DTDOccurence.OneOrMore;
560 private void AddContentModel (DTDContentModelCollection cmc, DTDContentModel cm)
562 if (cm.ElementName != null) {
563 for (int i = 0; i < cmc.Count; i++) {
564 if (cmc [i].ElementName == cm.ElementName) {
565 HandleError (new XmlSchemaException ("Element content must be unique inside mixed content model.",
578 // The reader is positioned on the first name char.
579 private void ReadParameterEntityDecl ()
581 DTDParameterEntityDeclaration decl =
582 new DTDParameterEntityDeclaration (DTD);
583 decl.BaseURI = BaseURI;
584 decl.XmlResolver = DTD.Resolver;
586 decl.Name = ReadName ();
587 if (!SkipWhitespace ())
588 throw NotWFError ("Whitespace is required after name in DTD parameter entity declaration.");
590 if (PeekChar () == 'S' || PeekChar () == 'P') {
591 // read publicId/systemId
593 decl.PublicId = cachedPublicId;
594 decl.SystemId = cachedSystemId;
598 ResolveExternalEntityReplacementText (decl);
601 int quoteChar = ReadChar ();
602 if (quoteChar != '\'' && quoteChar != '"')
603 throw NotWFError ("quotation char was expected.");
610 throw NotWFError ("unexpected end of stream in entity value definition.");
612 if (quoteChar == '"')
615 AppendValueChar ('"');
618 if (quoteChar == '\'')
621 AppendValueChar ('\'');
624 if (XmlChar.IsInvalid (c))
625 throw NotWFError ("Invalid character was used to define parameter entity.");
630 decl.LiteralEntityValue = CreateValueString ();
632 ResolveInternalEntityReplacementText (decl);
634 ExpectAfterWhitespace ('>');
637 if (DTD.PEDecls [decl.Name] == null) {
638 DTD.PEDecls.Add (decl.Name, decl);
642 private void ResolveExternalEntityReplacementText (DTDEntityBase decl)
644 if (decl.SystemId != null && decl.SystemId.Length > 0) {
645 // FIXME: not always it should be read in Element context
646 XmlTextReader xtr = new XmlTextReader (decl.LiteralEntityValue, XmlNodeType.Element, null);
647 xtr.SkipTextDeclaration ();
648 if (decl is DTDEntityDeclaration && DTD.EntityDecls [decl.Name] == null) {
649 // GE - also checked as valid contents
650 StringBuilder sb = new StringBuilder ();
651 xtr.Normalization = this.Normalization;
654 sb.Append (xtr.ReadOuterXml ());
655 decl.ReplacementText = sb.ToString ();
659 decl.ReplacementText = xtr.GetRemainder ().ReadToEnd ();
662 decl.ReplacementText = decl.LiteralEntityValue;
665 private void ResolveInternalEntityReplacementText (DTDEntityBase decl)
667 string value = decl.LiteralEntityValue;
668 int len = value.Length;
670 for (int i = 0; i < len; i++) {
677 end = value.IndexOf (';', i);
679 throw new XmlException (decl, decl.BaseURI, "Invalid reference markup.");
681 if (value [i] == '#') {
683 ch = GetCharacterReference (decl, value, ref i, end);
684 if (XmlChar.IsInvalid (ch))
685 throw NotWFError ("Invalid character was used to define parameter entity.");
688 name = value.Substring (i, end - i);
689 if (!XmlChar.IsName (name))
690 throw NotWFError (String.Format ("'{0}' is not a valid entity reference name.", name));
691 // don't expand "general" entity.
692 AppendValueChar ('&');
693 valueBuffer.Append (name);
694 AppendValueChar (';');
698 if (XmlChar.IsInvalid (ch))
699 throw new XmlException (decl, decl.BaseURI, "Invalid character was found in the entity declaration.");
700 AppendValueChar (ch);
704 end = value.IndexOf (';', i);
706 throw new XmlException (decl, decl.BaseURI, "Invalid reference markup.");
707 name = value.Substring (i, end - i);
708 valueBuffer.Append (GetPEValue (name));
712 AppendValueChar (ch);
716 decl.ReplacementText = CreateValueString ();
// Parses the numeric part of a character reference contained in 'value'.
//   li    - entity declaration used as line-info source (and for BaseURI)
//           when a parse failure is reported.
//   value - the literal text being scanned.
//   index - position of the first character after '#'; passed by ref.
//           NOTE(review): the statement that updates it is not visible in
//           this listing - confirm what position the caller gets back.
//   end   - position of the terminating ';' (the digits end just before it).
// Throws XmlException when the digits are not a valid number. Only
// FormatException is translated; NOTE(review): int.Parse can also throw
// OverflowException for an over-long digit run, which is not caught here.
721 private int GetCharacterReference (DTDEntityBase li, string value, ref int index, int end)
// A leading 'x' selects hexadecimal; the 'x' itself is skipped (index + 1).
724 if (value [index] == 'x') {
726 ret = int.Parse (value.Substring (index + 1, end - index - 1), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
727 } catch (FormatException) {
728 throw new XmlException (li, li.BaseURI, "Invalid number for a character reference.");
// Otherwise the text between index and end is parsed as a decimal integer.
732 ret = int.Parse (value.Substring (index, end - index), CultureInfo.InvariantCulture);
733 } catch (FormatException) {
734 throw new XmlException (li, li.BaseURI, "Invalid number for a character reference.");
// Returns the replacement text of the parameter entity named 'peName',
// or String.Empty when GetPEDecl yields no declaration for that name.
// Any error raised by GetPEDecl propagates to the caller.
private string GetPEValue (string peName)
{
	DTDParameterEntityDeclaration decl = GetPEDecl (peName);
	if (decl == null)
		return String.Empty;
	return decl.ReplacementText;
}
// Looks up the parameter entity declaration registered under 'peName'
// in DTD.PEDecls.
// NOTE(review): the return statements are not visible in this listing;
// presumably the found declaration is returned from the first branch and
// null is returned after the "not found" handling - confirm.
748 private DTDParameterEntityDeclaration GetPEDecl (string peName)
750 DTDParameterEntityDeclaration peDecl =
751 DTD.PEDecls [peName] as DTDParameterEntityDeclaration;
752 if (peDecl != null) {
// A declaration flagged IsInternalSubset is rejected as a
// well-formedness error.
753 if (peDecl.IsInternalSubset)
754 throw NotWFError ("Parameter entity is not allowed in internal subset entity '" + peName + "'");
// Handling for an undeclared parameter entity (presumably reached only
// when the lookup above found nothing): it is a well-formedness error
// when the document is standalone, or when there is neither a SystemId
// nor a PE reference in the internal subset; otherwise it is reported
// through HandleError as an XmlSchemaException.
757 // See XML 1.0 section 4.1 for both WFC and VC.
758 if ((DTD.SystemId == null && !DTD.InternalSubsetHasPEReference) || DTD.IsStandalone)
759 throw NotWFError (String.Format ("Parameter entity '{0}' not found.",peName));
760 HandleError (new XmlSchemaException (
761 "Parameter entity " + peName + " not found.", null));
765 private bool TryExpandPERef ()
767 if (PeekChar () != '%')
769 while (PeekChar () == '%') {
770 TryExpandPERefSpaceKeep ();
776 // Tries to expand parameter entities, but it should not skip spaces
777 private bool TryExpandPERefSpaceKeep ()
779 if (PeekChar () == '%') {
780 if (this.processingInternalSubset)
781 throw NotWFError ("Parameter entity reference is not allowed inside internal subset.");
// Expands a parameter entity reference: reads the entity name, looks it
// up in DTD.PEDecls and, when a declaration exists, pushes it onto the
// current input's PE buffer so that following reads come from it.
// An unknown name is reported through HandleError and otherwise ignored.
// NOTE(review): one statement between ReadName and the lookup is not
// visible in this listing (presumably it consumes the closing ';') - confirm.
790 // reader is positioned after '%'
791 private void ExpandPERef ()
793 string peName = ReadName ();
795 DTDParameterEntityDeclaration peDecl =
796 DTD.PEDecls [peName] as DTDParameterEntityDeclaration;
797 if (peDecl == null) {
// Undeclared entity: report it, then leave the input untouched.
798 HandleError (new XmlSchemaException ("Parameter entity " + peName + " not found.", null));
799 return; // do nothing
801 currentInput.PushPEBuffer (peDecl);
804 // The reader is positioned on the head of the name.
805 private DTDEntityDeclaration ReadEntityDecl ()
807 DTDEntityDeclaration decl = new DTDEntityDeclaration (DTD);
808 decl.BaseURI = BaseURI;
809 decl.XmlResolver = DTD.Resolver;
810 decl.IsInternalSubset = this.processingInternalSubset;
812 decl.Name = ReadName ();
813 if (!SkipWhitespace ())
814 throw NotWFError ("Whitespace is required between name and content in DTD entity declaration.");
817 if (PeekChar () == 'S' || PeekChar () == 'P') {
820 decl.PublicId = cachedPublicId;
821 decl.SystemId = cachedSystemId;
822 if (SkipWhitespace ()) {
823 if (PeekChar () == 'N') {
826 if (!SkipWhitespace ())
827 throw NotWFError ("Whitespace is required after NDATA.");
828 decl.NotationName = ReadName (); // ndata_name
831 if (decl.NotationName == null) {
833 ResolveExternalEntityReplacementText (decl);
836 decl.LiteralEntityValue = String.Empty;
837 decl.ReplacementText = String.Empty;
842 ReadEntityValueDecl (decl);
843 ResolveInternalEntityReplacementText (decl);
846 // This expanding is only allowed as a non-validating parser.
852 private void ReadEntityValueDecl (DTDEntityDeclaration decl)
855 // quotation char will be finally removed on unescaping
856 int quoteChar = ReadChar ();
857 if (quoteChar != '\'' && quoteChar != '"')
858 throw NotWFError ("quotation char was expected.");
861 while (PeekChar () != quoteChar) {
862 int ch = ReadChar ();
865 string name = ReadName ();
867 if (decl.IsInternalSubset)
868 throw NotWFError (String.Format ("Parameter entity is not allowed in internal subset entity '{0}'", name));
869 valueBuffer.Append (GetPEValue (name));
872 throw NotWFError ("unexpected end of stream.");
874 if (this.normalization && XmlChar.IsInvalid (ch))
875 throw NotWFError ("Invalid character was found in the entity declaration.");
876 AppendValueChar (ch);
880 // string value = Dereference (CreateValueString (), false);
881 string value = CreateValueString ();
885 decl.LiteralEntityValue = value;
888 private DTDAttListDeclaration ReadAttListDecl ()
890 TryExpandPERefSpaceKeep ();
891 if (!SkipWhitespace ())
892 throw NotWFError ("Whitespace is required between ATTLIST and name in DTD attlist declaration.");
894 string name = ReadName (); // target element name
895 DTDAttListDeclaration decl =
896 DTD.AttListDecls [name] as DTDAttListDeclaration;
898 decl = new DTDAttListDeclaration (DTD);
899 decl.IsInternalSubset = this.processingInternalSubset;
902 if (!SkipWhitespace ())
903 if (PeekChar () != '>')
904 throw NotWFError ("Whitespace is required between name and content in non-empty DTD attlist declaration.");
908 while (XmlChar.IsNameChar (PeekChar ())) {
909 DTDAttributeDefinition def = ReadAttributeDefinition ();
910 // There must not be two or more ID attributes.
911 if (def.Datatype.TokenizedType == XmlTokenizedType.ID) {
912 for (int i = 0; i < decl.Definitions.Count; i++) {
913 DTDAttributeDefinition d = decl [i];
914 if (d.Datatype.TokenizedType == XmlTokenizedType.ID) {
915 HandleError (new XmlSchemaException ("AttList declaration must not contain two or more ID attributes.",
916 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
921 if (decl [def.Name] == null)
927 // This expanding is only allowed as a non-validating parser.
933 private DTDAttributeDefinition ReadAttributeDefinition ()
935 DTDAttributeDefinition def = new DTDAttributeDefinition (DTD);
936 def.IsInternalSubset = this.processingInternalSubset;
940 def.Name = ReadName ();
941 if (!SkipWhitespace ())
942 throw NotWFError ("Whitespace is required between name and content in DTD attribute definition.");
946 switch(PeekChar ()) {
949 def.Datatype = XmlSchemaDatatype.FromName ("normalizedString", XmlSchema.Namespace);
951 case 'I': // ID, IDREF, IDREFS
953 if(PeekChar () == 'R') {
955 if(PeekChar () == 'S') {
958 def.Datatype = XmlSchemaDatatype.FromName ("IDREFS", XmlSchema.Namespace);
961 def.Datatype = XmlSchemaDatatype.FromName ("IDREF", XmlSchema.Namespace);
964 def.Datatype = XmlSchemaDatatype.FromName ("ID", XmlSchema.Namespace);
966 case 'E': // ENTITY, ENTITIES
968 switch(ReadChar ()) {
970 def.Datatype = XmlSchemaDatatype.FromName ("ENTITY", XmlSchema.Namespace);
972 case 'I': // ENTITIES
974 def.Datatype = XmlSchemaDatatype.FromName ("ENTITIES", XmlSchema.Namespace);
978 case 'N': // NMTOKEN, NMTOKENS, NOTATION
980 switch(PeekChar ()) {
983 if(PeekChar ()=='S') { // NMTOKENS
985 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKENS", XmlSchema.Namespace);
988 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKEN", XmlSchema.Namespace);
992 def.Datatype = XmlSchemaDatatype.FromName ("NOTATION", XmlSchema.Namespace);
993 TryExpandPERefSpaceKeep ();
994 if (!SkipWhitespace ())
995 throw NotWFError ("Whitespace is required after notation name in DTD attribute definition.");
999 def.EnumeratedNotations.Add (ReadName ()); // notation name
1002 while(PeekChar () == '|') {
1006 def.EnumeratedNotations.Add (ReadName ()); // notation name
1013 throw NotWFError ("attribute declaration syntax error.");
1016 default: // Enumerated Values
1017 def.Datatype = XmlSchemaDatatype.FromName ("NMTOKEN", XmlSchema.Namespace);
1022 def.EnumeratedAttributeDeclaration.Add (
1023 def.Datatype.Normalize (ReadNmToken ())); // enum value
1025 while(PeekChar () == '|') {
1029 def.EnumeratedAttributeDeclaration.Add (
1030 def.Datatype.Normalize (ReadNmToken ())); // enum value
1037 TryExpandPERefSpaceKeep ();
1038 if (!SkipWhitespace ())
1039 throw NotWFError ("Whitespace is required between type and occurence in DTD attribute definition.");
1042 ReadAttributeDefaultValue (def);
1047 private void ReadAttributeDefaultValue (DTDAttributeDefinition def)
1049 if(PeekChar () == '#')
1055 Expect ("REQUIRED");
1056 def.OccurenceType = DTDAttributeOccurenceType.Required;
1060 def.OccurenceType = DTDAttributeOccurenceType.Optional;
1064 def.OccurenceType = DTDAttributeOccurenceType.Fixed;
1065 if (!SkipWhitespace ())
1066 throw NotWFError ("Whitespace is required between FIXED and actual value in DTD attribute definition.");
1067 def.UnresolvedDefaultValue = ReadDefaultAttribute ();
1071 // one of the enumerated value
1074 def.UnresolvedDefaultValue = ReadDefaultAttribute ();
1077 // VC: If default value exists, it should be valid.
1078 if (def.DefaultValue != null) {
1079 string normalized = def.Datatype.Normalize (def.DefaultValue);
1080 bool breakup = false;
1081 object parsed = null;
1083 // enumeration validity
1084 if (def.EnumeratedAttributeDeclaration.Count > 0) {
1085 if (!def.EnumeratedAttributeDeclaration.Contains (normalized)) {
1086 HandleError (new XmlSchemaException ("Default value is not one of the enumerated values.",
1087 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1091 if (def.EnumeratedNotations.Count > 0) {
1092 if (!def.EnumeratedNotations.Contains (normalized)) {
1093 HandleError (new XmlSchemaException ("Default value is not one of the enumerated notation values.",
1094 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1099 // type based validity
1102 parsed = def.Datatype.ParseValue (normalized, DTD.NameTable, null);
1103 } catch (Exception ex) { // FIXME: (wishlist) bad catch ;-(
1104 HandleError (new XmlSchemaException ("Invalid default value for ENTITY type.",
1105 def.LineNumber, def.LinePosition, null, def.BaseURI, ex));
1110 switch (def.Datatype.TokenizedType) {
1111 case XmlTokenizedType.ENTITY:
1112 if (DTD.EntityDecls [normalized] == null)
1113 HandleError (new XmlSchemaException ("Specified entity declaration used by default attribute value was not found.",
1114 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1116 case XmlTokenizedType.ENTITIES:
1117 string [] entities = parsed as string [];
1118 for (int i = 0; i < entities.Length; i++) {
1119 string entity = entities [i];
1120 if (DTD.EntityDecls [entity] == null)
1121 HandleError (new XmlSchemaException ("Specified entity declaration used by default attribute value was not found.",
1122 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1128 // Extra ID attribute validity check.
1129 if (def.Datatype != null && def.Datatype.TokenizedType == XmlTokenizedType.ID)
1130 if (def.UnresolvedDefaultValue != null)
1131 HandleError (new XmlSchemaException ("ID attribute must not have fixed value constraint.",
1132 def.LineNumber, def.LinePosition, null, def.BaseURI, null));
1136 private DTDNotationDeclaration ReadNotationDecl()
1138 DTDNotationDeclaration decl = new DTDNotationDeclaration (DTD);
1139 if (!SkipWhitespace ())
1140 throw NotWFError ("Whitespace is required between NOTATION and name in DTD notation declaration.");
1142 decl.Name = ReadName (); // notation name
1144 if (namespaces) { // copy from SetProperties ;-)
1145 int indexOfColon = decl.Name.IndexOf (':');
1147 if (indexOfColon == -1) {
1148 decl.Prefix = String.Empty;
1149 decl.LocalName = decl.Name;
1151 decl.Prefix = decl.Name.Substring (0, indexOfColon);
1152 decl.LocalName = decl.Name.Substring (indexOfColon + 1);
1156 decl.Prefix = String.Empty;
1157 decl.LocalName = decl.Name;
1161 if(PeekChar () == 'P') {
1162 decl.PublicId = ReadPubidLiteral ();
1163 bool wsSkipped = SkipWhitespace ();
1164 if (PeekChar () == '\'' || PeekChar () == '"') {
1166 throw NotWFError ("Whitespace is required between public id and system id.");
1167 decl.SystemId = ReadSystemLiteral (false);
1170 } else if(PeekChar () == 'S') {
1171 decl.SystemId = ReadSystemLiteral (true);
1174 if(decl.PublicId == null && decl.SystemId == null)
1175 throw NotWFError ("public or system declaration required for \"NOTATION\" declaration.");
1176 // This expanding is only allowed as a non-validating parser.
1182 private void ReadExternalID () {
1183 switch (PeekChar ()) {
1185 cachedSystemId = ReadSystemLiteral (true);
1188 cachedPublicId = ReadPubidLiteral ();
1189 if (!SkipWhitespace ())
1190 throw NotWFError ("Whitespace is required between PUBLIC id and SYSTEM id.");
1191 cachedSystemId = ReadSystemLiteral (false);
1196 // The reader is positioned on the first 'S' of "SYSTEM".
1197 private string ReadSystemLiteral (bool expectSYSTEM)
1201 if (!SkipWhitespace ())
1202 throw NotWFError ("Whitespace is required after 'SYSTEM'.");
1206 int quoteChar = ReadChar (); // apos or quot
1208 ClearValueBuffer ();
1209 while (c != quoteChar) {
1212 throw NotWFError ("Unexpected end of stream in ExternalID.");
1214 AppendValueChar (c);
1216 return CreateValueString (); //currentTag.ToString (startPos, currentTag.Length - 1 - startPos);
1219 private string ReadPubidLiteral()
1222 if (!SkipWhitespace ())
1223 throw NotWFError ("Whitespace is required after 'PUBLIC'.");
1224 int quoteChar = ReadChar ();
1226 ClearValueBuffer ();
1227 while(c != quoteChar)
1230 if(c < 0) throw NotWFError ("Unexpected end of stream in ExternalID.");
1231 if(c != quoteChar && !XmlChar.IsPubidChar (c))
1232 throw NotWFError (String.Format ("character '{0}' not allowed for PUBLIC ID", (char) c));
1234 AppendValueChar (c);
1236 return CreateValueString (); //currentTag.ToString (startPos, currentTag.Length - 1 - startPos);
// Reads a name beginning at the reader's current position.
// Thin wrapper: delegates to ReadNameOrNmToken with isNameToken = false.
internal string ReadName ()
{
	const bool asNmToken = false;
	return ReadNameOrNmToken (asNmToken);
}
// Reads a name token beginning at the reader's current position.
// Thin wrapper: delegates to ReadNameOrNmToken with isNameToken = true.
private string ReadNmToken ()
{
	const bool asNmToken = true;
	return ReadNameOrNmToken (asNmToken);
}
1253 private string ReadNameOrNmToken(bool isNameToken)
1255 int ch = PeekChar ();
1257 if (!XmlChar.IsNameChar (ch))
1258 throw NotWFError (String.Format ("a nmtoken did not start with a legal character {0} ({1})", ch, (char) ch));
1261 if (!XmlChar.IsFirstNameChar (ch))
1262 throw NotWFError (String.Format ("a name did not start with a legal character {0} ({1})", ch, (char) ch));
1267 AppendNameChar (ReadChar ());
1269 while (XmlChar.IsNameChar (PeekChar ())) {
1270 AppendNameChar (ReadChar ());
1273 return CreateNameString ();
1276 // Read the next character and compare it against the
1277 // specified character.
1278 private void Expect (int expected)
1280 int ch = ReadChar ();
1282 if (ch != expected) {
1283 throw NotWFError (String.Format (CultureInfo.InvariantCulture,
1284 "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
// Consumes the input character by character and checks it against
// 'expected', in order, by calling the single-character Expect for each
// char. The first mismatch surfaces as whatever that overload throws.
private void Expect (string expected)
{
	foreach (char required in expected)
		Expect (required);
}
1299 private void ExpectAfterWhitespace (char c)
1302 int i = ReadChar ();
1303 if (XmlChar.IsWhitespace (i))
1306 throw NotWFError (String.Format (CultureInfo.InvariantCulture, "Expected {0} but found {1} [{2}].", c, (char) i, i));
// Skips over whitespace at the current input position.
// 'skipped' records whether the very next character was whitespace when
// the method was entered; callers test the result with
// 'if (!SkipWhitespace ())' to enforce mandatory whitespace.
// NOTE(review): the loop body and the return statement are not visible in
// this listing; presumably each iteration consumes one character and the
// method returns 'skipped' - confirm.
1311 // Does not consume the first non-whitespace character.
1312 private bool SkipWhitespace ()
1314 bool skipped = XmlChar.IsWhitespace (PeekChar ());
1315 while (XmlChar.IsWhitespace (PeekChar ()))
// Forwards to the current parser input's PeekChar and returns its result.
// Other methods in this class compare that result with -1 to detect the
// end of the stream.
private int PeekChar ()
{
	int upcoming = currentInput.PeekChar ();
	return upcoming;
}
// Forwards to the current parser input's ReadChar and returns its result.
private int ReadChar ()
{
	int consumed = currentInput.ReadChar ();
	return consumed;
}
1330 // The reader is positioned on the first character after
1331 // the leading '<!--'.
1332 private void ReadComment ()
1334 currentInput.AllowTextDecl = false;
1336 while (PeekChar () != -1) {
1337 int ch = ReadChar ();
1339 if (ch == '-' && PeekChar () == '-') {
1342 if (PeekChar () != '>')
1343 throw NotWFError ("comments cannot contain '--'");
1349 if (XmlChar.IsInvalid (ch))
1350 throw NotWFError ("Not allowed character was found.");
1354 // The reader is positioned on the first character
1357 // It may be xml declaration or processing instruction.
1358 private void ReadProcessingInstruction ()
1360 string target = ReadName ();
1361 if (target == "xml") {
1362 ReadTextDeclaration ();
1364 } else if (String.Compare (target, "xml", true, CultureInfo.InvariantCulture) == 0)
1365 throw NotWFError ("Not allowed processing instruction name which starts with 'X', 'M', 'L' was found.");
1367 currentInput.AllowTextDecl = false;
1369 if (!SkipWhitespace ())
1370 if (PeekChar () != '?')
1371 throw NotWFError ("Invalid processing instruction name was found.");
1373 while (PeekChar () != -1) {
1374 int ch = ReadChar ();
1376 if (ch == '?' && PeekChar () == '>') {
1383 // The reader is positioned after "<?xml "
1384 private void ReadTextDeclaration ()
1386 if (!currentInput.AllowTextDecl)
1387 throw NotWFError ("Text declaration cannot appear in this state.");
1389 currentInput.AllowTextDecl = false;
1394 if (PeekChar () == 'v') {
1396 ExpectAfterWhitespace ('=');
1398 int quoteChar = ReadChar ();
1399 char [] expect1_0 = new char [3];
1400 int versionLength = 0;
1401 switch (quoteChar) {
1404 while (PeekChar () != quoteChar) {
1405 if (PeekChar () == -1)
1406 throw NotWFError ("Invalid version declaration inside text declaration.");
1407 else if (versionLength == 3)
1408 throw NotWFError ("Invalid version number inside text declaration.");
1410 expect1_0 [versionLength] = (char) ReadChar ();
1412 if (versionLength == 3 && new String (expect1_0) != "1.0")
1413 throw NotWFError ("Invalid version number inside text declaration.");
1420 throw NotWFError ("Invalid version declaration inside text declaration.");
1424 if (PeekChar () == 'e') {
1425 Expect ("encoding");
1426 ExpectAfterWhitespace ('=');
1428 int quoteChar = ReadChar ();
1429 switch (quoteChar) {
1432 while (PeekChar () != quoteChar)
1433 if (ReadChar () == -1)
1434 throw NotWFError ("Invalid encoding declaration inside text declaration.");
1439 throw NotWFError ("Invalid encoding declaration inside text declaration.");
1441 // Encoding value should be checked inside XmlInputStream.
1444 throw NotWFError ("Encoding declaration is mandatory in text declaration.");
1449 // Note that this method now behaves differently from
1450 // XmlTextReader's version: it calls AppendValueChar() internally.
1451 private int ReadCharacterReference ()
1455 if (PeekChar () == 'x') {
1458 while (PeekChar () != ';' && PeekChar () != -1) {
1459 int ch = ReadChar ();
1461 if (ch >= '0' && ch <= '9')
1462 value = (value << 4) + ch - '0';
1463 else if (ch >= 'A' && ch <= 'F')
1464 value = (value << 4) + ch - 'A' + 10;
1465 else if (ch >= 'a' && ch <= 'f')
1466 value = (value << 4) + ch - 'a' + 10;
1468 throw NotWFError (String.Format (
1469 CultureInfo.InvariantCulture,
1470 "invalid hexadecimal digit: {0} (#x{1:X})",
1475 while (PeekChar () != ';' && PeekChar () != -1) {
1476 int ch = ReadChar ();
1478 if (ch >= '0' && ch <= '9')
1479 value = value * 10 + ch - '0';
1481 throw NotWFError (String.Format (
1482 CultureInfo.InvariantCulture,
1483 "invalid decimal digit: {0} (#x{1:X})",
1491 // There is no way to save surrogate pairs...
1492 if (XmlChar.IsInvalid (value))
1493 throw NotWFError ("Referenced character was not allowed in XML.");
1494 AppendValueChar (value);
// Appends one Unicode code point to the name buffer, growing the buffer
// when it is full. Code points below 0x10000 are stored as a single
// UTF-16 code unit; larger ones are stored as a surrogate pair using the
// RFC 2781 algorithm (the same arithmetic AppendValueChar uses).
private void AppendNameChar (int ch)
{
	CheckNameCapacity ();
	if (ch < 0x10000) {
		nameBuffer [nameLength++] = (char) ch;
		return;
	}

	// Offset into the supplementary planes: the high 10 bits select the
	// high surrogate, the low 10 bits select the low surrogate. Dividing
	// and taking the remainder by 0x10000 instead yields a valid pair
	// only for U+10000..U+103FF.
	int utag = ch - 0x10000;
	nameBuffer [nameLength++] = (char) ((utag >> 10) + 0xD800);
	// The second code unit needs its own slot, so re-check the capacity.
	CheckNameCapacity ();
	nameBuffer [nameLength++] = (char) ((utag & 0x3FF) + 0xDC00);
}
// Doubles the name buffer when it is completely full, preserving the
// characters stored so far. Does nothing while free slots remain.
private void CheckNameCapacity ()
{
	if (nameLength != nameCapacity)
		return;

	nameCapacity *= 2;
	char [] previous = nameBuffer;
	nameBuffer = new char [nameCapacity];
	Array.Copy (previous, 0, nameBuffer, 0, nameLength);
}
// Returns the name accumulated in the name buffer (the first nameLength
// characters), as added to the DTD's name table.
private string CreateNameString ()
{
	string name = DTD.NameTable.Add (nameBuffer, 0, nameLength);
	return name;
}
// Appends the code point 'ch' to valueBuffer, either as a single char or as
// two chars computed from (ch - 0x10000), or throws XmlException with the
// current line information when the value is too large.
// NOTE(review): the embedded line numbers skip values; the conditions that
// select between the three paths below are on lines not visible in this
// listing.
1525 private void AppendValueChar (int ch)
1527 //See http://www.faqs.org/rfcs/rfc2781.html for used algorithm
// Path 1: store the value as one UTF-16 code unit.
1529 valueBuffer.Append ((char) ch);
// Path 2: the value is reported as too large.
1533 throw new XmlException ("The numeric entity value is too large", null, LineNumber, LinePosition);
// Path 3: surrogate pair -- subtract 0x10000, then the bits above the low
// ten are added to 0xD800 and the low ten bits are added to 0xDC00.
1536 int utag = ch - 0x10000;
1537 valueBuffer.Append((char) ((utag >> 10) + 0xD800));
1538 valueBuffer.Append((char) ((utag & 0x3FF) + 0xDC00));
// Returns the current contents of the value buffer as a string.
private string CreateValueString ()
{
	string text = valueBuffer.ToString ();
	return text;
}
// Empties the value buffer so the next value starts from scratch.
private void ClearValueBuffer ()
{
	valueBuffer.Remove (0, valueBuffer.Length);
}
1552 // The reader is positioned on the quote character.
1553 // *Keeps quote char* to value to get_QuoteChar() correctly.
// Reads a quoted default attribute value into the value buffer and returns
// it as a string. The opening and closing quote characters are appended to
// the buffer as well, so the returned string includes them.
// NOTE(review): the embedded line numbers skip values; the switch/case
// structure inside the loop is not visible in this listing, so the comments
// below describe only the visible lines.
1554 private string ReadDefaultAttribute ()
1556 ClearValueBuffer ();
// The value must start with a single or double quote.
1560 int quoteChar = ReadChar ();
1562 if (quoteChar != '\'' && quoteChar != '\"')
1563 throw NotWFError ("an attribute value was not quoted");
1565 AppendValueChar (quoteChar);
// Consume characters until the matching quote is the next character.
1567 while (PeekChar () != quoteChar) {
1568 int ch = ReadChar ();
1573 throw NotWFError ("attribute values cannot contain '<'");
1575 throw NotWFError ("unexpected end of file in an attribute value");
1577 AppendValueChar (ch);
1578 if (PeekChar () == '#')
1580 // Check XML 1.0 section 3.1 WFC.
1581 string entName = ReadName ();
// Names that XmlChar.GetPredefinedEntity() does not recognise (negative
// result) are looked up among the DTD's entity declarations.
1583 if (XmlChar.GetPredefinedEntity (entName) < 0) {
1584 DTDEntityDeclaration entDecl =
1585 DTD == null ? null : DTD.EntityDecls [entName];
// NOTE(review): the lookup above tolerates DTD == null, but the condition
// two lines below dereferences DTD unconditionally -- confirm whether DTD
// can be null here.
1586 if (entDecl == null || entDecl.SystemId != null)
1587 // WFC: Entity Declared (see 4.1)
1588 if (DTD.IsStandalone || (DTD.SystemId == null && !DTD.InternalSubsetHasPEReference))
1589 throw NotWFError ("Reference to external entities is not allowed in attribute value.");
// The entity name and the terminating ';' are appended to the buffer as text.
1591 valueBuffer.Append (entName);
1592 AppendValueChar (';');
1595 AppendValueChar (ch);
// Consume the closing quote and keep it in the value.
1600 ReadChar (); // quoteChar
1601 AppendValueChar (quoteChar);
1603 return CreateValueString ();
// Switches parsing to the external entity identified by 'url': resolves the
// URL against DTD.BaseURI, rejects a URL whose resolved form matches the
// BaseURI of an input already on the stack, pushes the current input, and
// makes a new XmlParserInput the current one.
// NOTE(review): the embedded line numbers skip values; the try keywords and
// the declarations of 'baseUri' and 's' are not visible in this listing.
1606 private void PushParserInput (string url)
// Build the base Uri only when DTD.BaseURI is non-empty; a
// UriFormatException from the constructor is caught below.
1610 if (DTD.BaseURI != null && DTD.BaseURI.Length > 0)
1611 baseUri = new Uri (DTD.BaseURI);
1612 } catch (UriFormatException) {
// An empty or null url resolves to the base Uri itself.
1615 Uri absUri = url != null && url.Length > 0 ?
1616 DTD.Resolver.ResolveUri (baseUri, url) : baseUri;
1617 string absPath = absUri != null ? absUri.ToString () : String.Empty;
// Only inputs already on the stack are compared; currentInput is pushed
// after this loop.
1619 foreach (XmlParserInput i in parserInputStack.ToArray ()) {
1620 if (i.BaseURI == absPath)
1621 throw NotWFError ("Nested inclusion is not allowed: " + url);
1623 parserInputStack.Push (currentInput);
// Ask the resolver for the entity as a Stream and wrap it as the new input.
1626 s = DTD.Resolver.GetEntity (absUri, null, typeof (Stream)) as Stream;
1627 currentInput = new XmlParserInput (new XmlStreamReader (s), absPath);
1628 } catch (Exception ex) { // FIXME: (wishlist) Bad exception catch ;-(
// On failure, report through HandleError() using the position of the input
// that is still current, then continue with an empty input for absPath.
1631 int line = currentInput == null ? 0 : currentInput.LineNumber;
1632 int col = currentInput == null ? 0 : currentInput.LinePosition;
1633 string bu = (currentInput == null) ? String.Empty : currentInput.BaseURI;
1634 HandleError (new XmlSchemaException ("Specified external entity not found. Target URL is " + url + " .",
1635 line, col, null, bu, ex));
1636 currentInput = new XmlParserInput (new StringReader (String.Empty), absPath);
// Closes the current parser input and makes the most recently pushed
// input the current one again.
private void PopParserInput ()
{
	XmlParserInput finished = currentInput;
	finished.Close ();

	object restored = parserInputStack.Pop ();
	currentInput = restored as XmlParserInput;
}
1646 private void HandleError (XmlSchemaException ex)
1648 #if DTD_HANDLE_EVENTS
1649 if (this.ValidationEventHandler != null)
1650 ValidationEventHandler (this, new ValidationEventArgs (ex, ex.Message, XmlSeverityType.Error));