5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 using System.Collections;
31 using System.Collections.Generic;
33 using System.Security.Policy;
35 using System.Xml.Query;
36 using System.Xml.Schema;
37 using System.Xml.XPath;
38 using Mono.Xml.XQuery;
39 using Mono.Xml.XPath2;
// NOTE(review): two namespace declarations appear back to back; presumably they
// are alternatives selected by conditional compilation — confirm.
43 namespace Mono.Xml.XPath2.Parser
45 namespace Mono.Xml.XQuery.Parser
// Tokenizer for query text. Implements yyParser.yyInput (presumably the
// parser's token-source contract, served by advance/value — confirm) and
// reports positions through IXmlLineInfo.
48 // FIXME: make internal in the future
49 public class XQueryTokenizer : yyParser.yyInput, IXmlLineInfo
// Toggled by ReadChar; presumably marks that the line counter must advance on
// the next character read — confirm.
53 bool nextIncrementLine;
// Prefix-to-namespace bindings: seeded by the constructor, extended through
// AddNamespace, and consulted when ParseDefault resolves a prefixed name.
56 XmlNamespaceManager nsResolver;
// Starts as XQueryFunction.Namespace; readable and writable through the
// DefaultFunctionNamespace property.
57 string defaultFunctionNamespace = XQueryFunction.Namespace;
// One-token look-ahead slot. A negative value means it is empty (advance only
// consumes it when it is >= 0).
68 int lookAheadToken = -1;
// Value that accompanies lookAheadToken; advance copies it into tokenValue.
69 object lookAheadTokenValue;
// Whitespace mode, initially Arbitrary.
72 WhitespaceHandling ws = WhitespaceHandling.Arbitrary;
// Current lexical state, initially Default; PopState overwrites it.
73 ParseState state = ParseState.Default;
// Scratch buffer for the characters of the token being built (see AddValueChar
// and CreateValueString). Starts with room for 30 characters.
76 char [] buffer = new char [30];
// Builds a tokenizer and registers the prefixes xs, xdt, xsi, fn and local
// on a fresh XmlNamespaceManager.
// NOTE(review): what happens to `reader` is not shown on these lines;
// presumably it is kept as the `source` that PeekChar/ReadChar read — confirm.
79 public XQueryTokenizer (TextReader reader)
// Stack of ParseState values driven by PushState/PopState.
83 stateStack = new Stack ();
85 nsResolver = new XmlNamespaceManager (new NameTable ());
86 nsResolver.AddNamespace ("xs", XmlSchema.Namespace);
87 nsResolver.AddNamespace ("xdt", InternalPool.XdtNamespace);
88 // FIXME: Are they really predefined?
89 nsResolver.AddNamespace ("xsi", XmlSchema.InstanceNamespace);
// The fn and local URIs below are the 2003/11-dated ones, hard-coded here.
90 nsResolver.AddNamespace ("fn", "http://www.w3.org/2003/11/xpath-functions");
91 nsResolver.AddNamespace ("local", "http://www.w3.org/2003/11/xquery-local-functions");
// Read-only view of the tokenizer's namespace manager, typed as
// IXmlNamespaceResolver.
94 internal IXmlNamespaceResolver NSResolver {
95 get { return nsResolver; }
// Gets or sets the defaultFunctionNamespace field (initially
// XQueryFunction.Namespace). The setter stores the value without validation.
98 internal string DefaultFunctionNamespace {
99 get { return defaultFunctionNamespace; }
100 set { defaultFunctionNamespace = value; }
// Binds `prefix` to the namespace URI `ns` by forwarding both arguments to
// nsResolver.AddNamespace.
103 public void AddNamespace (string prefix, string ns)
105 nsResolver.AddNamespace (prefix, ns);
// Moves to the next token. The result is true when the new currentToken is
// non-negative (see the final return).
108 public bool advance ()
// NOTE(review): the statement guarded by this check is not shown; presumably a
// negative currentToken means scanning already ended and advance stops — confirm.
110 if (currentToken < 0)
// A token parked in the look-ahead slot is delivered first, with its value.
112 if (lookAheadToken >= 0) {
113 tokenValue = lookAheadTokenValue;
114 currentToken = lookAheadToken;
// Scan a token from the input (presumably only when no look-ahead token was
// pending — confirm).
118 currentToken = ParseToken ();
119 return currentToken >= 0;
// Value accessor that pairs with advance ().
// NOTE(review): presumably returns tokenValue, the field the scanning methods
// assign — confirm.
127 public object value ()
// IXmlLineInfo member (the class declares that interface): tells callers
// whether LineNumber/LinePosition carry usable data.
132 public bool HasLineInfo ()
// IXmlLineInfo member: line of the tokenizer's current position.
// NOTE(review): presumably backed by a line counter maintained in ReadChar — confirm.
137 public int LineNumber {
// IXmlLineInfo member: returns the `column` field.
141 public int LinePosition {
142 get { return column; }
// Whitespace handling mode of the tokenizer.
// NOTE(review): presumably backed by the `ws` field — confirm.
145 internal WhitespaceHandling Space {
// Current lexical state; the getter returns the `state` field.
// NOTE(review): the disabled trace lines below reference `value`, which
// suggests they sit inside a setter — confirm.
150 internal ParseState State {
151 get { return state; }
153 // Console.Error.WriteLine ("**** eno **** state transition from {0} to {1}, stack count = {2}", state, value, stateStack.Count);
154 //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
// Pushes `newState` onto stateStack so that a later PopState makes it the
// current state. The `state` field is not assigned on the lines shown here.
159 internal void PushState (ParseState newState)
161 stateStack.Push (newState);
162 // Console.Error.WriteLine ("**** eno **** state pushed {0}, added stack count = {1}", newState, stateStack.Count);
163 //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
// Pops the top entry of stateStack and makes it the current state.
// Throws the exception built by Error () when the stack is empty.
166 internal void PopState ()
168 if (stateStack.Count == 0)
169 throw Error ("Internal state transition error. State stack is empty.");
// stateStack is a non-generic Stack, hence the cast.
170 state = (ParseState) stateStack.Pop ();
171 // Console.Error.WriteLine ("**** eno **** state pop, now as {0}, stack count = {1}", state, stateStack.Count);
172 //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
// Creates (does not throw) an XmlQueryCompileException for `message`; callers
// write `throw Error (...)`. The tokenizer itself is passed as the second
// argument (presumably as the IXmlLineInfo position source — confirm); the
// last two arguments are null.
175 private XmlQueryCompileException Error (string message)
177 return new XmlQueryCompileException (message, this, null, null);
// Scans one token and returns its Token code. Handles whitespace according to
// the whitespace mode, recognizes a leading numeric literal, then hands over
// to the scanner that matches the current ParseState.
180 private int ParseToken ()
185 case ParseState.StartTag:
193 case WhitespaceHandling.Arbitrary:
// Explicit mode: the next character must be whitespace, after which the
// Arbitrary handling is reused.
196 case WhitespaceHandling.Explicit:
197 if (!XmlChar.IsWhitespace (PeekChar ()))
198 throw Error ("Whitespace is required.");
199 goto case WhitespaceHandling.Arbitrary;
// A leading numeric character starts a decimal literal.
207 // FIXME: consider DOUBLE_LITERAL
208 if (Char.IsNumber ((char) c)) {
209 tokenValue = ReadDecimal (false);
210 return Token.DECIMAL_LITERAL;
// State-specific scanners.
214 case ParseState.OccurenceIndicator:
215 return ParseOccurenceIndicator ();
216 case ParseState.XmlPIContent:
217 return ParseXmlPIContent ();
218 case ParseState.XmlComment:
219 return ParseXmlCommentContent ();
220 case ParseState.ElementContent:
221 return ParseElementContent ();
222 case ParseState.StartTag:
223 return ParseStartTag ();
224 case ParseState.QuotAttributeContent:
225 return ParseAttributeContent ('"');
226 case ParseState.AposAttributeContent:
227 return ParseAttributeContent ('\'');
// NOTE(review): presumably the default branch for every remaining state — confirm.
229 return ParseDefault ();
// Consumes the text of an XQuery comment, collecting characters with
// AddValueChar until a ')' check succeeds. ParseDefault calls this, discards
// the result and restarts scanning with ParseToken.
233 private int ParseXQueryComment ()
// NOTE(review): the message talks about "XML processing instruction content"
// although this method scans an XQuery comment; it looks copied from
// ParseXmlPIContent.
238 throw Error ("Unexpected end of query text inside XML processing instruction content");
240 if (PeekChar () == ')') {
242 tokenValue = CreateValueString ();
// NOTE(review): XML_PI_TO_END also looks copied from the PI scanner; the only
// visible caller ignores the return value.
243 return Token.XML_PI_TO_END;
249 AddValueChar ((char) c);
// Scanner for ParseState.XmlPIContent: collects characters until a '>' check
// succeeds, then returns Token.XML_PI_TO_END with the collected text as the
// token value. Throws when the query text ends first.
253 private int ParseXmlPIContent ()
258 throw Error ("Unexpected end of query text inside XML processing instruction content");
260 if (PeekChar () == '>') {
262 tokenValue = CreateValueString ();
263 return Token.XML_PI_TO_END;
269 AddValueChar ((char) c);
// Scanner for ParseState.XmlComment: collects characters until the checks for
// '-' followed by '>' succeed, then returns Token.XML_COMMENT_TO_END with the
// collected text as the token value. Throws when the query text ends first.
273 private int ParseXmlCommentContent ()
275 // FIXME: handle ---> correctly
279 throw Error ("Unexpected end of query text inside XML comment content");
281 if (PeekChar () == '-') {
283 if (PeekChar () == '>') {
284 tokenValue = CreateValueString ();
285 return Token.XML_COMMENT_TO_END;
295 AddValueChar ((char) c);
// Collects CDATA section text until the checks for ']' followed by '>'
// succeed, then returns Token.XML_CDATA_TO_END with the collected text as the
// token value. Throws when the query text ends first.
// NOTE(review): none of the dispatch lines shown in ParseToken call this
// method — confirm where it is invoked.
299 private int ParseXmlCDataContent ()
301 // FIXME: handle ]]]> correctly
305 throw Error ("Unexpected end of query text inside XML CDATA section content");
308 if (PeekChar () == ']') {
310 if (PeekChar () == '>') {
311 tokenValue = CreateValueString ();
312 return Token.XML_CDATA_TO_END;
322 AddValueChar ((char) c);
// Scanner for ParseState.ElementContent: gathers literal element text
// (expanding predefined entities through ReadPredefinedEntity) and returns
// Token.ELEM_CONTENT_LITERAL, or defers to ParseDefault.
326 private int ParseElementContent ()
// NOTE(review): both end-of-input messages in this method mention "XML
// processing instruction content" although element content is being scanned;
// they look copied from ParseXmlPIContent.
331 throw Error ("Unexpected end of query text inside XML processing instruction content");
335 return ParseDefault ();
341 throw Error ("Unexpected end of query text inside XML processing instruction content");
345 ReadPredefinedEntity ();
// Appends to tokenValue instead of replacing it.
348 tokenValue += CreateValueString ();
349 return Token.ELEM_CONTENT_LITERAL;
351 AddValueChar ((char) c);
// Reads an entity name with ReadOneToken and rejects names it does not
// recognize. Called from ParseElementContent.
// NOTE(review): presumably the accepted names are the XML predefined entities
// and their replacement text goes into the value buffer — confirm.
358 private void ReadPredefinedEntity ()
360 string token = ReadOneToken ();
379 throw Error (String.Format ("Unexpected general entity name: {0} .", token));
// Collects extension content until the checks for ':' followed by ')'
// succeed, then returns Token.EXT_CONTENT with the collected text as the
// token value. Throws when the query text ends first.
383 // FIXME: not used as yet
384 private int ParseExtContent ()
386 // FIXME: handle :::) correctly
390 throw Error ("Unexpected end of query text inside external content");
393 if (PeekChar () == ':') {
395 if (PeekChar () == ')') {
396 tokenValue = CreateValueString ();
397 return Token.EXT_CONTENT;
407 AddValueChar ((char) c);
// Scanner for ParseState.OccurenceIndicator. Switches the state to Operator
// up front, then returns QUESTION or ASTERISK depending on the next
// character, or falls back to ParseOperator.
411 private int ParseOccurenceIndicator ()
// The state change happens before the character is examined, so it applies
// to every path out of this method.
413 state = ParseState.Operator;
414 switch (PeekChar ()) {
417 return Token.QUESTION;
420 return Token.ASTERISK;
425 return ParseOperator ();
// Scanner for ParseState.StartTag: returns GREATER or EMPTY_TAG_CLOSE for the
// tag terminators and otherwise delegates to ParseDefault to read a name.
429 private int ParseStartTag ()
441 return Token.GREATER;
445 return Token.EMPTY_TAG_CLOSE;
// The whitespace requirement below is deliberately disabled (see the FIXME).
447 // FIXME: there seems a bug in the spec that StartTag
448 // state must accept QName without heading space for
450 // if (!XmlChar.IsWhitespace (PeekChar ()))
451 // throw Error ("Whitespace is required.");
453 return ParseDefault (); // only QName is allowed here.
// Scanner for the Quot/AposAttributeContent states. `closeChar` is the quote
// character that delimits the attribute value ('"' or '\'', as passed by
// ParseToken). Returns ATT_VALUE_LITERAL for literal text, QUOT/APOS for the
// closing quote, or OPEN_CURLY. Throws when the input ends inside the value.
456 private int ParseAttributeContent (char closeChar)
// `t` stays ATT_VALUE_LITERAL until a delimiter token is recognized.
458 int t = Token.ATT_VALUE_LITERAL;
462 throw Error ("Unexpected end of attribute value content.");
463 if (c == closeChar) {
// A second closeChar right after the first adds one literal closeChar to the value.
466 if (c == closeChar) {
468 AddValueChar (closeChar);
471 t = closeChar == '"' ? Token.QUOT : Token.APOS;
481 t = Token.OPEN_CURLY;
484 AddValueChar ((char) ReadChar ());
// When a delimiter was recognized but literal text is already buffered, the
// literal is returned first.
// NOTE(review): presumably the delimiter token is parked in lookAheadToken
// for the next advance () — confirm.
486 if (t != Token.ATT_VALUE_LITERAL) {
487 if (bufferIndex > 0) {
489 tokenValue = CreateValueString ();
490 return Token.ATT_VALUE_LITERAL;
// Scanner used by ParseOccurenceIndicator's fallback path; the visible return
// delegates to ParseDefault.
498 private int ParseOperator ()
501 return ParseDefault ();
// General-purpose scanner. ParseToken, ParseOperator, ParseStartTag and
// ParseElementContent all call it. It recognizes punctuation, string and
// numeric literals, XML constructor openers, keywords (depending on the
// current ParseState) and names, and returns the matching Token code. The
// token's value, where there is one, is left in tokenValue.
504 private int ParseDefault ()
// Dot handling: a numeric character after the dot makes it a decimal
// literal read with floatingPoint = true.
509 if (PeekChar () == '.') {
513 else if (Char.IsNumber ((char) PeekChar ())) {
514 tokenValue = ReadDecimal (true);
520 return Token.SEMICOLON;
// Open parenthesis: a following "::" yields PRAGMA_OPEN, a following single
// ':' starts an XQuery comment that is skipped before scanning restarts.
522 if (PeekChar () == ':') {
524 if (PeekChar () == ':') {
526 return Token.PRAGMA_OPEN;
528 ParseXQueryComment ();
529 return ParseToken (); // start again
531 return Token.OPEN_PAREN;
533 return Token.CLOSE_PAREN;
// Colon-led tokens: PRAGMA_CLOSE, CLOSE_PAREN_COLON and COLON_EQUAL.
535 switch (PeekChar ()) {
538 if (PeekChar () == ')') {
540 return Token.PRAGMA_CLOSE;
545 return Token.CLOSE_PAREN_COLON;
548 return Token.COLON_EQUAL;
552 return Token.OPEN_BRACKET;
554 return Token.CLOSE_BRACKET;
556 return Token.OPEN_CURLY;
558 return Token.CLOSE_CURLY;
// String literals in either quote style.
562 tokenValue = ReadQuoted ('\'');
563 return Token.STRING_LITERAL;
565 tokenValue = ReadQuoted ('"');
566 return Token.STRING_LITERAL;
// '<'-led tokens: end tag, XML comment, CDATA section and PI openers, then
// the LESSER* comparison operators.
570 // only happens when state is ElementContent
571 // (otherwise it might be "/foo</bar")
572 if (state == ParseState.ElementContent) {
573 switch ((char) PeekChar ()) {
576 return Token.END_TAG_START;
579 switch (PeekChar ()) {
582 if (ReadChar () != '-')
583 throw Error ("Invalid sequence of characters '<!-'.");
585 return Token.XML_COMMENT_START;
589 return Token.XML_CDATA_START;
591 throw Error ("Invalid sequence of characters '<!'.");
594 return Token.XML_PI_START;
600 switch (PeekChar ()) {
603 return Token.LESSER2;
606 return Token.LESSER_EQUAL;
// '>'-led tokens.
610 switch (PeekChar ()) {
613 return Token.GREATER2;
616 return Token.GREATER_EQUAL;
618 return Token.GREATER;
// Asterisk: with a following ':' it is a wildcard-prefix name test whose
// token value is a qualified name with "*" in the namespace position.
622 if (PeekChar () == ':') {
625 tokenValue = new XmlQualifiedName (ReadOneToken (), "*");
626 return Token.WILD_PREFIX;
628 return Token.ASTERISK;
634 // only happens when state is StartTag
635 // (otherwise it might be "/>$extvar")
636 if (state == ParseState.StartTag && PeekChar () == '>') {
638 return Token.EMPTY_TAG_CLOSE;
640 if (PeekChar () == '/') {
646 return Token.QUESTION;
// Anything else is read as a name and first checked against the keywords
// that are meaningful in the current state.
653 string name = ReadOneToken ();
656 bool validKeyword = false;
659 case ParseState.XmlSpaceDecl:
662 return Token.PRESERVE;
667 case ParseState.CloseKindTest:
668 if (name == "nillable")
669 return Token.NILLABLE;
671 case ParseState.ExtKey:
676 return Token.EXTENSION;
679 case ParseState.KindTest:
682 return Token.CONTEXT;
684 return Token.ELEMENT;
691 case ParseState.ItemType:
694 return Token.ATTRIBUTE;
696 return Token.COMMENT;
697 case "document-node":
698 return Token.DOCUMENT_NODE;
700 return Token.ELEMENT;
707 case "processing-instruction":
708 return Token.PROCESSING_INSTRUCTION;
713 case ParseState.NamespaceKeyword:
716 return Token.DECLARE;
718 return Token.DEFAULT;
720 return Token.ELEMENT;
722 return Token.FUNCTION;
724 return Token.NAMESPACE;
727 case ParseState.OccurenceIndicator:
728 case ParseState.Operator:
// NOTE(review): the name labels that follow presumably only mark the name as
// a valid keyword for this state (validKeyword); the mapping to Token codes
// comes from the returns further down — confirm.
797 case ParseState.Default:
800 case "ancestor-or-self":
811 case "descendant-or-self":
813 case "document-node":
817 case "following-sibling":
831 case "preceding-sibling":
832 case "processing-instruction":
857 return Token.VERSION;
861 return Token.EXTENSION;
865 return Token.NAMESPACE;
867 return Token.DECLARE;
869 return Token.XMLSPACE;
871 return Token.PRESERVE;
875 return Token.DEFAULT;
877 return Token.CONSTRUCTION;
879 return Token.ORDERING;
881 return Token.ORDERED;
883 return Token.UNORDERED;
884 case "document-node":
885 return Token.DOCUMENT_NODE;
887 return Token.DOCUMENT;
889 return Token.ELEMENT;
891 return Token.ATTRIBUTE;
892 case "processing-instruction":
893 return Token.PROCESSING_INSTRUCTION;
895 return Token.COMMENT;
901 return Token.FUNCTION;
903 return Token.COLLATION;
905 return Token.BASEURI;
913 return Token.VARIABLE;
917 return Token.EXTERNAL;
919 return Token.VALIDATION;
943 return Token.ASCENDING;
945 return Token.DESCENDING;
949 return Token.GREATEST;
957 return Token.SATISFIES;
979 return Token.INSTANCE;
989 return Token.TYPESWITCH;
995 return Token.CASTABLE;
1007 return Token.INTERSECT;
1009 return Token.EXCEPT;
1011 return Token.VALIDATE;
1013 return Token.CONTEXT;
1015 return Token.NILLABLE;
1019 return Token.GLOBAL;
1025 return Token.DESCENDANT;
1028 case "descendant-or-self":
1029 return Token.DESCENDANT_OR_SELF;
1030 case "following-sibling":
1031 return Token.FOLLOWING_SIBLING;
1033 return Token.FOLLOWING;
1035 return Token.PARENT;
1037 return Token.ANCESTOR;
1039 return Token.PRECEDING;
1040 case "preceding-sibling":
1041 return Token.PRECEDING_SIBLING;
1042 case "ancestor-or-self":
1043 return Token.ANCESTOR_OR_SELF;
// In these states the name is reported as a plain NCNAME.
1048 case ParseState.NamespaceDecl:
1049 case ParseState.NamespaceKeyword:
1050 case ParseState.XmlSpaceDecl:
1051 case ParseState.KindTestForPI:
1052 case ParseState.XmlPI:
1053 return Token.NCNAME;
// Name followed by ':' — either "name:=" (the name is complete and
// COLON_EQUAL is parked in the look-ahead slot) or a prefixed name.
1056 if (PeekChar () == ':') {
1059 switch (PeekChar ()) {
1064 case '=': // ex. let foo:= ...
1066 tokenValue = new XmlQualifiedName (name, nsResolver.DefaultNamespace);
1067 lookAheadToken = Token.COLON_EQUAL;
1070 name = ReadOneToken ();
// The prefix must be bound in nsResolver; an unbound prefix is an error.
1074 string ns = nsResolver.LookupNamespace (prefixName);
1076 throw Error (String.Format ("Prefix '{0}' is not mapped to any namespace URI.", prefixName));
1077 tokenValue = new XmlQualifiedName (name, ns);
// "prefix:*" is a wildcard on the local name.
1079 return name == "*" ? Token.WILD_LOCALNAME : Token.QNAME;
// Unprefixed name: qualified name with no namespace argument.
1081 tokenValue = new XmlQualifiedName (name);
// Returns the next character without consuming it, as an int.
// The character read from `source` is cached in peekChar, and ReadChar hands
// it out when peekChar is not -1.
// NOTE(review): presumably the read below happens only when nothing is cached
// yet — confirm.
1085 private int PeekChar ()
1088 peekChar = source.Read ();
// Consumes one character and returns it as an int. A character cached by
// PeekChar (peekChar != -1) is checked for first; `ret` can also come
// straight from source.Read ().
1092 private int ReadChar ()
1095 if (peekChar != -1) {
1100 ret = source.Read ();
// nextIncrementLine is cleared in one branch and set in another.
// NOTE(review): presumably this defers the line/column update until the
// character after a line break is read — confirm.
1102 if (nextIncrementLine) {
1105 nextIncrementLine = false;
1112 nextIncrementLine = true;
// Inspects upcoming characters with PeekChar.
// NOTE(review): presumably consumes them while they are whitespace and stops
// at the first other character — confirm.
1121 private void SkipWhitespaces ()
1124 switch (PeekChar ()) {
// Appends `c` to the value buffer at bufferIndex and advances the index.
1137 private void AddValueChar (char c)
// Buffer full: allocate one twice as large and copy the existing characters.
// NOTE(review): presumably `buffer` is then pointed at newBuf — confirm.
1139 if (bufferIndex == buffer.Length) {
1140 char [] newBuf = new char [bufferIndex * 2];
1141 Array.Copy (buffer, newBuf, bufferIndex);
1144 buffer [bufferIndex++] = c;
// Returns the first bufferIndex characters of the value buffer as a string.
// bufferIndex is not reset on the line shown here.
1147 private string CreateValueString ()
1149 return new string (buffer, 0, bufferIndex);
// Consumes `expected` from the input one character at a time and throws the
// exception built by Error () at the first mismatch. Characters read before
// the mismatch stay consumed.
1152 private void Expect (string expected)
1154 for (int i = 0; i < expected.Length; i++)
1155 if (ReadChar () != expected [i])
1156 throw Error (String.Format ("Expected token '{0}' did not appear.", expected));
// Reads a quoted string literal delimited by `quoteChar` and returns its
// content. ParseDefault calls it with '\'' and '"'.
1159 // TODO: parse three quoted
1160 private string ReadQuoted (char quoteChar)
1165 int c = ReadChar ();
// The two checks below distinguish the quote styles.
// NOTE(review): presumably each handles a quote character met inside the
// literal — confirm.
1169 if (quoteChar == '"')
1173 if (quoteChar == '\'')
1177 AddValueChar ((char) c);
1182 return CreateValueString ();
// Reads a numeric literal from the input and returns it as a decimal.
// With floatingPoint = true a leading "." is prepended before parsing;
// ParseDefault passes true after a dot, ParseToken passes false.
1185 private decimal ReadDecimal (bool floatingPoint)
1190 int c = PeekChar ();
// Any '.' is accepted here, so more than one can be collected (see the FIXME).
// NOTE(review): Char.IsNumber also accepts numeric characters beyond the
// decimal digits that Char.IsDigit accepts, which decimal.Parse may reject — confirm.
1194 // FIXME: more complex
1195 else if (Char.IsNumber ((char) c) || c == '.') {
1197 AddValueChar ((char) c);
1203 string s = (floatingPoint ? "." : "") + CreateValueString ();
// NOTE(review): decimal.Parse (string) uses the current culture, so the '.'
// separator is misread under cultures that use ',' as the decimal mark;
// consider passing CultureInfo.InvariantCulture.
1204 return decimal.Parse (s);
// Reads one name-like token and returns its text. The characters collected
// at one point are stored in prefixName; a second attempt to set it is
// rejected as an invalid colon.
1207 private string ReadOneToken ()
1212 int c = PeekChar ();
1222 if (!IsTokenContinuable (c)) {
1224 if (prefixName != null)
// NOTE(review): this exception is built directly instead of through Error (),
// so unlike other errors in this class it does not pass the tokenizer as
// position source.
1225 throw new XmlQueryCompileException ("Invalid colon was found.");
1226 prefixName = CreateValueString ();
1233 AddValueChar ((char) c);
1238 return CreateValueString ();
// Tells ReadOneToken whether `c` may continue the token being read. The
// visible return defers to XmlChar.IsNCNameChar.
1241 private bool IsTokenContinuable (int c)
1249 return XmlChar.IsNCNameChar (c);
// Whitespace modes of the tokenizer. ParseToken references the members
// Arbitrary and Explicit.
1254 public enum WhitespaceHandling {
// Lexical states of the tokenizer. ParseToken and ParseDefault branch on the
// current value to choose a scanner and the keywords that apply.
1260 public enum ParseState {
// Inside an attribute value delimited by '"' and by '\'' respectively
// (ParseToken passes those characters to ParseAttributeContent).
1282 QuotAttributeContent,
1283 AposAttributeContent,