Merge pull request #2922 from akoeplinger/fix-tvos
[mono.git] / mcs / class / Mono.Xml.Ext / Mono.Xml.XPath2 / TokenizerBase.cs
1 //
2 // XQueryTokenizer.cs
3 //
4 // Author:
5 //      Atsushi Enomoto <atsushi@ximian.com>
6 //
7 // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
16 // 
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
19 // 
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28
29 using System;
30 using System.Collections;
31 using System.Collections.Generic;
32 using System.IO;
33 using System.Security.Policy;
34 using System.Xml;
35 using System.Xml.Query;
36 using System.Xml.Schema;
37 using System.Xml.XPath;
38 using Mono.Xml.XQuery;
39 using Mono.Xml.XPath2;
40 using Mono.Xml;
41
42 #if XPATH2_PARSER
43 namespace Mono.Xml.XPath2.Parser
44 #elif XQUERY_PARSER
45 namespace Mono.Xml.XQuery.Parser
46 #endif
47 {
48         // FIXME: make internal in the future
49         public class XQueryTokenizer : yyParser.yyInput, IXmlLineInfo
50         {
// Current position in the query text; reported through
// LineNumber and LinePosition.
private int line = 1;
private int column = 0;
private bool nextIncrementLine;

// Prefix-to-namespace bindings, and the namespace used for
// function names (starts as XQueryFunction.Namespace).
private XmlNamespaceManager nsResolver;
private string defaultFunctionNamespace = XQueryFunction.Namespace;

// Query text reader and a one-character slot that ParseDefault
// uses to hand a character back (presumably -1 means "empty";
// the reading helpers are defined elsewhere in this class).
private TextReader source;
private int peekChar = -1;

// The token most recently produced and its associated data.
private int currentToken;
private string prefixName;
private object tokenValue;

// A token queued to be returned by the next advance () call
// (negative when nothing is queued).
private int lookAheadToken = -1;
private object lookAheadTokenValue;

// Lexical state: the active state plus the stack used by
// PushState () / PopState ().
private WhitespaceHandling ws = WhitespaceHandling.Arbitrary;
private ParseState state = ParseState.Default;
private Stack stateStack;

// Scratch buffer in which literal values are accumulated.
private char [] buffer = new char [30];
private int bufferIndex;
78
// Creates a tokenizer that reads query text from the given reader
// and pre-registers the built-in namespace prefixes
// (xs, xdt, xsi, fn, local).
public XQueryTokenizer (TextReader reader)
{
        source = reader;
        stateStack = new Stack ();

        XmlNamespaceManager manager = new XmlNamespaceManager (new NameTable ());
        manager.AddNamespace ("xs", XmlSchema.Namespace);
        manager.AddNamespace ("xdt", InternalPool.XdtNamespace);
        // FIXME: Are they really predefined?
        manager.AddNamespace ("xsi", XmlSchema.InstanceNamespace);
        manager.AddNamespace ("fn", "http://www.w3.org/2003/11/xpath-functions");
        manager.AddNamespace ("local", "http://www.w3.org/2003/11/xquery-local-functions");
        nsResolver = manager;
}
93
// Read-only view of the namespace bindings held by this tokenizer.
internal IXmlNamespaceResolver NSResolver {
        get {
                return this.nsResolver;
        }
}

// Namespace URI stored as the default function namespace.
internal string DefaultFunctionNamespace {
        get {
                return this.defaultFunctionNamespace;
        }
        set {
                this.defaultFunctionNamespace = value;
        }
}
102
// Registers a prefix/namespace binding with the tokenizer's
// namespace manager.
public void AddNamespace (string prefix, string ns)
{
        this.nsResolver.AddNamespace (prefix, ns);
}
107
// Moves to the next token. Returns false once a negative token
// (end of input) has been reached; from then on it keeps
// returning false without reading any further.
public bool advance ()
{
        if (currentToken < 0)
                return false;

        if (lookAheadToken < 0) {
                // Nothing queued: tokenize from the input.
                currentToken = ParseToken ();
        } else {
                // Hand out the token queued by an earlier call.
                tokenValue = lookAheadTokenValue;
                currentToken = lookAheadToken;
                lookAheadToken = -1;
        }

        return !(currentToken < 0);
}
121
// The token code produced by the most recent advance () call.
public int token ()
{
        return this.currentToken;
}

// The value attached to the current token (may be null).
public object value ()
{
        return this.tokenValue;
}
131
// IXmlLineInfo: this tokenizer always reports position information.
public bool HasLineInfo ()
{
        return true;
}

// IXmlLineInfo: value of the line counter (starts at 1).
public int LineNumber {
        get {
                return this.line;
        }
}

// IXmlLineInfo: value of the column counter (starts at 0).
public int LinePosition {
        get {
                return this.column;
        }
}
144
// Whitespace handling mode stored for this tokenizer.
internal WhitespaceHandling Space {
        get {
                return this.ws;
        }
        set {
                this.ws = value;
        }
}

// The active lexical state. Assigning it replaces the state
// directly and leaves the state stack untouched.
internal ParseState State {
        get {
                return this.state;
        }
        set {
                this.state = value;
        }
}
158
// Saves the given state on the state stack. The active state
// itself is not changed by this call.
internal void PushState (ParseState newState)
{
        this.stateStack.Push (newState);
}

// Makes the most recently pushed state the active one.
// Throws a compile error when the stack is empty.
internal void PopState ()
{
        if (stateStack.Count > 0) {
                state = (ParseState) stateStack.Pop ();
                return;
        }
        throw Error ("Internal state transition error. State stack is empty.");
}
174
// Builds (but does not throw) a compile exception carrying the
// message and this tokenizer as the line-info source.
private XmlQueryCompileException Error (string message)
{
        XmlQueryCompileException error =
                new XmlQueryCompileException (message, this, null, null);
        return error;
}
179
// Reads one token from the input according to the current lexical
// state and returns its token code, or -1 at end of input.
//
// The literal-content states (PI content, XML comment, element
// content, attribute content) are dispatched BEFORE the numeric
// literal check. Otherwise content that merely starts with a digit,
// such as <a>123abc</a>, would be cut into a DECIMAL_LITERAL
// instead of being read by the content-specific routine.
private int ParseToken ()
{
        bufferIndex = 0;

        // Leading whitespace is skipped in every state except StartTag.
        // The WhitespaceHandling value in "ws" is not consulted here.
        if (state != ParseState.StartTag)
                SkipWhitespaces ();

        int c = PeekChar ();
        if (c < 0)
                return -1;

        // States whose input is raw content: digits are ordinary text.
        switch (state) {
        case ParseState.XmlPIContent:
                return ParseXmlPIContent ();
        case ParseState.XmlComment:
                return ParseXmlCommentContent ();
        case ParseState.ElementContent:
                return ParseElementContent ();
        case ParseState.QuotAttributeContent:
                return ParseAttributeContent ('"');
        case ParseState.AposAttributeContent:
                return ParseAttributeContent ('\'');
        }

        // FIXME: consider DOUBLE_LITERAL
        if (Char.IsNumber ((char) c)) {
                tokenValue = ReadDecimal (false);
                return Token.DECIMAL_LITERAL;
        }

        switch (state) {
        case ParseState.OccurenceIndicator:
                return ParseOccurenceIndicator ();
        case ParseState.StartTag:
                return ParseStartTag ();
        default:
                return ParseDefault ();
        }
}
232
// Consumes the remainder of an XQuery comment. The caller has
// already read the opening "(:". Comments may nest, so every inner
// "(:" must be balanced by its own ":)" before the comment ends.
//
// The comment text (inner delimiters included) is stored in
// tokenValue. The return value is Token.XML_PI_TO_END, kept as-is
// for compatibility; the visible caller in ParseDefault ignores it.
//
// Throws a compile error if the input ends inside the comment.
private int ParseXQueryComment ()
{
        int depth = 1;
        while (true) {
                int c = ReadChar ();
                if (c < 0)
                        throw Error ("Unexpected end of query text inside XQuery comment");

                if (c == ':' && PeekChar () == ')') {
                        ReadChar ();
                        depth--;
                        if (depth == 0) {
                                tokenValue = CreateValueString ();
                                return Token.XML_PI_TO_END;
                        }
                        // Closing delimiter of a nested comment: keep as text.
                        AddValueChar (':');
                        AddValueChar (')');
                }
                else if (c == '(' && PeekChar () == ':') {
                        // Opening delimiter of a nested comment.
                        ReadChar ();
                        depth++;
                        AddValueChar ('(');
                        AddValueChar (':');
                }
                else
                        AddValueChar ((char) c);
        }
}
252
// Reads processing-instruction content up to and including the
// closing "?>". The content (without the terminator) becomes
// tokenValue and Token.XML_PI_TO_END is returned.
// Throws a compile error if the input ends first.
private int ParseXmlPIContent ()
{
        for (;;) {
                int ch = ReadChar ();
                if (ch < 0)
                        throw Error ("Unexpected end of query text inside XML processing instruction content");

                // A '?' only terminates when it is directly followed by '>'.
                bool terminator = ch == '?' && PeekChar () == '>';
                if (!terminator) {
                        AddValueChar ((char) ch);
                        continue;
                }

                ReadChar ();
                tokenValue = CreateValueString ();
                return Token.XML_PI_TO_END;
        }
}
272
// Reads XML comment content until "--" followed by '>' is seen.
// The two hyphens are consumed; the '>' is only peeked and stays
// in the input. The content becomes tokenValue and
// Token.XML_COMMENT_TO_END is returned.
// Throws a compile error if the input ends first.
private int ParseXmlCommentContent ()
{
        // FIXME: handle ---> correctly
        for (;;) {
                int ch = ReadChar ();
                if (ch < 0)
                        throw Error ("Unexpected end of query text inside XML comment content");

                if (ch != '-') {
                        AddValueChar ((char) ch);
                        continue;
                }
                if (PeekChar () != '-') {
                        // A single hyphen is plain content.
                        AddValueChar ('-');
                        continue;
                }

                ReadChar ();
                if (PeekChar () == '>') {
                        tokenValue = CreateValueString ();
                        return Token.XML_COMMENT_TO_END;
                }
                // "--" not followed by '>' is kept as content.
                AddValueChar ('-');
                AddValueChar ('-');
        }
}
298
// Reads CDATA section content until "]]" followed by '>' is seen.
// The brackets are consumed; the '>' is only peeked and stays in
// the input (same convention as ParseXmlCommentContent). The content
// becomes tokenValue and Token.XML_CDATA_TO_END is returned.
//
// In a run of three or more ']' before '>' (e.g. "]]]>"), only the
// last two belong to the terminator; the earlier ones are content.
//
// Throws a compile error if the input ends first.
private int ParseXmlCDataContent ()
{
        while (true) {
                int c = ReadChar ();
                if (c < 0)
                        throw Error ("Unexpected end of query text inside XML CDATA section content");

                if (c != ']') {
                        AddValueChar ((char) c);
                        continue;
                }
                // 'c' has already been read; only peek from here on so
                // that the following character is not lost.
                if (PeekChar () != ']') {
                        AddValueChar (']');
                        continue;
                }
                ReadChar ();

                // Two brackets seen. Any further bracket pushes the
                // oldest one out of the candidate terminator.
                while (PeekChar () == ']') {
                        ReadChar ();
                        AddValueChar (']');
                }

                if (PeekChar () == '>') {
                        tokenValue = CreateValueString ();
                        return Token.XML_CDATA_TO_END;
                }
                AddValueChar (']');
                AddValueChar (']');
        }
}
325
// Tokenizes direct element content.
//
// - '<' at the start is delegated to ParseDefault ().
// - '{' starts an enclosed expression. With no literal text pending,
//   Token.OPEN_CURLY is returned directly. Otherwise the text is
//   returned as ELEM_CONTENT_LITERAL and OPEN_CURLY is queued as the
//   look-ahead token (the same scheme as ParseAttributeContent).
// - "{{" is the escape for a literal '{'.
// - '&' introduces one of the predefined entity references.
// - '<' ends the literal without being consumed.
//
// Throws a compile error if the input ends inside the content.
private int ParseElementContent ()
{
        tokenValue = null;
        int c = PeekChar ();
        if (c < 0)
                throw Error ("Unexpected end of query text inside XML element content");
        if (c == '<')
                return ParseDefault ();

        while (true) {
                c = PeekChar ();
                if (c < 0)
                        throw Error ("Unexpected end of query text inside XML element content");
                switch ((char) c) {
                case '&':
                        ReadChar ();
                        ReadPredefinedEntity ();
                        continue;
                case '<':
                        tokenValue += CreateValueString ();
                        return Token.ELEM_CONTENT_LITERAL;
                case '{':
                        ReadChar ();
                        if (PeekChar () == '{') {
                                // Escaped brace: literal '{'.
                                ReadChar ();
                                AddValueChar ('{');
                                continue;
                        }
                        if (bufferIndex == 0)
                                return Token.OPEN_CURLY;
                        // Emit the pending text first, then the brace.
                        lookAheadToken = Token.OPEN_CURLY;
                        tokenValue += CreateValueString ();
                        return Token.ELEM_CONTENT_LITERAL;
                default:
                        AddValueChar ((char) c);
                        ReadChar ();
                        continue;
                }
        }
}
357
// Reads the name and closing ';' of an entity reference (the
// caller in ParseElementContent has already consumed the '&') and
// appends the character it stands for to the value buffer.
// Only lt, gt, amp, quot and apos are accepted; any other name
// raises a compile error.
private void ReadPredefinedEntity ()
{
        string entityName = ReadOneToken ();
        Expect (";");

        char replacement;
        if (entityName == "lt")
                replacement = '<';
        else if (entityName == "gt")
                replacement = '>';
        else if (entityName == "amp")
                replacement = '&';
        else if (entityName == "quot")
                replacement = '"';
        else if (entityName == "apos")
                replacement = '\'';
        else
                throw Error (String.Format ("Unexpected general entity name: {0} .", entityName));

        AddValueChar (replacement);
}
382
// FIXME: not used as yet
//
// Reads extension/pragma content until "::" followed by ')' is
// seen. The two colons are consumed; the ')' is only peeked and
// stays in the input. The content becomes tokenValue and
// Token.EXT_CONTENT is returned.
//
// Every character is consumed as it is examined, so the loop always
// makes progress. In a run of three or more ':' before ')'
// (e.g. ":::)"), only the last two belong to the terminator.
//
// Throws a compile error if the input ends first.
private int ParseExtContent ()
{
        while (true) {
                int c = ReadChar ();
                if (c < 0)
                        throw Error ("Unexpected end of query text inside external content");

                if (c != ':') {
                        AddValueChar ((char) c);
                        continue;
                }
                if (PeekChar () != ':') {
                        AddValueChar (':');
                        continue;
                }
                ReadChar ();

                // Two colons seen. Any further colon pushes the oldest
                // one out of the candidate terminator.
                while (PeekChar () == ':') {
                        ReadChar ();
                        AddValueChar (':');
                }

                if (PeekChar () == ')') {
                        tokenValue = CreateValueString ();
                        return Token.EXT_CONTENT;
                }
                AddValueChar (':');
                AddValueChar (':');
        }
}
410
// Handles the OccurenceIndicator state: switches the state to
// Operator, then returns QUESTION, ASTERISK or PLUS when the next
// character is '?', '*' or '+'. Anything else is left unread and
// handed to ParseOperator ().
private int ParseOccurenceIndicator ()
{
        state = ParseState.Operator;

        int indicator;
        switch (PeekChar ()) {
        case '?':
                indicator = Token.QUESTION;
                break;
        case '*':
                indicator = Token.ASTERISK;
                break;
        case '+':
                indicator = Token.PLUS;
                break;
        default:
                return ParseOperator ();
        }

        // Consume the indicator character that was only peeked above.
        ReadChar ();
        return indicator;
}
428
// Tokenizes inside a start tag: quote characters, '>' and "/>" are
// recognised directly; anything else is read through ParseDefault ()
// after skipping whitespace.
private int ParseStartTag ()
{
        int next = PeekChar ();

        if (next == '\'') {
                ReadChar ();
                return Token.APOS;
        }
        if (next == '"') {
                ReadChar ();
                return Token.QUOT;
        }
        if (next == '>') {
                ReadChar ();
                return Token.GREATER;
        }
        if (next == '/') {
                // '/' must be immediately followed by '>'.
                ReadChar ();
                Expect (">");
                return Token.EMPTY_TAG_CLOSE;
        }

        // FIXME: there seems to be a bug in the spec: StartTag state
        // must accept a QName without a leading space for the start
        // tag name, so whitespace is skipped here rather than required.
        SkipWhitespaces ();
        return ParseDefault (); // only QName is allowed here.
}
455
// Tokenizes attribute value content delimited by closeChar.
//
// Literal text is accumulated until a lone closeChar or a lone '{'
// is met. A doubled closeChar or "{{" is an escape for that single
// character. When a delimiter is met:
//   - with no text pending, its token (QUOT/APOS or OPEN_CURLY) is
//     returned directly;
//   - otherwise the text is returned as ATT_VALUE_LITERAL and the
//     delimiter token is queued as the look-ahead token.
//
// Throws a compile error if the input ends inside the value.
private int ParseAttributeContent (char closeChar)
{
        for (;;) {
                int next = PeekChar ();
                if (next < 0)
                        throw Error ("Unexpected end of attribute value content.");

                int delimiterToken;
                if (next == closeChar)
                        delimiterToken = closeChar == '"' ? Token.QUOT : Token.APOS;
                else if (next == '{')
                        delimiterToken = Token.OPEN_CURLY;
                else {
                        // Ordinary character: part of the literal.
                        AddValueChar ((char) ReadChar ());
                        continue;
                }

                ReadChar ();
                if (PeekChar () == next) {
                        // Doubled delimiter: escaped, keep one copy as text.
                        ReadChar ();
                        AddValueChar ((char) next);
                        continue;
                }

                if (bufferIndex == 0)
                        return delimiterToken;

                lookAheadToken = delimiterToken;
                tokenValue = CreateValueString ();
                return Token.ATT_VALUE_LITERAL;
        }
}
497
// Tokenizes in Operator state. There is no operator-specific
// handling yet; the work is passed straight to ParseDefault ().
private int ParseOperator ()
{
        // TODO: implement
        int parsed = ParseDefault ();
        return parsed;
}
503
504                 private int ParseDefault ()
505                 {
506                         int c = ReadChar ();
507                         switch (c) {
508                         case '.':
509                                 if (PeekChar () == '.') {
510                                         ReadChar ();
511                                         return Token.DOT2;
512                                 }
513                                 else if (Char.IsNumber ((char) PeekChar ())) {
514                                         tokenValue = ReadDecimal (true);
515                                 }
516                                 return Token.DOT;
517                         case ',':
518                                 return Token.COMMA;
519                         case ';':
520                                 return Token.SEMICOLON;
521                         case '(':
522                                 if (PeekChar () == ':') {
523                                         ReadChar ();
524                                         if (PeekChar () == ':') {
525                                                 ReadChar ();
526                                                 return Token.PRAGMA_OPEN;
527                                         }
528                                         ParseXQueryComment ();
529                                         return ParseToken (); // start again
530                                 }
531                                 return Token.OPEN_PAREN;
532                         case ')':
533                                 return Token.CLOSE_PAREN;
534                         case ':':
535                                 switch (PeekChar ()) {
536                                 case ':':
537                                         ReadChar ();
538                                         if (PeekChar () == ')') {
539                                                 ReadChar ();
540                                                 return Token.PRAGMA_CLOSE;
541                                         }
542                                         return Token.COLON2;
543                                 case ')':
544                                         ReadChar ();
545                                         return Token.CLOSE_PAREN_COLON;
546                                 case '=':
547                                         ReadChar ();
548                                         return Token.COLON_EQUAL;
549                                 }
550                                 return Token.COLON;
551                         case '[':
552                                 return Token.OPEN_BRACKET;
553                         case ']':
554                                 return Token.CLOSE_BRACKET;
555                         case '{':
556                                 return Token.OPEN_CURLY;
557                         case '}':
558                                 return Token.CLOSE_CURLY;
559                         case '$':
560                                 return Token.DOLLAR;
561                         case '\'':
562                                 tokenValue = ReadQuoted ('\'');
563                                 return Token.STRING_LITERAL;
564                         case '"':
565                                 tokenValue = ReadQuoted ('"');
566                                 return Token.STRING_LITERAL;
567                         case '=':
568                                 return Token.EQUAL;
569                         case '<':
570                                 // only happens when state is ElementContent 
571                                 // (otherwise it might be "/foo</bar")
572                                 if (state == ParseState.ElementContent) {
573                                         switch ((char) PeekChar ()) {
574                                         case '/':
575                                                 ReadChar ();
576                                                 return Token.END_TAG_START;
577                                         case '!':
578                                                 ReadChar ();
579                                                 switch (PeekChar ()) {
580                                                 case '-':
581                                                         ReadChar ();
582                                                         if (ReadChar () != '-')
583                                                                 throw Error ("Invalid sequence of characters '<!-'.");
584                                                         
585                                                         return Token.XML_COMMENT_START;
586                                                 case '[':
587                                                         ReadChar ();
588                                                         Expect ("CDATA[");
589                                                         return Token.XML_CDATA_START;
590                                                 }
591                                                 throw Error ("Invalid sequence of characters '<!'.");
592                                         case '?':
593                                                 ReadChar ();
594                                                 return Token.XML_PI_START;
595                                         default:
596                                                 return Token.LESSER;
597                                         }
598                                 }
599
600                                 switch (PeekChar ()) {
601                                 case '<':
602                                         ReadChar ();
603                                         return Token.LESSER2;
604                                 case '=':
605                                         ReadChar ();
606                                         return Token.LESSER_EQUAL;
607                                 }
608                                 return Token.LESSER;
609                         case '>':
610                                 switch (PeekChar ()) {
611                                 case '>':
612                                         ReadChar ();
613                                         return Token.GREATER2;
614                                 case '=':
615                                         ReadChar ();
616                                         return Token.GREATER_EQUAL;
617                                 }
618                                 return Token.GREATER;
619                         case '|':
620                                 return Token.BAR;
621                         case '*':
622                                 if (PeekChar () == ':') {
623                                         ReadChar ();
624                                         // FIXME: more check
625                                         tokenValue = new XmlQualifiedName (ReadOneToken (), "*");
626                                         return Token.WILD_PREFIX;
627                                 }
628                                 return Token.ASTERISK;
629                         case '+':
630                                 return Token.PLUS;
631                         case '-':
632                                 return Token.MINUS;
633                         case '/':
634                                 // only happens when state is StartTag
635                                 // (otherwise it might be "/>$extvar")
636                                 if (state == ParseState.StartTag && PeekChar () == '>') {
637                                         ReadChar ();
638                                         return Token.EMPTY_TAG_CLOSE;
639                                 }
640                                 if (PeekChar () == '/') {
641                                         ReadChar ();
642                                         return Token.SLASH2;
643                                 }
644                                 return Token.SLASH;
645                         case '?':
646                                 return Token.QUESTION;
647                         case '@':
648                                 return Token.AT;
649                         }
650
651                         peekChar = c;
652                         prefixName = null;
653                         string name = ReadOneToken ();
654
655                         tokenValue = name;
656                         bool validKeyword = false;
657
658                         switch (state) {
659                         case ParseState.XmlSpaceDecl:
660                                 switch (name) {
661                                 case "preserve":
662                                         return Token.PRESERVE;
663                                 case "strip":
664                                         return Token.STRIP;
665                                 }
666                                 break;
667                         case ParseState.CloseKindTest:
668                                 if (name == "nillable")
669                                         return Token.NILLABLE;
670                                 break;
671                         case ParseState.ExtKey:
672                                 switch (name) {
673                                 case "pragma":
674                                         return Token.PRAGMA;
675                                 case "extension":
676                                         return Token.EXTENSION;
677                                 }
678                                 break;
679                         case ParseState.KindTest:
680                                 switch (name) {
681                                 case "context":
682                                         return Token.CONTEXT;
683                                 case "element":
684                                         return Token.ELEMENT;
685                                 case "global":
686                                         return Token.GLOBAL;
687                                 case "type":
688                                         return Token.TYPE;
689                                 }
690                                 break;
691                         case ParseState.ItemType:
692                                 switch (name) {
693                                 case "attribute":
694                                         return Token.ATTRIBUTE;
695                                 case "comment":
696                                         return Token.COMMENT;
697                                 case "document-node":
698                                         return Token.DOCUMENT_NODE;
699                                 case "element":
700                                         return Token.ELEMENT;
701                                 case "empty":
702                                         return Token.EMPTY;
703                                 case "item":
704                                         return Token.ITEM;
705                                 case "node":
706                                         return Token.NODE;
707                                 case "processing-instruction":
708                                         return Token.PROCESSING_INSTRUCTION;
709                                 case "text":
710                                         return Token.TEXT;
711                                 }
712                                 break;
713                         case ParseState.NamespaceKeyword:
714                                 switch (name) {
715                                 case "declare":
716                                         return Token.DECLARE;
717                                 case "default":
718                                         return Token.DEFAULT;
719                                 case "element":
720                                         return Token.ELEMENT;
721                                 case "function":
722                                         return Token.FUNCTION;
723                                 case "namespace":
724                                         return Token.NAMESPACE;
725                                 }
726                                 break;
727                         case ParseState.OccurenceIndicator:
728                         case ParseState.Operator:
729                                 switch (name) {
730                                 case "and":
731                                 case "as":
732                                 case "ascending":
733                                 case "at":
734                                 case "base-uri":
735                                 case "by":
736                                 case "case":
737                                 case "cast":
738                                 case "castable":
739                                 case "collation":
740                                 case "declare":
741                                 case "default":
742                                 case "descending":
743                                 case "div":
744                                 case "element":
745                                 case "else":
746                                 case "empty":
747                                 case "eq":
748                                 case "every":
749                                 case "except":
750                                 case "external":
751                                 case "for":
752                                 case "function":
753                                 case "ge":
754                                 case "global":
755                                 case "greatest":
756                                 case "gt":
757                                 case "idiv":
758                                 case "import":
759                                 case "in":
760                                 case "instance":
761                                 case "intersect":
762                                 case "is":
763                                 case "lax":
764                                 case "le":
765                                 case "least":
766                                 case "let":
767                                 case "lt":
768                                 case "mod":
769                                 case "module":
770                                 case "namespace":
771                                 case "ne":
772                                 case "of":
773                                 case "or":
774                                 case "order":
775                                 case "ordered":
776                                 case "ordering":
777                                 case "return":
778                                 case "satisfies":
779                                 case "schema":
780                                 case "skip":
781                                 case "some":
782                                 case "stable":
783                                 case "strict":
784                                 case "then":
785                                 case "to":
786                                 case "treat":
787                                 case "typwswitch":
788                                 case "union":
789                                 case "unordered":
790                                 case "variable":
791                                 case "where":
792                                 case "xmlspace":
793                                         validKeyword = true;
794                                         break;
795                                 }
796                                 break;
797                         case ParseState.Default:
798                                 switch (name) {
799                                 case "ancestor":
800                                 case "ancestor-or-self":
801                                 case "as":
802                                 case "attribute":
803                                 case "base-uri":
804                                 case "child":
805                                 case "collation":
806                                 case "comment":
807                                 case "construction":
808                                 case "declare":
809                                 case "default":
810                                 case "descendant":
811                                 case "descendant-or-self":
812                                 case "document":
813                                 case "document-node":
814                                 case "element":
815                                 case "every":
816                                 case "following":
817                                 case "following-sibling":
818                                 case "for":
819                                 case "function":
820                                 case "global":
821                                 case "if":
822                                 case "import":
823                                 case "lax":
824                                 case "let":
825                                 case "module":
826                                 case "namespace":
827                                 case "node":
828                                 case "ordered":
829                                 case "parent":
830                                 case "preceding":
831                                 case "preceding-sibling":
832                                 case "processing-instruction":
833                                 case "schema":
834                                 case "self":
835                                 case "some":
836                                 case "strict":
837                                 case "strip":
838                                 case "text":
839                                 case "typeswitch":
840                                 case "unordered":
841                                 case "validate":
842                                 case "validation":
843                                 case "version":
844                                 case "xmlspace":
845                                 case "xquery":
846                                         validKeyword = true;
847                                         break;
848                                 }
849                                 break;
850                         }
851
852                         if (validKeyword) {
853                                 switch (name) {
854                                 case "xquery":
855                                         return Token.XQUERY;
856                                 case "version":
857                                         return Token.VERSION;
858                                 case "pragma":
859                                         return Token.PRAGMA;
860                                 case "extension":
861                                         return Token.EXTENSION;
862                                 case "module":
863                                         return Token.MODULE;
864                                 case "namespace":
865                                         return Token.NAMESPACE;
866                                 case "declare":
867                                         return Token.DECLARE;
868                                 case "xmlspace":
869                                         return Token.XMLSPACE;
870                                 case "preserve":
871                                         return Token.PRESERVE;
872                                 case "strip":
873                                         return Token.STRIP;
874                                 case "default":
875                                         return Token.DEFAULT;
876                                 case "construction":
877                                         return Token.CONSTRUCTION;
878                                 case "ordering":
879                                         return Token.ORDERING;
880                                 case "ordered":
881                                         return Token.ORDERED;
882                                 case "unordered":
883                                         return Token.UNORDERED;
884                                 case "document-node":
885                                         return Token.DOCUMENT_NODE;
886                                 case "document":
887                                         return Token.DOCUMENT;
888                                 case "element":
889                                         return Token.ELEMENT;
890                                 case "attribute":
891                                         return Token.ATTRIBUTE;
892                                 case "processing-instruction":
893                                         return Token.PROCESSING_INSTRUCTION;
894                                 case "comment":
895                                         return Token.COMMENT;
896                                 case "text":
897                                         return Token.TEXT;
898                                 case "node":
899                                         return Token.NODE;
900                                 case "function":
901                                         return Token.FUNCTION;
902                                 case "collation":
903                                         return Token.COLLATION;
904                                 case "base-uri":
905                                         return Token.BASEURI;
906                                 case "import":
907                                         return Token.IMPORT;
908                                 case "schema":
909                                         return Token.SCHEMA;
910                                 case "at":
911                                         return Token.AT;
912                                 case "variable":
913                                         return Token.VARIABLE;
914                                 case "as":
915                                         return Token.AS;
916                                 case "external":
917                                         return Token.EXTERNAL;
918                                 case "validation":
919                                         return Token.VALIDATION;
920                                 case "lax":
921                                         return Token.LAX;
922                                 case "strict":
923                                         return Token.STRICT;
924                                 case "skip":
925                                         return Token.SKIP;
926                                 case "return":
927                                         return Token.RETURN;
928                                 case "for":
929                                         return Token.FOR;
930                                 case "let":
931                                         return Token.LET;
932                                 case "in":
933                                         return Token.IN;
934                                 case "where":
935                                         return Token.WHERE;
936                                 case "order":
937                                         return Token.ORDER;
938                                 case "by":
939                                         return Token.BY;
940                                 case "stable":
941                                         return Token.STABLE;
942                                 case "ascending":
943                                         return Token.ASCENDING;
944                                 case "descending":
945                                         return Token.DESCENDING;
946                                 case "empty":
947                                         return Token.EMPTY;
948                                 case "greatest":
949                                         return Token.GREATEST;
950                                 case "least":
951                                         return Token.LEAST;
952                                 case "some":
953                                         return Token.SOME;
954                                 case "every":
955                                         return Token.EVERY;
956                                 case "satisfies":
957                                         return Token.SATISFIES;
958                                 case "is":
959                                         return Token.IS;
960                                 case "to":
961                                         return Token.TO;
962                                 case "eq":
963                                         return Token.EQ;
964                                 case "ne":
965                                         return Token.NE;
966                                 case "lt":
967                                         return Token.LT;
968                                 case "le":
969                                         return Token.LE;
970                                 case "gt":
971                                         return Token.GT;
972                                 case "ge":
973                                         return Token.GE;
974                                 case "and":
975                                         return Token.AND;
976                                 case "or":
977                                         return Token.OR;
978                                 case "instance":
979                                         return Token.INSTANCE;
980                                 case "of":
981                                         return Token.OF;
982                                 case "if":
983                                         return Token.IF;
984                                 case "then":
985                                         return Token.THEN;
986                                 case "else":
987                                         return Token.ELSE;
988                                 case "typeswitch":
989                                         return Token.TYPESWITCH;
990                                 case "case":
991                                         return Token.CASE;
992                                 case "treat":
993                                         return Token.TREAT;
994                                 case "castable":
995                                         return Token.CASTABLE;
996                                 case "cast":
997                                         return Token.CAST;
998                                 case "div":
999                                         return Token.DIV;
1000                                 case "idiv":
1001                                         return Token.IDIV;
1002                                 case "mod":
1003                                         return Token.MOD;
1004                                 case "union":
1005                                         return Token.UNION;
1006                                 case "intersect":
1007                                         return Token.INTERSECT;
1008                                 case "except":
1009                                         return Token.EXCEPT;
1010                                 case "validate":
1011                                         return Token.VALIDATE;
1012                                 case "context":
1013                                         return Token.CONTEXT;
1014                                 case "nillable":
1015                                         return Token.NILLABLE;
1016                                 case "item":
1017                                         return Token.ITEM;
1018                                 case "global":
1019                                         return Token.GLOBAL;
1020                                 case "type":
1021                                         return Token.TYPE;
1022                                 case "child":
1023                                         return Token.CHILD;
1024                                 case "descendant":
1025                                         return Token.DESCENDANT;
1026                                 case "self":
1027                                         return Token.SELF;
1028                                 case "descendant-or-self":
1029                                         return Token.DESCENDANT_OR_SELF;
1030                                 case "following-sibling":
1031                                         return Token.FOLLOWING_SIBLING;
1032                                 case "following":
1033                                         return Token.FOLLOWING;
1034                                 case "parent":
1035                                         return Token.PARENT;
1036                                 case "ancestor":
1037                                         return Token.ANCESTOR;
1038                                 case "preceding":
1039                                         return Token.PRECEDING;
1040                                 case "preceding-sibling":
1041                                         return Token.PRECEDING_SIBLING;
1042                                 case "ancestor-or-self":
1043                                         return Token.ANCESTOR_OR_SELF;
1044                                 }
1045                         }
1046
1047                         switch (state) {
1048                         case ParseState.NamespaceDecl:
1049                         case ParseState.NamespaceKeyword:
1050                         case ParseState.XmlSpaceDecl:
1051                         case ParseState.KindTestForPI:
1052                         case ParseState.XmlPI:
1053                                 return Token.NCNAME;
1054                         }
1055
1056                         if (PeekChar () == ':') {
1057                                 ReadChar ();
1058                                 prefixName = name;
1059                                 switch (PeekChar ()) {
1060                                 case '*':
1061                                         ReadChar ();
1062                                         name = "*";
1063                                         break;
1064                                 case '=': // ex. let foo:= ...
1065                                         ReadChar ();
1066                                         tokenValue = new XmlQualifiedName (name, nsResolver.DefaultNamespace);
1067                                         lookAheadToken = Token.COLON_EQUAL;
1068                                         return Token.QNAME;
1069                                 default:
1070                                         name = ReadOneToken ();
1071                                         break;
1072                                 }
1073
1074                                 string ns = nsResolver.LookupNamespace (prefixName);
1075                                 if (ns == null)
1076                                         throw Error (String.Format ("Prefix '{0}' is not mapped to any namespace URI.", prefixName));
1077                                 tokenValue = new XmlQualifiedName (name, ns);
1078                                 prefixName = null;
1079                                 return name == "*" ? Token.WILD_LOCALNAME : Token.QNAME;
1080                         }
1081                         tokenValue = new XmlQualifiedName (name);
1082                         return Token.QNAME;
1083                 }
1084
1085                 private int PeekChar ()
1086                 {
1087                         if (peekChar == -1)
1088                                 peekChar = source.Read ();
1089                         return peekChar;
1090                 }
1091
1092                 private int ReadChar ()
1093                 {
1094                         int ret;
1095                         if (peekChar != -1) {
1096                                 ret = peekChar;
1097                                 peekChar = -1;
1098                         }
1099                         else
1100                                 ret = source.Read ();
1101
1102                         if (nextIncrementLine) {
1103                                 line++;
1104                                 column = 0;
1105                                 nextIncrementLine = false;
1106                         }
1107                         column++;
1108                         switch (ret) {
1109                         case '\r':
1110                                 break;
1111                         case '\n':
1112                                 nextIncrementLine = true;
1113                                 goto default;
1114                         default:
1115                                 break;
1116                         }
1117
1118                         return ret;
1119                 }
1120
1121                 private void SkipWhitespaces ()
1122                 {
1123                         while (true) {
1124                                 switch (PeekChar ()) {
1125                                 case ' ':
1126                                 case '\t':
1127                                 case '\r':
1128                                 case '\n':
1129                                         ReadChar ();
1130                                         continue;
1131                                 default:
1132                                         return;
1133                                 }
1134                         }
1135                 }
1136
1137                 private void AddValueChar (char c)
1138                 {
1139                         if (bufferIndex == buffer.Length) {
1140                                 char [] newBuf = new char [bufferIndex * 2];
1141                                 Array.Copy (buffer, newBuf, bufferIndex);
1142                                 buffer = newBuf;
1143                         }
1144                         buffer [bufferIndex++] = c;
1145                 }
1146
1147                 private string CreateValueString ()
1148                 {
1149                         return new string (buffer, 0, bufferIndex);
1150                 }
1151
1152                 private void Expect (string expected)
1153                 {
1154                         for (int i = 0; i < expected.Length; i++)
1155                                 if (ReadChar () != expected [i])
1156                                         throw Error (String.Format ("Expected token '{0}' did not appear.", expected));
1157                 }
1158
1159                 // TODO: parse three quoted
1160                 private string ReadQuoted (char quoteChar)
1161                 {
1162                         bufferIndex = 0;
1163                         bool loop = true;
1164                         do {
1165                                 int c = ReadChar ();
1166                                 switch (c) {
1167                                 case -1:
1168                                 case '"':
1169                                         if (quoteChar == '"')
1170                                                 loop = false;
1171                                         break;
1172                                 case '\'':
1173                                         if (quoteChar == '\'')
1174                                                 loop = false;
1175                                         break;
1176                                 default:
1177                                         AddValueChar ((char) c);
1178                                         break;
1179                                 }
1180                         } while (loop);
1181
1182                         return CreateValueString ();
1183                 }
1184
1185                 private decimal ReadDecimal (bool floatingPoint)
1186                 {
1187                         bufferIndex = 0;
1188                         bool cond = true;
1189                         do {
1190                                 int c = PeekChar ();
1191                                 if (c < 0) {
1192                                         cond = false;
1193                                 }
1194                                 // FIXME: more complex
1195                                 else if (Char.IsNumber ((char) c) || c == '.') {
1196                                         ReadChar ();
1197                                         AddValueChar ((char) c);
1198                                         continue;
1199                                 }
1200                                 else
1201                                         cond = false;
1202                         } while (cond);
1203                         string s = (floatingPoint ? "." : "") + CreateValueString ();
1204                         return decimal.Parse (s);
1205                 }
1206
1207                 private string ReadOneToken ()
1208                 {
1209                         bufferIndex = 0;
1210                         bool loop = true;
1211                         do {
1212                                 int c = PeekChar ();
1213                                 switch (c) {
1214                                 case -1:
1215                                 case ' ':
1216                                 case '\t':
1217                                 case '\r':
1218                                 case '\n':
1219                                         loop = false;
1220                                         break;
1221                                 default:
1222                                         if (!IsTokenContinuable (c)) {
1223                                                 if (c == ':') {
1224                                                         if (prefixName != null)
1225                                                                 throw new XmlQueryCompileException ("Invalid colon was found.");
1226                                                         prefixName = CreateValueString ();
1227                                                 }
1228                                                 loop = false;
1229                                                 break;
1230                                         }
1231
1232                                         ReadChar ();
1233                                         AddValueChar ((char) c);
1234                                         break;
1235                                 }
1236                         } while (loop);
1237
1238                         return CreateValueString ();
1239                 }
1240
1241                 private bool IsTokenContinuable (int c)
1242                 {
1243                         switch (c) {
1244                         case '-':
1245                         case '_':
1246                         case '.':
1247                                 return true;
1248                         }
1249                         return XmlChar.IsNCNameChar (c);
1250                 }
1251
1252         }
1253
1254         public enum WhitespaceHandling {
1255                 Arbitrary,
1256                 Explicit,
1257                 Significant
1258         }
1259
1260         public enum ParseState {
1261                 Default,
1262                 Operator,
1263                 NamespaceDecl,
1264                 NamespaceKeyword,
1265                 XmlSpaceDecl,
1266                 ItemType,
1267                 KindTest,
1268                 KindTestForPI,
1269                 CloseKindTest,
1270                 OccurenceIndicator,
1271                 SchemaContextStep,
1272                 VarName,
1273                 StartTag,
1274                 ElementContent,
1275                 EndTag,
1276                 XmlComment,
1277                 ExprComment,
1278                 ExtKey,
1279                 XmlPI,
1280                 XmlPIContent,
1281                 CDataSection,
1282                 QuotAttributeContent,
1283                 AposAttributeContent,
1284         }
1285
1286 }