Updates referencesource to .NET 4.7
[mono.git] / mcs / class / referencesource / System.Data.SqlXml / System / Xml / Xsl / XPath / XPathScanner.cs
1 //------------------------------------------------------------------------------
2 // <copyright file="XPathScanner.cs" company="Microsoft">
3 //     Copyright (c) Microsoft Corporation.  All rights reserved.
4 // </copyright>
5 // <owner current="true" primary="true">Microsoft</owner>
6 // <spec>http://www.w3.org/TR/xpath#exprlex</spec>
7 //------------------------------------------------------------------------------
8
9 using System.Diagnostics;
10
11 namespace System.Xml.Xsl.XPath {
12     using Res = System.Xml.Utils.Res;
13
14     // Extends XPathOperator enumeration
15     internal enum LexKind {
16         Unknown,        // Unknown lexeme
17         Or,             // Operator 'or'
18         And,            // Operator 'and'
19         Eq,             // Operator '='
20         Ne,             // Operator '!='
21         Lt,             // Operator '<'
22         Le,             // Operator '<='
23         Gt,             // Operator '>'
24         Ge,             // Operator '>='
25         Plus,           // Operator '+'
26         Minus,          // Operator '-'
27         Multiply,       // Operator '*'
28         Divide,         // Operator 'div'
29         Modulo,         // Operator 'mod'
30         UnaryMinus,     // Not used
31         Union,          // Operator '|'
32         LastOperator    = Union,
33
34         DotDot,         // '..'
35         ColonColon,     // '::'
36         SlashSlash,     // Operator '//'
37         Number,         // Number (numeric literal)
38         Axis,           // AxisName
39
40         Name,           // NameTest, NodeType, FunctionName, AxisName, second part of VariableReference
41         String,         // Literal (string literal)
42         Eof,            // End of the expression
43
44         FirstStringable = Name,
45         LastNonChar     = Eof,
46
47         LParens     = '(',
48         RParens     = ')',
49         LBracket    = '[',
50         RBracket    = ']',
51         Dot         = '.',
52         At          = '@',
53         Comma       = ',',
54
55         Star        = '*',      // NameTest
56         Slash       = '/',      // Operator '/'
57         Dollar      = '$',      // First part of VariableReference
58         RBrace      = '}',      // Used for AVTs
59     };
60
61     internal sealed class XPathScanner {
62         private string  xpathExpr;
63         private int     curIndex;
64         private char    curChar;
65         private LexKind kind;
66         private string  name;
67         private string  prefix;
68         private string  stringValue;
69         private bool    canBeFunction;
70         private int     lexStart;
71         private int     prevLexEnd;
72         private LexKind prevKind;
73         private XPathAxis axis;
74
75         private XmlCharType xmlCharType = XmlCharType.Instance;
76
77         public XPathScanner(string xpathExpr) : this(xpathExpr, 0) {}
78
79         public XPathScanner(string xpathExpr, int startFrom) {
80             Debug.Assert(xpathExpr != null);
81             this.xpathExpr = xpathExpr;
82             this.kind = LexKind.Unknown;
83             SetSourceIndex(startFrom);
84             NextLex();
85         }
86
87         public string   Source      { get { return xpathExpr;   } }
88         public LexKind  Kind        { get { return kind;        } }
89         public int      LexStart    { get { return lexStart;    } }
90         public int      LexSize     { get { return curIndex - lexStart; } }
91         public int      PrevLexEnd  { get { return prevLexEnd;  } }
92
93         private void SetSourceIndex(int index) {
94             Debug.Assert(0 <= index && index <= xpathExpr.Length);
95             curIndex = index - 1;
96             NextChar();
97         }
98
99         private void NextChar() {
100             Debug.Assert(-1 <= curIndex && curIndex < xpathExpr.Length);
101             curIndex++;
102             if (curIndex < xpathExpr.Length) {
103                 curChar = xpathExpr[curIndex];
104             } else {
105                 Debug.Assert(curIndex == xpathExpr.Length);
106                 curChar = '\0';
107             }
108         }
109
#if XML10_FIFTH_EDITION
        /// <summary>
        /// Returns the character after the current one without moving the scan position,
        /// or '\0' when there is no such character.
        /// </summary>
        private char PeekNextChar() {
            Debug.Assert(-1 <= curIndex && curIndex <= xpathExpr.Length);

            int nextIndex = curIndex + 1;
            return (nextIndex < xpathExpr.Length) ? xpathExpr[nextIndex] : '\0';
        }
#endif
121
122         public string Name {
123             get {
124                 Debug.Assert(kind == LexKind.Name);
125                 Debug.Assert(name != null);
126                 return name;
127             }
128         }
129
130         public string Prefix {
131             get {
132                 Debug.Assert(kind == LexKind.Name);
133                 Debug.Assert(prefix != null);
134                 return prefix;
135             }
136         }
137
138         public string RawValue {
139             get {
140                 if (kind == LexKind.Eof) {
141                     return LexKindToString(kind);
142                 } else {
143                     return xpathExpr.Substring(lexStart, curIndex - lexStart);
144                 }
145             }
146         }
147
148         public string StringValue {
149             get {
150                 Debug.Assert(kind == LexKind.String);
151                 Debug.Assert(stringValue != null);
152                 return stringValue;
153             }
154         }
155
156         // Returns true if the character following an QName (possibly after intervening
157         // ExprWhitespace) is '('. In this case the token must be recognized as a NodeType
158         // or a FunctionName unless it is an OperatorName. This distinction cannot be done
159         // without knowing the previous lexeme. For example, "or" in "... or (1 != 0)" may
160         // be an OperatorName or a FunctionName.
161         public bool CanBeFunction {
162             get {
163                 Debug.Assert(kind == LexKind.Name);
164                 return canBeFunction;
165             }
166         }
167
168         public XPathAxis Axis {
169             get {
170                 Debug.Assert(kind == LexKind.Axis);
171                 Debug.Assert(axis != XPathAxis.Unknown);
172                 return axis;
173             }
174         }
175
176         private void SkipSpace() {
177             while (xmlCharType.IsWhiteSpace(curChar)) {
178                 NextChar();
179             }
180         }
181
182         private static bool IsAsciiDigit(char ch) {
183             return (uint)(ch - '0') <= 9;
184         }
185
186         public void NextLex() {
187             prevLexEnd = curIndex;
188             prevKind = kind;
189             SkipSpace();
190             lexStart = curIndex;
191
192             switch (curChar) {
193                 case '\0':
194                     kind = LexKind.Eof;
195                     return;
196                 case '(': case ')': case '[': case ']':
197                 case '@': case ',': case '$': case '}':
198                     kind = (LexKind)curChar;
199                     NextChar();
200                     break;
201                 case '.':
202                     NextChar();
203                     if (curChar == '.') {
204                         kind = LexKind.DotDot;
205                         NextChar();
206                     } else if (IsAsciiDigit(curChar)) {
207                         SetSourceIndex(lexStart);
208                         goto case '0';
209                     } else {
210                         kind = LexKind.Dot;
211                     }
212                     break;
213                 case ':':
214                     NextChar();
215                     if (curChar == ':') {
216                         kind = LexKind.ColonColon;
217                         NextChar();
218                     } else {
219                         kind = LexKind.Unknown;
220                     }
221                     break;
222                 case '*':
223                     kind = LexKind.Star;
224                     NextChar();
225                     CheckOperator(true);
226                     break;
227                 case '/':
228                     NextChar();
229                     if (curChar == '/') {
230                         kind = LexKind.SlashSlash;
231                         NextChar();
232                     } else {
233                         kind = LexKind.Slash;
234                     }
235                     break;
236                 case '|':
237                     kind = LexKind.Union;
238                     NextChar();
239                     break;
240                 case '+':
241                     kind = LexKind.Plus;
242                     NextChar();
243                     break;
244                 case '-':
245                     kind = LexKind.Minus;
246                     NextChar();
247                     break;
248                 case '=':
249                     kind = LexKind.Eq;
250                     NextChar();
251                     break;
252                 case '!':
253                     NextChar();
254                     if (curChar == '=') {
255                         kind = LexKind.Ne;
256                         NextChar();
257                     } else {
258                         kind = LexKind.Unknown;
259                     }
260                     break;
261                 case '<':
262                     NextChar();
263                     if (curChar == '=') {
264                         kind = LexKind.Le;
265                         NextChar();
266                     } else {
267                         kind = LexKind.Lt;
268                     }
269                     break;
270                 case '>':
271                     NextChar();
272                     if (curChar == '=') {
273                         kind = LexKind.Ge;
274                         NextChar();
275                     } else {
276                         kind = LexKind.Gt;
277                     }
278                     break;
279                 case '"':
280                 case '\'':
281                     kind = LexKind.String;
282                     ScanString();
283                     break;
284                 case '0': case '1': case '2': case '3':
285                 case '4': case '5': case '6': case '7':
286                 case '8': case '9':
287                     kind = LexKind.Number;
288                     ScanNumber();
289                     break;
290                 default:
291                     if (xmlCharType.IsStartNCNameSingleChar(curChar) 
292 #if XML10_FIFTH_EDITION
293                         || xmlCharType.IsNCNameHighSurrogateChar(curChar)
294 #endif
295                         ) {
296                         kind = LexKind.Name;
297                         this.name   = ScanNCName();
298                         this.prefix = string.Empty;
299                         this.canBeFunction = false;
300                         this.axis = XPathAxis.Unknown;
301                         bool colonColon = false;
302                         int saveSourceIndex = curIndex;
303
304                         // "foo:bar" or "foo:*" -- one lexeme (no spaces allowed)
305                         // "foo::" or "foo ::"  -- two lexemes, reported as one (AxisName)
306                         // "foo:?" or "foo :?"  -- lexeme "foo" reported
307                         if (curChar == ':') {
308                             NextChar();
309                             if (curChar == ':') {   // "foo::" -> OperatorName, AxisName
310                                 NextChar();
311                                 colonColon = true;
312                                 SetSourceIndex(saveSourceIndex);
313                             } else {                // "foo:bar", "foo:*" or "foo:?"
314                                 if (curChar == '*') {
315                                     NextChar();
316                                     this.prefix = this.name;
317                                     this.name = "*";
318                                 } else if (xmlCharType.IsStartNCNameSingleChar(curChar) 
319 #if XML10_FIFTH_EDITION
320                                     || xmlCharType.IsNCNameHighSurrogateChar(curChar)
321 #endif
322                                     ) {
323                                     this.prefix = this.name;
324                                     this.name = ScanNCName();
325                                     // Look ahead for '(' to determine whether QName can be a FunctionName
326                                     saveSourceIndex = curIndex;
327                                     SkipSpace();
328                                     this.canBeFunction = (curChar == '(');
329                                     SetSourceIndex(saveSourceIndex);
330                                 } else {            // "foo:?" -> OperatorName, NameTest
331                                     // Return "foo" and leave ":" to be reported later as an unknown lexeme
332                                     SetSourceIndex(saveSourceIndex);
333                                 }
334                             }
335                         } else {
336                             SkipSpace();
337                             if (curChar == ':') {   // "foo ::" or "foo :?"
338                                 NextChar();
339                                 if (curChar == ':') {
340                                     NextChar();
341                                     colonColon = true;
342                                 }
343                                 SetSourceIndex(saveSourceIndex);
344                             } else {
345                                 this.canBeFunction = (curChar == '(');
346                             }
347                         }
348                         if (!CheckOperator(false) && colonColon) {
349                             this.axis = CheckAxis();
350                         }
351                     } else {
352                         kind = LexKind.Unknown;
353                         NextChar();
354                     }
355                     break;
356             }
357         }
358
359         private bool CheckOperator(bool star) {
360             LexKind opKind;
361
362             if (star) {
363                 opKind = LexKind.Multiply;
364             } else {
365                 if (prefix.Length != 0 || name.Length > 3)
366                     return false;
367
368                 switch (name) {
369                     case "or" : opKind = LexKind.Or;      break;
370                     case "and": opKind = LexKind.And;     break;
371                     case "div": opKind = LexKind.Divide;  break;
372                     case "mod": opKind = LexKind.Modulo;  break;
373                     default   : return false;
374                 }
375             }
376
377             // If there is a preceding token and the preceding token is not one of '@', '::', '(', '[', ',' or an Operator,
378             // then a '*' must be recognized as a MultiplyOperator and an NCName must be recognized as an OperatorName.
379             if (prevKind <= LexKind.LastOperator)
380                 return false;
381
382             switch (prevKind) {
383                 case LexKind.Slash:
384                 case LexKind.SlashSlash:
385                 case LexKind.At:
386                 case LexKind.ColonColon:
387                 case LexKind.LParens:
388                 case LexKind.LBracket:
389                 case LexKind.Comma:
390                 case LexKind.Dollar:
391                     return false;
392             }
393
394             this.kind = opKind;
395             return true;
396         }
397
398         private XPathAxis CheckAxis() {
399             this.kind = LexKind.Axis;
400             switch (name) {
401                 case "ancestor"           : return XPathAxis.Ancestor;
402                 case "ancestor-or-self"   : return XPathAxis.AncestorOrSelf;
403                 case "attribute"          : return XPathAxis.Attribute;
404                 case "child"              : return XPathAxis.Child;
405                 case "descendant"         : return XPathAxis.Descendant;
406                 case "descendant-or-self" : return XPathAxis.DescendantOrSelf;
407                 case "following"          : return XPathAxis.Following;
408                 case "following-sibling"  : return XPathAxis.FollowingSibling;
409                 case "namespace"          : return XPathAxis.Namespace;
410                 case "parent"             : return XPathAxis.Parent;
411                 case "preceding"          : return XPathAxis.Preceding;
412                 case "preceding-sibling"  : return XPathAxis.PrecedingSibling;
413                 case "self"               : return XPathAxis.Self;
414                 default                   : this.kind = LexKind.Name; return XPathAxis.Unknown;
415             }
416         }
417
418         private void ScanNumber() {
419             Debug.Assert(IsAsciiDigit(curChar) || curChar == '.');
420             while (IsAsciiDigit(curChar)) {
421                 NextChar();
422             }
423             if (curChar == '.') {
424                 NextChar();
425                 while (IsAsciiDigit(curChar)) {
426                     NextChar();
427                 }
428             }
429             if ((curChar & (~0x20)) == 'E') {
430                 NextChar();
431                 if (curChar == '+' || curChar == '-') {
432                     NextChar();
433                 }
434                 while (IsAsciiDigit(curChar)) {
435                     NextChar();
436                 }
437                 throw CreateException(Res.XPath_ScientificNotation);
438             }
439         }
440
441         private void ScanString() {
442             int startIdx = curIndex + 1;
443             int endIdx = xpathExpr.IndexOf(curChar, startIdx);
444
445             if (endIdx < 0) {
446                 SetSourceIndex(xpathExpr.Length);
447                 throw CreateException(Res.XPath_UnclosedString);
448             }
449
450             this.stringValue = xpathExpr.Substring(startIdx, endIdx - startIdx);
451             SetSourceIndex(endIdx + 1);
452         }
453
454         private string ScanNCName() {
455             Debug.Assert(xmlCharType.IsStartNCNameSingleChar(curChar) 
456 #if XML10_FIFTH_EDITION
457                 || xmlCharType.IsNCNameHighSurrogateChar(curChar)
458 #endif
459                 );
460             int start = curIndex;
461             for (;;) {
462                 if (xmlCharType.IsNCNameSingleChar(curChar)) {
463                     NextChar();
464                 }
465 #if XML10_FIFTH_EDITION
466                 else if (xmlCharType.IsNCNameSurrogateChar(PeekNextChar(), curChar)) {
467                     NextChar();
468                     NextChar();
469                 }
470 #endif
471                 else {
472                     break;
473                 }
474             }
475             return xpathExpr.Substring(start, curIndex - start);
476         }
477
478         public void PassToken(LexKind t) {
479             CheckToken(t);
480             NextLex();
481         }
482
483         public void CheckToken(LexKind t) {
484             Debug.Assert(LexKind.FirstStringable <= t);
485             if (kind != t) {
486                 if (t == LexKind.Eof) {
487                     throw CreateException(Res.XPath_EofExpected, RawValue);
488                 } else {
489                     throw CreateException(Res.XPath_TokenExpected, LexKindToString(t), RawValue);
490                 }
491             }
492         }
493
494         // May be called for the following tokens: Name, String, Eof, Comma, LParens, RParens, LBracket, RBracket, RBrace
495         private string LexKindToString(LexKind t) {
496             Debug.Assert(LexKind.FirstStringable <= t);
497
498             if (LexKind.LastNonChar < t) {
499                 Debug.Assert("()[].@,*/$}".IndexOf((char)t) >= 0);
500                 return new String((char)t, 1);
501             }
502
503             switch (t) {
504                 case LexKind.Name   : return "<name>";
505                 case LexKind.String : return "<string literal>";
506                 case LexKind.Eof    : return "<eof>";
507                 default:
508                     Debug.Fail("Unexpected LexKind: " + t.ToString());
509                     return string.Empty;
510             }
511         }
512
513         public XPathCompileException CreateException(string resId, params string[] args) {
514             return new XPathCompileException(xpathExpr, lexStart, curIndex, resId, args);
515         }
516     }
517 }