Updates referencesource to .NET 4.7
[mono.git] / mcs / class / referencesource / System.Xml / System / Xml / XPath / Internal / XPathScanner.cs
1 //------------------------------------------------------------------------------
2 // <copyright file="XPathScanner.cs" company="Microsoft">
3 //     Copyright (c) Microsoft Corporation.  All rights reserved.
4 // </copyright>                                                                
5 // <owner current="true" primary="true">Microsoft</owner>
6 //------------------------------------------------------------------------------
7
8 namespace MS.Internal.Xml.XPath {
9     using System;
10     using System.Xml;
11     using System.Xml.XPath;
12     using System.Diagnostics;
13     using System.Globalization;
14     using System.Text;
15     using System.Collections;
16
17     internal sealed class XPathScanner {
18         private string  xpathExpr;
19         private int     xpathExprIndex;
20         private LexKind kind;
21         private char    currentChar;
22         private string  name;
23         private string  prefix;
24         private string  stringValue;
25         private double  numberValue = double.NaN;
26         private bool    canBeFunction;
27         private XmlCharType xmlCharType = XmlCharType.Instance;
28
29         public XPathScanner(string xpathExpr) {
30             if (xpathExpr == null) {
31                 throw XPathException.Create(Res.Xp_ExprExpected, string.Empty);
32             }
33             this.xpathExpr = xpathExpr;
34                         NextChar();
35             NextLex();
36         }
37
38         public string SourceText { get { return this.xpathExpr; } }
39
40         private char CurerntChar { get { return currentChar; } }
41
42         private bool NextChar() {
43             Debug.Assert(0 <= xpathExprIndex && xpathExprIndex <= xpathExpr.Length);
44             if (xpathExprIndex < xpathExpr.Length) {
45                                 currentChar = xpathExpr[xpathExprIndex ++]; 
46                                 return true;
47                         }
48                         else  {
49                                 currentChar = '\0';
50                 return false;
51             }
52         }
53
54 #if XML10_FIFTH_EDITION
55         private char PeekNextChar() {
56             Debug.Assert(0 <= xpathExprIndex && xpathExprIndex <= xpathExpr.Length);
57             if (xpathExprIndex < xpathExpr.Length) {
58                 return xpathExpr[xpathExprIndex];
59             }
60             else {
61                 Debug.Assert(xpathExprIndex == xpathExpr.Length);
62                 return '\0';
63             }
64         }
65 #endif
66
67         public LexKind Kind { get { return this.kind; } }
68
69         public string Name {
70             get {
71                 Debug.Assert(this.kind == LexKind.Name || this.kind == LexKind.Axe);
72                 Debug.Assert(this.name != null);
73                 return this.name;
74             }
75         }
76
77         public string Prefix {
78             get {
79                 Debug.Assert(this.kind == LexKind.Name);
80                 Debug.Assert(this.prefix != null);
81                 return this.prefix;
82             }
83         }
84
85         public string StringValue {
86             get {
87                 Debug.Assert(this.kind == LexKind.String);
88                 Debug.Assert(this.stringValue != null);
89                 return this.stringValue;
90             }
91         }
92
93         public double NumberValue {
94             get {
95                 Debug.Assert(this.kind == LexKind.Number);
96                 Debug.Assert(this.numberValue != double.NaN);
97                 return this.numberValue;
98             }
99         }
100
101         // To parse PathExpr we need a way to distinct name from function. 
102         // THis distinction can't be done without context: "or (1 != 0)" this this a function or 'or' in OrExp 
103         public bool CanBeFunction {
104             get {
105                 Debug.Assert(this.kind == LexKind.Name);
106                 return this.canBeFunction;
107             }
108         }
109
110         void SkipSpace() {
111             while (xmlCharType.IsWhiteSpace(this.CurerntChar) && NextChar()) ;
112         }
113
114         public bool NextLex() {
115             SkipSpace();
116             switch (this.CurerntChar) {
117             case '\0'  : 
118                 kind = LexKind.Eof;
119                 return false;
120             case ',': case '@': case '(': case ')': 
121             case '|': case '*': case '[': case ']': 
122             case '+': case '-': case '=': case '#': 
123             case '$':
124                 kind =  (LexKind) Convert.ToInt32(this.CurerntChar, CultureInfo.InvariantCulture);
125                 NextChar();
126                 break;
127             case '<': 
128                 kind = LexKind.Lt;
129                 NextChar();
130                 if (this.CurerntChar == '=') {
131                     kind = LexKind.Le;
132                     NextChar();
133                 }
134                 break;
135             case '>': 
136                 kind = LexKind.Gt;
137                 NextChar();
138                 if (this.CurerntChar == '=') {
139                     kind = LexKind.Ge;
140                     NextChar();
141                 }
142                 break;
143             case '!': 
144                 kind = LexKind.Bang;
145                 NextChar();
146                 if (this.CurerntChar == '=') {
147                     kind = LexKind.Ne;
148                     NextChar();
149                 }
150                 break;
151             case '.': 
152                 kind = LexKind.Dot;
153                 NextChar();
154                 if (this.CurerntChar == '.') {
155                     kind = LexKind.DotDot;
156                     NextChar();
157                 }
158                 else if (XmlCharType.IsDigit(this.CurerntChar)) {
159                     kind = LexKind.Number;
160                     numberValue = ScanFraction();
161                 }
162                 break;
163             case '/':
164                 kind = LexKind.Slash;
165                 NextChar();
166                 if (this.CurerntChar == '/') {
167                     kind = LexKind.SlashSlash;
168                     NextChar();
169                 }
170                 break;
171             case '"': 
172             case '\'': 
173                 this.kind = LexKind.String;
174                 this.stringValue = ScanString();
175                 break;
176             default:
177                 if (XmlCharType.IsDigit(this.CurerntChar)) {
178                     kind = LexKind.Number;
179                     numberValue = ScanNumber();
180                 }
181                 else if (xmlCharType.IsStartNCNameSingleChar(this.CurerntChar) 
182 #if XML10_FIFTH_EDITION
183                     || xmlCharType.IsNCNameHighSurrogateChar(this.CurerntChar) 
184 #endif
185                     ) {
186                     kind = LexKind.Name;
187                     this.name   = ScanName();
188                     this.prefix = string.Empty;
189                     // "foo:bar" is one lexem not three because it doesn't allow spaces in between
190                     // We should distinct it from "foo::" and need process "foo ::" as well
191                     if (this.CurerntChar == ':') {
192                         NextChar();
193                         // can be "foo:bar" or "foo::"
194                         if (this.CurerntChar == ':') {   // "foo::"
195                             NextChar();
196                             kind = LexKind.Axe;
197                         }
198                         else {                          // "foo:*", "foo:bar" or "foo: "
199                             this.prefix = this.name;
200                             if (this.CurerntChar == '*') {
201                                     NextChar();
202                                 this.name = "*";
203                             }
204                             else if (xmlCharType.IsStartNCNameSingleChar(this.CurerntChar) 
205 #if XML10_FIFTH_EDITION
206                                 || xmlCharType.IsNCNameHighSurrogateChar(this.CurerntChar)
207 #endif
208                                 ) {
209                                 this.name = ScanName(); 
210                             }
211                             else {
212                                 throw XPathException.Create(Res.Xp_InvalidName, SourceText);
213                             }
214                         }
215
216                     }
217                     else {
218                         SkipSpace();
219                         if (this.CurerntChar == ':') {
220                             NextChar();
221                             // it can be "foo ::" or just "foo :"
222                             if (this.CurerntChar == ':') {
223                                 NextChar();
224                                 kind = LexKind.Axe;
225                             }
226                             else {
227                                 throw XPathException.Create(Res.Xp_InvalidName, SourceText);
228                             }
229                         }
230                     }
231                     SkipSpace();
232                     this.canBeFunction = (this.CurerntChar == '(');
233                 }
234                 else {
235                     throw XPathException.Create(Res.Xp_InvalidToken, SourceText);
236                 }
237                         break;
238             }
239             return true;
240         }
241
242         private double ScanNumber() {
243             Debug.Assert(this.CurerntChar == '.' || XmlCharType.IsDigit(this.CurerntChar));
244             int start = xpathExprIndex - 1;
245                         int len = 0;
246                         while (XmlCharType.IsDigit(this.CurerntChar)) {
247                                 NextChar(); len ++;
248                         }
249             if (this.CurerntChar == '.') {
250                                 NextChar(); len ++;
251                                 while (XmlCharType.IsDigit(this.CurerntChar)) {
252                                         NextChar(); len ++;
253                                 }
254             }
255                         return XmlConvert.ToXPathDouble(this.xpathExpr.Substring(start, len));
256         }
257
258         private double ScanFraction() {
259             Debug.Assert(XmlCharType.IsDigit(this.CurerntChar));
260             int start = xpathExprIndex - 2;
261             Debug.Assert(0 <= start && this.xpathExpr[start] == '.');
262                         int len = 1; // '.'
263                         while (XmlCharType.IsDigit(this.CurerntChar)) {
264                                 NextChar(); len ++;
265                         }
266                         return XmlConvert.ToXPathDouble(this.xpathExpr.Substring(start, len));
267         }
268
269         private string ScanString() {
270             char endChar = this.CurerntChar;
271             NextChar();
272             int start = xpathExprIndex - 1;
273                         int len = 0;
274             while(this.CurerntChar != endChar) {
275                                 if (! NextChar()) {
276                         throw XPathException.Create(Res.Xp_UnclosedString);
277                                 }
278                                 len ++;
279                         }
280             Debug.Assert(this.CurerntChar == endChar);
281             NextChar();
282             return this.xpathExpr.Substring(start, len);
283         }
284
285         private string ScanName() {
286             Debug.Assert(xmlCharType.IsStartNCNameSingleChar(this.CurerntChar) 
287 #if XML10_FIFTH_EDITION
288                 || xmlCharType.IsNCNameHighSurrogateChar(this.CurerntChar)
289 #endif
290                 );
291             int start = xpathExprIndex - 1;
292                         int len = 0;
293
294             for (;;) {
295                 if (xmlCharType.IsNCNameSingleChar(this.CurerntChar)) {
296                                     NextChar(); 
297                     len ++;
298                 }
299 #if XML10_FIFTH_EDITION
300                 else if (xmlCharType.IsNCNameSurrogateChar(this.PeekNextChar(), this.CurerntChar)) {
301                     NextChar(); 
302                     NextChar(); 
303                     len += 2;
304                 }
305 #endif
306                 else {
307                     break;
308                 }
309                         }
310             return this.xpathExpr.Substring(start, len);
311         }
312
313         public enum LexKind  {
314             Comma                 = ',',
315             Slash                 = '/',
316             At                    = '@',
317             Dot                   = '.',
318             LParens               = '(',
319             RParens               = ')',
320             LBracket              = '[',
321             RBracket              = ']',
322             Star                  = '*',
323             Plus                  = '+',
324             Minus                 = '-',
325             Eq                    = '=',
326             Lt                    = '<',
327             Gt                    = '>',
328             Bang                  = '!',
329             Dollar                = '$',
330             Apos                  = '\'',
331             Quote                 = '"',
332             Union                 = '|',
333             Ne                    = 'N',   // !=
334             Le                    = 'L',   // <=
335             Ge                    = 'G',   // >=
336             And                   = 'A',   // &&
337             Or                    = 'O',   // ||
338             DotDot                = 'D',   // ..
339             SlashSlash            = 'S',   // //
340             Name                  = 'n',   // XML _Name
341             String                = 's',   // Quoted string constant
342             Number                = 'd',   // _Number constant
343             Axe                   = 'a',   // Axe (like child::)
344             Eof                   = 'E',
345         };
346     }
347 }
348