Merge pull request #1099 from jsportaro/master
[mono.git] / mcs / class / System.Data / Mono.Data.SqlExpressions / Tokenizer.cs
1 //
2 // SqlWhereClauseTokenizer.cs
3 //
4 // Author:
5 //   Juraj Skripsky (juraj@hotfeet.ch)
6 //
7 // (C) 2004 HotFeet GmbH (http://www.hotfeet.ch)
8 //
9
10 //
11 // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining
14 // a copy of this software and associated documentation files (the
15 // "Software"), to deal in the Software without restriction, including
16 // without limitation the rights to use, copy, modify, merge, publish,
17 // distribute, sublicense, and/or sell copies of the Software, and to
18 // permit persons to whom the Software is furnished to do so, subject to
19 // the following conditions:
20 // 
21 // The above copyright notice and this permission notice shall be
22 // included in all copies or substantial portions of the Software.
23 // 
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
28 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
29 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
30 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //
32
33 using System;
34 using System.Data;
35 using System.Globalization;
36 using System.IO;
37 using System.Text;
38 using System.Collections;
39
namespace Mono.Data.SqlExpressions {
        // Character-level lexer implementing yyParser.yyInput.
        //
        // The parser drives it through advance (), which skips white space,
        // recognises exactly one token and leaves the token code in token ()
        // and the payload (identifier name, string, number or date) in value ().
        internal class Tokenizer : yyParser.yyInput {
                // Keyword text (lower case) -> Token code. Filled once by the static constructor.
                private static readonly IDictionary tokenMap = new Hashtable ();

                // Flat list of (token code, keyword text) pairs used to seed tokenMap.
                private static readonly Object [] tokens = {
                        Token.AND, "and",
                        Token.OR, "or",
                        Token.NOT, "not",

                        Token.TRUE, "true",
                        Token.FALSE, "false",
                        Token.NULL, "null",

                        Token.PARENT, "parent",
                        Token.CHILD, "child",

                        Token.IS, "is",
                        Token.IN, "in",
                        Token.NOT_IN, "not in",
                        Token.LIKE, "like",
                        Token.NOT_LIKE, "not like",

                        Token.COUNT, "count",
                        Token.SUM, "sum",
                        Token.AVG, "avg",
                        Token.MAX, "max",
                        Token.MIN, "min",
                        Token.STDEV, "stdev",
                        Token.VAR, "var",

                        Token.IIF, "iif",
                        Token.SUBSTRING, "substring",
                        Token.ISNULL, "isnull",
                        Token.LEN, "len",
                        Token.TRIM, "trim",
                        Token.CONVERT, "convert"
                };

                // Expression text and the index of the character currently being examined.
                private char[] input;
                private int pos;

                // Code and payload of the token produced by the last advance ().
                private int tok;
                private object val;

                static Tokenizer ()
                {
                        // tokens holds pairs: [i] is the token code, [i + 1] its keyword.
                        for (int i = 0; i < tokens.Length; i += 2)
                                tokenMap.Add (tokens [i + 1], tokens [i]);
                }

                // Creates a tokenizer positioned on the first character of strInput.
                // Throws ArgumentNullException when strInput is null.
                public Tokenizer (string strInput)
                {
                        if (strInput == null)
                                throw new ArgumentNullException ("strInput");

                        input = strInput.ToCharArray ();
                        pos = 0;
                }

                // Character at pos. The caller must make sure pos is inside the input.
                private char Current ()
                {
                        return input [pos];
                }

                // Character after pos without consuming it, or (char) 0 at the end of the input.
                private char Next ()
                {
                        if (pos + 1 >= input.Length)
                                return (char)0;
                        return input [pos + 1];
                }

                // Steps to the next character; returns false once pos has run off the input.
                private bool MoveNext ()
                {
                        pos++;
                        return pos < input.Length;
                }

                // Moves pos to the next non-white-space character.
                // Returns false when the input is exhausted before one is found.
                private bool SkipWhiteSpace ()
                {
                        if (pos >= input.Length)
                                return false;

                        while (Char.IsWhiteSpace (Current ())) {
                                if (!MoveNext ())
                                        return false;
                        }

                        return true;
                }

                // Reads a run of digits and '.' starting at pos and leaves pos on its
                // last character. Returns a boxed Int64 when the text has no '.',
                // otherwise a boxed double; both are parsed with the invariant culture.
                // Parse failures (for instance more than one '.') propagate from
                // Int64.Parse / double.Parse unchanged.
                private object ReadNumber ()
                {
                        StringBuilder sb = new StringBuilder ();
                        sb.Append (Current ());

                        char next;
                        while (Char.IsDigit (next = Next ()) || next == '.') {
                                sb.Append (next);
                                if (!MoveNext ())
                                        break;
                        }

                        string str = sb.ToString ();

                        if (str.IndexOf ('.') < 0)
                                return Int64.Parse (str, CultureInfo.InvariantCulture);

                        return double.Parse (str, CultureInfo.InvariantCulture);
                }

                // Decodes a backslash escape. When c is a backslash, pos is advanced by
                // one and the character found there selects the result: n, r, t or
                // another backslash. Anything else, including running out of input,
                // throws SyntaxErrorException. Any other c is returned unchanged.
                private char ProcessEscapes (char c)
                {
                        if (c == '\\') {
                                if (MoveNext ())
                                        c = Current ();
                                else
                                        c = '\0';

                                switch (c) {
                                case 'n':
                                        c = '\n';
                                        break;
                                case 'r':
                                        c = '\r';
                                        break;
                                case 't':
                                        c = '\t';
                                        break;

                                case '\\':
                                        c = '\\';
                                        break;

                                default:
                                        throw new SyntaxErrorException (String.Format ("Invalid escape sequence: '\\{0}'.", c));
                                }
                        }
                        return c;
                }

                // Reads delimited text in which the terminator cannot be escaped by doubling.
                private string ReadString (char terminator)
                {
                        return ReadString (terminator, false /* canEscape */);
                }

                // Reads the text between the opening delimiter at pos and the next
                // terminator, decoding backslash escapes, and leaves pos on the closing
                // terminator.
                //
                // canEscape: when true, two consecutive terminators stand for one literal
                // terminator character instead of ending the text.
                //
                // Throws SyntaxErrorException when the input ends before the terminator.
                private string ReadString (char terminator,
                                           bool canEscape // twice the terminator is not a terminator
                                           )
                {
                        bool terminated = false;
                        StringBuilder sb = new StringBuilder ();
                        while (MoveNext ()) {
                                if (Current () == terminator) {
                                        // Doubling is only honoured when the caller asked for it;
                                        // previously the flag was ignored and doubling always applied.
                                        if (canEscape && Next () == terminator) {
                                                sb.Append (terminator);
                                                MoveNext ();
                                                continue;
                                        }
                                        terminated = true;
                                        break;
                                }
                                sb.Append (ProcessEscapes (Current ()));
                        }

                        if (! terminated)
                                throw new SyntaxErrorException (String.Format ("invalid string at {0}{1}<--",
                                                                               terminator,
                                                                               sb.ToString ())
                                                                );
                        return sb.ToString ();
                }

                // True for characters that continue a bare identifier.
                private static bool IsIdentifierPart (char c)
                {
                        return c == '_' || Char.IsLetterOrDigit (c) || c == '\\';
                }

                // True when word is the keyword "not" in any letter case.
                private static bool IsNotKeyword (string word)
                {
                        return String.Compare (word,
                                               "not",
#if NET_2_0
                                               StringComparison.OrdinalIgnoreCase
#else
                                               true, CultureInfo.InvariantCulture
#endif
                               ) == 0;
                }

                // Called with pos on the last character of "not". Looks past any white
                // space for a complete "in" or "like" word (any letter case).
                //
                // On success pos is moved to the last character of that word and the
                // lower-case word is returned. Otherwise pos is left untouched and null
                // is returned, so the caller reports a plain "not".
                private string ReadNotOperator ()
                {
                        int scan = pos + 1;
                        while (scan < input.Length && Char.IsWhiteSpace (input [scan]))
                                scan++;

                        // "not" was the last word of the input.
                        if (scan >= input.Length)
                                return null;

                        string target;
                        switch (input [scan]) {
                        case 'i':
                        case 'I':
                                target = "in";
                                break;

                        case 'l':
                        case 'L':
                                target = "like";
                                break;

                        default:
                                return null;
                        }

                        // Index just past the candidate word.
                        int end = scan + target.Length;
                        if (end > input.Length)
                                return null;

                        for (int i = 1; i < target.Length; i++) {
                                if (Char.ToLowerInvariant (input [scan + i]) != target [i])
                                        return null;
                        }

                        // Require a word boundary so that identifiers which merely start
                        // with the keyword ("inStock", "likes", "in1") are not split.
                        if (end < input.Length && IsIdentifierPart (input [end]))
                                return null;

                        pos = end - 1;
                        return target;
                }

                // Reads a bare identifier or keyword starting at pos and leaves pos on
                // its last character. The two-word operators are folded into a single
                // string: "not" followed by "in" or "like" is returned as the original
                // "not" text, one space, and the lower-case second word.
                private string ReadIdentifier ()
                {
                        StringBuilder sb = new StringBuilder ();
                        sb.Append (Current ());

                        char next;
                        while (IsIdentifierPart (next = Next ())) {
                                // NOTE(review): when next is a backslash, ProcessEscapes steps
                                // pos onto the backslash itself and returns it, and the
                                // MoveNext () below then steps over the following character
                                // without appending it. Behaviour kept as found; confirm intent.
                                sb.Append (ProcessEscapes (next));
                                if (!MoveNext ())
                                        break;
                        }

                        string ret = sb.ToString ();
                        if (!IsNotKeyword (ret))
                                return ret;

                        string second = ReadNotOperator ();
                        if (second == null)
                                return ret;

                        sb.Append (' ');
                        sb.Append (second);
                        return sb.ToString ();
                }

                // Reads an identifier and classifies it. Keywords yield their token code;
                // anything else yields Token.Identifier with the text stored in val.
                private int ParseIdentifier ()
                {
                        string strToken = ReadIdentifier ();

                        // Keywords are matched case-insensitively. The invariant culture keeps
                        // the lookup independent of the current thread culture (for example
                        // the Turkish dotless i would otherwise break "IN" and "LIKE").
                        object tokenObj = tokenMap [strToken.ToLower (CultureInfo.InvariantCulture)];

                        if (tokenObj != null)
                                return (int)tokenObj;

                        val = strToken;
                        return Token.Identifier;
                }

                // Recognises the token that starts at pos and returns its code, storing
                // any payload in val. pos is left on the last character of the token.
                // Throws SyntaxErrorException for a character that cannot start a token.
                private int ParseToken ()
                {
                        char cur;
                        switch (cur = Current ()) {
                        case '(':
                                return Token.PAROPEN;

                        case ')':
                                return Token.PARCLOSE;

                        case '.':
                                return Token.DOT;

                        case ',':
                                return Token.COMMA;

                        case '+':
                                return Token.PLUS;

                        case '-':
                                return Token.MINUS;

                        case '*':
                                return Token.MUL;

                        case '/':
                                return Token.DIV;

                        case '%':
                                return Token.MOD;

                        case '=':
                                return Token.EQ;

                        case '<':
                                return Token.LT;

                        case '>':
                                return Token.GT;

                        case '[':
                                // Bracketed identifier.
                                val = ReadString (']');
                                return Token.Identifier;

                        case '#':
                                // Date literal, parsed with the invariant culture.
                                string date = ReadString ('#');
                                val = DateTime.Parse (date, CultureInfo.InvariantCulture);
                                return Token.DateLiteral;

                        case '\'':
                        case '\"':
                                // String literal; a doubled quote stands for one quote.
                                val = ReadString (cur, true);
                                return Token.StringLiteral;

                        default:
                                if (Char.IsDigit (cur)) {
                                        val = ReadNumber ();
                                        return Token.NumberLiteral;
                                } else if (Char.IsLetter (cur) || cur == '_')
                                        return ParseIdentifier ();
                                break;
                        }
                        throw new SyntaxErrorException ("invalid token: '" + cur + "'");
                }

                ///////////////////////////
                // yyParser.yyInput methods
                ///////////////////////////

                /** move on to next token.
                  @return false when only white space or nothing is left to read.
                  @throws SyntaxErrorException when the input cannot be tokenized.
                  */
                public bool advance ()
                {
                        if (!SkipWhiteSpace ())
                                return false;
                        tok = ParseToken ();
                        // ParseToken stops on the token's last character; step past it.
                        MoveNext ();
                        return true;
                }

                /** classifies current token.
                  Should not be called if advance() returned false.
                  @return code of the token read by the last advance().
                  */
                public int token ()
                {
                        return tok;
                }

                /** associated with current token.
                  Should not be called if advance() returned false.
                  @return payload most recently stored by the tokenizer.
                  */
                public Object value ()
                {
                        return val;
                }
        }
}