46c643fd3efb6ee7e1f064c42ccfbe2d1fd3d42f
[mono.git] / mcs / mcs / cs-tokenizer.cs
1 //\r
2 // cs-tokenizer.cs: The Tokenizer for the C# compiler\r
3 //\r
4 // Author: Miguel de Icaza (miguel@gnu.org)\r
5 //\r
6 // Licensed under the terms of the GNU GPL\r
7 //\r
8 // (C) 2001 Ximian, Inc (http://www.ximian.com)\r
9 //\r
10 \r
11 /*\r
12   Todo:\r
13 \r
14   Do something with the integer and float suffixes, pass full datatype?\r
15   Make sure we accept the proper Unicode ranges, per the spec.\r
16 \r
17   Open issues:\r
18 \r
19   * Data type handling\r
20   \r
21           Currently I am returning different tokens for the various\r
22           kinds of floating point types (float, double, decimal) and I\r
23           am only returning a single token for all integer values\r
24           (integer, unsigned int, etc) as an experiment as to see\r
25           which mechanism is better.\r
26         \r
27           I do not know yet how I will be doing the mapping of "int"\r
28           to things like System.Int32 and so on.  I am confused.  MAN\r
29           I AM C\r
30         \r
31           Indeed, this might be the core of the problem, I should\r
32           *probably* just return a TYPE token and have the value of\r
33           the token be stuff like `System.Int32', `System.UInt32',\r
34           `System.Double' and so on.  I will see.\r
35 \r
36   * Error reporting.\r
37 \r
38           I was returning Token.ERROR on errors and setting an\r
39           internal error string with the details, but it might make sense\r
40           to just use exceptions.\r
41 \r
42           Change of mind: I think I want to keep returning errors *UNLESS* the\r
43           parser is catching errors from the tokenizer (at that point, there is\r
44           not really any reason to use exceptions) so that I can continue the\r
45           parsing \r
46 \r
47   * IDEA\r
48 \r
49           I think I have solved the problem.  The idea is to not even *bother*\r
50           about handling data types a lot here (except for fitting data into\r
51           the proper places), but let the upper layer handle it.\r
52 \r
53           Ie, treat LITERAL_CHARACTER, LITERAL_INTEGER, LITERAL_FLOAT, LITERAL_DOUBLE, and\r
54           return them as `LITERAL_LITERAL' with maybe subdetail information\r
55 \r
56 */\r
57 \r
58 using System;\r
59 using System.Text;\r
60 using System.Collections;\r
61 using System.IO;\r
62 using System.Globalization;\r
63 \r
64 namespace CIR\r
65 {\r
66         /// <summary>\r
67         ///    Tokenizer for C# source code. \r
68         /// </summary>\r
69 \r
70         public class Tokenizer : yyParser.yyInput\r
71         {\r
72                 StreamReader reader;\r
73                 public string ref_name;\r
74                 public int ref_line = 1;\r
75                 public int line = 1;\r
76                 public int col = 1;\r
77                 public int current_token;\r
78                 bool handle_get_set = false;\r
79 \r
80                 //\r
81                 // Returns a verbose representation of the current location\r
82                 //\r
83                 public string location {\r
84                         get {\r
85                                 string det;\r
86 \r
87                                 if (current_token == Token.ERROR)\r
88                                         det = "detail: " + error_details;\r
89                                 else\r
90                                         det = "";\r
91                                 \r
92                                 //return "Line:     "+line+" Col: "+col + "\n" +\r
93                                 //       "VirtLine: "+ref_line +\r
94                                 //       " Token: "+current_token + " " + det;\r
95 \r
96                                 return ref_name + " " + "(" + line + "," + col + ")";\r
97                         }\r
98                 }\r
99 \r
100                 public bool properties {\r
101                         get {\r
102                                 return handle_get_set;\r
103                         }\r
104 \r
105                         set {\r
106                                 handle_get_set = value;\r
107                         }\r
108                 }\r
109                 \r
110                 //\r
111                 // Class variables\r
112                 // \r
113                 static Hashtable keywords;\r
114                 static NumberStyles styles;\r
115                 static NumberFormatInfo csharp_format_info;\r
116                 \r
117                 //\r
118                 // Values for the associated token returned\r
119                 //\r
120                 System.Text.StringBuilder number;\r
121                 int putback_char;\r
122                 Object val;\r
123                 \r
124                 //\r
125                 // Details about the error encountered by the tokenizer\r
126                 //\r
127                 string error_details;\r
128                 \r
129                 public string error {\r
130                         get {\r
131                                 return error_details;\r
132                         }\r
133                 }\r
134                 \r
135                 public int Line {\r
136                         get {\r
137                                 return line;\r
138                         }\r
139                 }\r
140 \r
141                 public int Col {\r
142                         get {\r
143                                 return col;\r
144                         }\r
145                 }\r
146                 \r
147                 static void initTokens ()\r
148                 {\r
149                         keywords = new Hashtable ();\r
150 \r
151                         keywords.Add ("abstract", Token.ABSTRACT);\r
152                         keywords.Add ("as", Token.AS);\r
153                         keywords.Add ("add", Token.ADD);\r
154                         keywords.Add ("base", Token.BASE);\r
155                         keywords.Add ("bool", Token.BOOL);\r
156                         keywords.Add ("break", Token.BREAK);\r
157                         keywords.Add ("byte", Token.BYTE);\r
158                         keywords.Add ("case", Token.CASE);\r
159                         keywords.Add ("catch", Token.CATCH);\r
160                         keywords.Add ("char", Token.CHAR);\r
161                         keywords.Add ("checked", Token.CHECKED);\r
162                         keywords.Add ("class", Token.CLASS);\r
163                         keywords.Add ("const", Token.CONST);\r
164                         keywords.Add ("continue", Token.CONTINUE);\r
165                         keywords.Add ("decimal", Token.DECIMAL);\r
166                         keywords.Add ("default", Token.DEFAULT);\r
167                         keywords.Add ("delegate", Token.DELEGATE);\r
168                         keywords.Add ("do", Token.DO);\r
169                         keywords.Add ("double", Token.DOUBLE);\r
170                         keywords.Add ("else", Token.ELSE);\r
171                         keywords.Add ("enum", Token.ENUM);\r
172                         keywords.Add ("event", Token.EVENT);\r
173                         keywords.Add ("explicit", Token.EXPLICIT);\r
174                         keywords.Add ("extern", Token.EXTERN);\r
175                         keywords.Add ("false", Token.FALSE);\r
176                         keywords.Add ("finally", Token.FINALLY);\r
177                         keywords.Add ("fixed", Token.FIXED);\r
178                         keywords.Add ("float", Token.FLOAT);\r
179                         keywords.Add ("for", Token.FOR);\r
180                         keywords.Add ("foreach", Token.FOREACH);\r
181                         keywords.Add ("goto", Token.GOTO);\r
182                         keywords.Add ("get", Token.GET);\r
183                         keywords.Add ("if", Token.IF);\r
184                         keywords.Add ("implicit", Token.IMPLICIT);\r
185                         keywords.Add ("in", Token.IN);\r
186                         keywords.Add ("int", Token.INT);\r
187                         keywords.Add ("interface", Token.INTERFACE);\r
188                         keywords.Add ("internal", Token.INTERNAL);\r
189                         keywords.Add ("is", Token.IS);\r
190                         keywords.Add ("lock ", Token.LOCK );\r
191                         keywords.Add ("long", Token.LONG);\r
192                         keywords.Add ("namespace", Token.NAMESPACE);\r
193                         keywords.Add ("new", Token.NEW);\r
194                         keywords.Add ("null", Token.NULL);\r
195                         keywords.Add ("object", Token.OBJECT);\r
196                         keywords.Add ("operator", Token.OPERATOR);\r
197                         keywords.Add ("out", Token.OUT);\r
198                         keywords.Add ("override", Token.OVERRIDE);\r
199                         keywords.Add ("params", Token.PARAMS);\r
200                         keywords.Add ("private", Token.PRIVATE);\r
201                         keywords.Add ("protected", Token.PROTECTED);\r
202                         keywords.Add ("public", Token.PUBLIC);\r
203                         keywords.Add ("readonly", Token.READONLY);\r
204                         keywords.Add ("ref", Token.REF);\r
205                         keywords.Add ("remove", Token.REMOVE);\r
206                         keywords.Add ("return", Token.RETURN);\r
207                         keywords.Add ("sbyte", Token.SBYTE);\r
208                         keywords.Add ("sealed", Token.SEALED);\r
209                         keywords.Add ("set", Token.SET);\r
210                         keywords.Add ("short", Token.SHORT);\r
211                         keywords.Add ("sizeof", Token.SIZEOF);\r
212                         keywords.Add ("static", Token.STATIC);\r
213                         keywords.Add ("string", Token.STRING);\r
214                         keywords.Add ("struct", Token.STRUCT);\r
215                         keywords.Add ("switch", Token.SWITCH);\r
216                         keywords.Add ("this", Token.THIS);\r
217                         keywords.Add ("throw", Token.THROW);\r
218                         keywords.Add ("true", Token.TRUE);\r
219                         keywords.Add ("try", Token.TRY);\r
220                         keywords.Add ("typeof", Token.TYPEOF);\r
221                         keywords.Add ("uint", Token.UINT);\r
222                         keywords.Add ("ulong", Token.ULONG);\r
223                         keywords.Add ("unchecked", Token.UNCHECKED);\r
224                         keywords.Add ("unsafe", Token.UNSAFE);\r
225                         keywords.Add ("ushort", Token.USHORT);\r
226                         keywords.Add ("using", Token.USING);\r
227                         keywords.Add ("virtual", Token.VIRTUAL);\r
228                         keywords.Add ("void", Token.VOID);\r
229                         keywords.Add ("while", Token.WHILE);\r
230                 }\r
231 \r
232                 //\r
233                 // Class initializer\r
234                 // \r
235                 static Tokenizer ()\r
236                 {\r
237                         initTokens ();\r
238                         csharp_format_info = new NumberFormatInfo ();\r
239                         csharp_format_info.CurrencyDecimalSeparator = ".";\r
240                         styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;\r
241                 }\r
242 \r
243                 bool is_keyword (string name)\r
244                 {\r
245                         bool res;\r
246                         \r
247                         res = keywords.Contains (name);\r
248                         if ((name == "get" || name == "set") && handle_get_set == false)\r
249                                 return false;\r
250                         return res;\r
251                 }\r
252 \r
253                 int getKeyword (string name)\r
254                 {\r
255                         return (int) (keywords [name]);\r
256                 }\r
257 \r
258                 public Location Location {\r
259                         get {\r
260                                 return new Location (ref_name, col, ref_line);\r
261                         }\r
262                 }\r
263                 \r
264                 public Tokenizer (System.IO.Stream input, string fname)\r
265                 {\r
266                         this.ref_name = fname;\r
267                         reader = new System.IO.StreamReader (input);\r
268                         putback_char = -1;\r
269                 }\r
270 \r
271                 bool is_identifier_start_character (char c)\r
272                 {\r
273                         return Char.IsLetter (c) || c == '_' ;\r
274                 }\r
275 \r
276                 bool is_identifier_part_character (char c)\r
277                 {\r
278                         return (Char.IsLetter (c) || Char.IsDigit (c) || c == '_');\r
279                 }\r
280 \r
281                 int is_punct (char c, ref bool doread)\r
282                 {\r
283                         int idx = "{}[](),:;~+-*/%&|^!=<>?".IndexOf (c);\r
284                         int d;\r
285                         int t;\r
286 \r
287                         doread = false;\r
288 \r
289                         switch (c){\r
290                         case '{':\r
291                                 return Token.OPEN_BRACE;\r
292                         case '}':\r
293                                 return Token.CLOSE_BRACE;\r
294                         case '[':\r
295                                 return Token.OPEN_BRACKET;\r
296                         case ']':\r
297                                 return Token.CLOSE_BRACKET;\r
298                         case '(':\r
299                                 return Token.OPEN_PARENS;\r
300                         case ')':\r
301                                 return Token.CLOSE_PARENS;\r
302                         case ',':\r
303                                 return Token.COMMA;\r
304                         case ':':\r
305                                 return Token.COLON;\r
306                         case ';':\r
307                                 return Token.SEMICOLON;\r
308                         case '~':\r
309                                 return Token.TILDE;\r
310                         case '?':\r
311                                 return Token.INTERR;\r
312                         }\r
313 \r
314                         d = peekChar ();\r
315                         if (c == '+'){\r
316                                 \r
317                                 if (d == '+')\r
318                                         t = Token.OP_INC;\r
319                                 else if (d == '=')\r
320                                         t = Token.OP_ADD_ASSIGN;\r
321                                 else\r
322                                         return Token.PLUS;\r
323                                 doread = true;\r
324                                 return t;\r
325                         }\r
326                         if (c == '-'){\r
327                                 if (d == '-')\r
328                                         t = Token.OP_DEC;\r
329                                 else if (d == '=')\r
330                                         t = Token.OP_SUB_ASSIGN;\r
331                                 else if (d == '>')\r
332                                         return Token.OP_PTR;\r
333                                 else\r
334                                         return Token.MINUS;\r
335                                 doread = true;\r
336                                 return t;\r
337                         }\r
338 \r
339                         if (c == '!'){\r
340                                 if (d == '='){\r
341                                         doread = true;\r
342                                         return Token.OP_NE;\r
343                                 }\r
344                                 return Token.BANG;\r
345                         }\r
346 \r
347                         if (c == '='){\r
348                                 if (d == '='){\r
349                                         doread = true;\r
350                                         return Token.OP_EQ;\r
351                                 }\r
352                                 return Token.ASSIGN;\r
353                         }\r
354 \r
355                         if (c == '&'){\r
356                                 if (d == '&'){\r
357                                         doread = true;\r
358                                         return Token.OP_AND;\r
359                                 } else if (d == '='){\r
360                                         doread = true;\r
361                                         return Token.OP_AND_ASSIGN;\r
362                                 }\r
363                                 return Token.BITWISE_AND;\r
364                         }\r
365 \r
366                         if (c == '|'){\r
367                                 if (d == '|'){\r
368                                         doread = true;\r
369                                         return Token.OP_OR;\r
370                                 } else if (d == '='){\r
371                                         doread = true;\r
372                                         return Token.OP_OR_ASSIGN;\r
373                                 }\r
374                                 return Token.BITWISE_OR;\r
375                         }\r
376 \r
377                         if (c == '*'){\r
378                                 if (d == '='){\r
379                                         doread = true;\r
380                                         return Token.OP_MULT_ASSIGN;\r
381                                 }\r
382                                 return Token.STAR;\r
383                         }\r
384 \r
385                         if (c == '/'){\r
386                                 if (d == '='){\r
387                                         doread = true;\r
388                                         return Token.OP_DIV_ASSIGN;\r
389                                 }\r
390                                 return Token.DIV;\r
391                         }\r
392 \r
393                         if (c == '%'){\r
394                                 if (d == '='){\r
395                                         doread = true;\r
396                                         return Token.OP_MOD_ASSIGN;\r
397                                 }\r
398                                 return Token.PERCENT;\r
399                         }\r
400 \r
401                         if (c == '^'){\r
402                                 if (d == '='){\r
403                                         doread = true;\r
404                                         return Token.OP_XOR_ASSIGN;\r
405                                 }\r
406                                 return Token.CARRET;\r
407                         }\r
408 \r
409                         if (c == '<'){\r
410                                 if (d == '<'){\r
411                                         getChar ();\r
412                                         d = peekChar ();\r
413 \r
414                                         if (d == '='){\r
415                                                 doread = true;\r
416                                                 return Token.OP_SHIFT_LEFT_ASSIGN;\r
417                                         }\r
418                                         return Token.OP_SHIFT_LEFT;\r
419                                 } else if (d == '='){\r
420                                         doread = true;\r
421                                         return Token.OP_LE;\r
422                                 }\r
423                                 return Token.OP_LT;\r
424                         }\r
425 \r
426                         if (c == '>'){\r
427                                 if (d == '>'){\r
428                                         getChar ();\r
429                                         d = peekChar ();\r
430 \r
431                                         if (d == '='){\r
432                                                 doread = true;\r
433                                                 return Token.OP_SHIFT_RIGHT_ASSIGN;\r
434                                         }\r
435                                         return Token.OP_SHIFT_RIGHT;\r
436                                 } else if (d == '='){\r
437                                         doread = true;\r
438                                         return Token.OP_GE;\r
439                                 }\r
440                                 return Token.OP_GT;\r
441                         }\r
442                         return Token.ERROR;\r
443                 }\r
444 \r
445                 bool decimal_digits (int c)\r
446                 {\r
447                         int d;\r
448                         bool seen_digits = false;\r
449                         \r
450                         if (c != -1)\r
451                                 number.Append ((char) c);\r
452                         \r
453                         while ((d = peekChar ()) != -1){\r
454                                 if (Char.IsDigit ((char)d)){\r
455                                         number.Append ((char) d);\r
456                                         getChar ();\r
457                                         seen_digits = true;\r
458                                 } else\r
459                                         break;\r
460                         }\r
461                         return seen_digits;\r
462                 }\r
463 \r
464                 void hex_digits (int c)\r
465                 {\r
466                         int d;\r
467 \r
468                         if (c != -1)\r
469                                 number.Append ((char) c);\r
470                         while ((d = peekChar ()) != -1){\r
471                                 char e = Char.ToUpper ((char) d);\r
472                                 \r
473                                 if (Char.IsDigit (e) ||\r
474                                     (e >= 'A' && e <= 'F')){\r
475                                         number.Append ((char) e);\r
476                                         getChar ();\r
477                                 } else\r
478                                         break;\r
479                         }\r
480                 }\r
481                 \r
482                 int real_type_suffix (int c)\r
483                 {\r
484                         int t;\r
485                         \r
486                         switch (c){\r
487                         case 'F': case 'f':\r
488                                 t =  Token.LITERAL_FLOAT;\r
489                                 break;\r
490                         case 'D': case 'd':\r
491                                 t = Token.LITERAL_DOUBLE;\r
492                                 break;\r
493                         case 'M': case 'm':\r
494                                  t= Token.LITERAL_DECIMAL;\r
495                                 break;\r
496                         default:\r
497                                 return Token.NONE;\r
498                         }\r
499                         //getChar ();\r
500                         return t;\r
501                 }\r
502 \r
503                 int integer_type_suffix (int c)\r
504                 {\r
505                         // FIXME: Handle U and L suffixes.\r
506                         // We also need to see in which kind of\r
507                         // Int the thing fits better according to the spec.\r
508                         return Token.LITERAL_INTEGER;\r
509                 }\r
510                 \r
511                 void adjust_int (int t)\r
512                 {\r
513                         val = new System.Int32();\r
514                         val = System.Int32.Parse (number.ToString (), 0);\r
515                 }\r
516 \r
517                 int adjust_real (int t)\r
518                 {\r
519                         string s = number.ToString ();\r
520 \r
521                         switch (t){\r
522                         case Token.LITERAL_DECIMAL:\r
523                                 val = new System.Decimal ();\r
524                                 val = System.Decimal.Parse (\r
525                                         s, styles, csharp_format_info);\r
526                                 break;\r
527                         case Token.LITERAL_DOUBLE:\r
528                                 val = new System.Double ();\r
529                                 val = System.Double.Parse (\r
530                                         s, styles, csharp_format_info);\r
531                                 break;\r
532                         case Token.LITERAL_FLOAT:\r
533                                 val = new System.Double ();\r
534                                 val = (float) System.Double.Parse (\r
535                                         s, styles, csharp_format_info);\r
536                                 break;\r
537 \r
538                         case Token.NONE:\r
539                                 val = new System.Double ();\r
540                                 val = System.Double.Parse (\r
541                                         s, styles, csharp_format_info);\r
542                                 t = Token.LITERAL_DOUBLE;\r
543                                 break;\r
544                         }\r
545                         return t;\r
546                 }\r
547 \r
548                 //\r
549                 // Invoked if we know we have .digits or digits\r
550                 //\r
551                 int is_number (int c)\r
552                 {\r
553                         bool is_real = false;\r
554                         number = new System.Text.StringBuilder ();\r
555                         int type;\r
556 \r
557                         number.Length = 0;\r
558 \r
559                         if (Char.IsDigit ((char)c)){\r
560                                 if (c == '0' && peekChar () == 'x' || peekChar () == 'X'){\r
561                                         getChar ();\r
562                                         hex_digits (-1);\r
563                                         val = new System.Int32 ();\r
564                                         val = System.Int32.Parse (number.ToString (), NumberStyles.HexNumber);\r
565                                         return integer_type_suffix (peekChar ());\r
566                                 }\r
567                                 decimal_digits (c);\r
568                                 c = getChar ();\r
569                         }\r
570 \r
571                         //\r
572                         // We need to handle the case of\r
573                         // "1.1" vs "1.string" (LITERAL_FLOAT vs NUMBER DOT IDENTIFIER)\r
574                         //\r
575                         if (c == '.'){\r
576                                 if (decimal_digits ('.')){\r
577                                         is_real = true;\r
578                                         c = peekChar ();\r
579                                 } else {\r
580                                         putback ('.');\r
581                                         number.Length -= 1;\r
582                                         adjust_int (Token.LITERAL_INTEGER);\r
583                                         return Token.LITERAL_INTEGER;\r
584                                 }\r
585                         }\r
586                         \r
587                         if (c == 'e' || c == 'E'){\r
588                                 is_real = true;\r
589                                 number.Append ("e");\r
590                                 getChar ();\r
591                                 \r
592                                 c = peekChar ();\r
593                                 if (c == '+'){\r
594                                         number.Append ((char) c);\r
595                                         getChar ();\r
596                                         c = peekChar ();\r
597                                 } else if (c == '-'){\r
598                                         number.Append ((char) c);\r
599                                         getChar ();\r
600                                         c = peekChar ();\r
601                                 }\r
602                                 decimal_digits (-1);\r
603                                 c = peekChar ();\r
604                         }\r
605 \r
606                         type = real_type_suffix (c);\r
607                         if (type == Token.NONE && !is_real){\r
608                                 type = integer_type_suffix (c);\r
609                                 adjust_int (type);\r
610                                 putback (c);\r
611                                 return type;\r
612                         } else\r
613                                 is_real = true;\r
614 \r
615                         if (is_real)\r
616                                 return adjust_real (type);\r
617 \r
618                         Console.WriteLine ("This should not be reached");\r
619                         throw new Exception ("Is Number should never reach this point");\r
620                 }\r
621                         \r
622                 int escape (int c)\r
623                 {\r
624                         int d;\r
625                         int v;\r
626 \r
627                         d = peekChar ();\r
628                         if (c != '\\')\r
629                                 return c;\r
630                         \r
631                         switch (d){\r
632                         case 'a':\r
633                                 v = '\a'; break;\r
634                         case 'b':\r
635                                 v = '\b'; break;\r
636                         case 'n':\r
637                                 v = '\n'; break;\r
638                         case 't':\r
639                                 v = '\t'; break;\r
640                         case 'v':\r
641                                 v = '\v'; break;\r
642                         case 'r':\r
643                                 v = 'c'; break;\r
644                         case '\\':\r
645                                 v = '\\'; break;\r
646                         case 'f':\r
647                                 v = '\f'; break;\r
648                         case '0':\r
649                                 v = 0; break;\r
650                         case '"':\r
651                                 v = '"'; break;\r
652                         case '\'':\r
653                                 v = '\''; break;\r
654                         default:\r
655                                 error_details = "cs1009: Unrecognized escape sequence " + (char)d;\r
656                                 return -1;\r
657                         }\r
658                         getChar ();\r
659                         return v;\r
660                 }\r
661 \r
//
// Consumes and returns the next input character.  A character that was
// handed back through putback () takes priority over the reader and is
// cleared once returned; otherwise the value comes from reader.Read ().
//
int getChar ()
{
        if (putback_char == -1)
                return reader.Read ();

        int pending = putback_char;

        putback_char = -1;
        return pending;
}
672 \r
//
// Returns the next input character without consuming it: the pending
// put-back character if there is one, otherwise reader.Peek ().
//
int peekChar ()
{
        return (putback_char != -1) ? putback_char : reader.Peek ();
}
679 \r
//
// Hands the character `c' back so that the next getChar () or
// peekChar () returns it.  Only one character can be pending at a
// time; a second put-back before the first is consumed throws.
//
void putback (int c)
{
        if (putback_char == -1){
                putback_char = c;
                return;
        }

        throw new Exception ("This should not happen putback on putback");
}
686 \r
//
// Reports whether there is more input to read.  Nothing is consumed:
// the next character is only peeked at and compared against -1.
//
public bool advance ()
{
        int next = peekChar ();

        return next != -1;
}
691 \r
//
// The value stored in `val' by the tokenizer (read-only).
//
public Object Value {
        get { return this.val; }
}
697 \r
//
// Method form of the Value property: returns the contents of `val'.
//
public Object value ()
{
        return this.val;
}
702                 \r
//
// Scans the next token with xtoken (), records its code in
// `current_token' and returns that same code.
//
public int token ()
{
        return current_token = xtoken ();
}
708                 \r
//
// Scans forward from the current input position and returns the code
// of the next token.
//
// Returns:
//   Token.IDENTIFIER         `val' holds the identifier text.
//   the getKeyword () result for identifiers that are keywords (unless
//                            they were prefixed with `@').
//   Token.DOT, or whatever is_number () / is_punct () return.
//   Token.LITERAL_STRING     `val' holds the string contents.
//   Token.LITERAL_CHARACTER  `val' holds the character.
//   Token.ERROR              `error_details' describes the problem.
//   Token.EOF                when the input is exhausted.
//
// Comments, white space and preprocessor lines are skipped; of the
// preprocessor commands only `#line' is interpreted (it updates
// `ref_line' and, when a file name is given, `ref_name').
//
// `line', `ref_line' and `col' are updated as characters are consumed.
// Every construct that swallows a newline resets `col' to zero, the
// same way the plain newline case does, so that the first character of
// the following line is seen with col == 1.  The preprocessor test
// below depends on that.
//
public int xtoken ()
{
        int t;
        bool allow_keyword_as_ident = false;
        bool doread = false;
        int c;

        val = null;
        for (;(c = getChar ()) != -1; col++) {

                if (is_identifier_start_character ((char) c)){
                        System.Text.StringBuilder id = new System.Text.StringBuilder ();
                        string ids;

                        id.Append ((char) c);

                        while ((c = peekChar ()) != -1) {
                                if (is_identifier_part_character ((char) c)){
                                        id.Append ((char)getChar ());
                                        col++;
                                } else
                                        break;
                        }

                        ids = id.ToString ();

                        if (!is_keyword (ids) || allow_keyword_as_ident) {
                                val = ids;
                                return Token.IDENTIFIER;
                        }

                        // true, false and null are in the hash anyway.
                        return getKeyword (ids);

                }

                if (c == '.'){
                        // A dot followed by a digit starts a number (".5")
                        if (Char.IsDigit ((char) peekChar ()))
                                return is_number (c);
                        return Token.DOT;
                }

                if (Char.IsDigit ((char) c))
                        return is_number (c);

                // Handle double-slash and slash-star comments.  A lone
                // slash falls through and is handled by is_punct below.
                if (c == '/'){
                        int d = peekChar ();

                        if (d == '/'){
                                getChar ();
                                while ((d = getChar ()) != -1 && (d != '\n'))
                                        col++;
                                line++;
                                ref_line++;
                                // The newline was consumed here: restart the column count.
                                col = 0;
                                continue;
                        } else if (d == '*'){
                                getChar ();

                                while ((d = getChar ()) != -1){
                                        if (d == '*' && peekChar () == '/'){
                                                getChar ();
                                                col++;
                                                break;
                                        }
                                        if (d == '\n'){
                                                line++;
                                                ref_line++;
                                                // New line inside the comment: restart the column count.
                                                col = 0;
                                        } else
                                                col++;
                                }
                                continue;
                        }
                }

                /* For now, ignore pre-processor commands */
                if (col == 1 && c == '#'){
                        System.Text.StringBuilder text = new System.Text.StringBuilder ();

                        while ((c = getChar ()) != -1 && (c != '\n')){
                                text.Append ((char) c);
                        }

                        // Trim surrounding white space; on CRLF input this drops the
                        // trailing carriage return that would otherwise end up in
                        // the file name.
                        string directive = text.ToString ().Trim ();

                        // The length test keeps Substring (5) in range for a bare
                        // "#line" with no argument, which is ignored.
                        if (directive.Length > 5 && String.Compare (directive, 0, "line", 0, 4) == 0){
                                string arg = directive.Substring (5);
                                int pos;

                                if ((pos = arg.IndexOf (' ')) != -1 && pos != 0){
                                        ref_line = System.Int32.Parse (arg.Substring (0, pos));
                                        pos++;

                                        char [] quotes = { '\"' };

                                        // Strings are immutable: Trim returns the stripped
                                        // copy, which has to be stored back.
                                        ref_name = arg.Substring (pos).Trim (quotes);
                                } else
                                        ref_line = System.Int32.Parse (arg);
                        }
                        line++;
                        ref_line++;
                        // The whole line was consumed: restart the column count.
                        col = 0;
                        continue;
                }

                if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){
                        // is_punct sets doread when the token also covers the
                        // next character, which then has to be consumed.
                        if (doread){
                                getChar ();
                                col++;
                        }
                        return t;
                }

                if (c == '"'){
                        System.Text.StringBuilder s = new System.Text.StringBuilder ();

                        while ((c = getChar ()) != -1){
                                if (c == '"'){
                                        val = s.ToString ();
                                        return Token.LITERAL_STRING;
                                }

                                c = escape (c);
                                if (c == -1)
                                        return Token.ERROR;
                                s.Append ((char) c);
                        }

                        // End of input before the closing quote.
                        error_details = "CS1039: Unterminated string literal";
                        return Token.ERROR;
                }

                if (c == '\''){
                        c = getChar ();
                        if (c == '\''){
                                error_details = "CS1011: Empty character literal";
                                return Token.ERROR;
                        }
                        c = escape (c);
                        if (c == -1)
                                return Token.ERROR;
                        val = (char) c;
                        c = getChar ();
                        if (c != '\''){
                                error_details = "CS1012: Too many characters in character literal";
                                // Try to recover, read until newline or next "'"
                                while ((c = getChar ()) != -1){
                                        if (c == '\n' || c == '\'')
                                                break;

                                }
                                return Token.ERROR;
                        }
                        return Token.LITERAL_CHARACTER;
                }

                // white space
                if (c == '\n'){
                        line++;
                        ref_line++;
                        col = 0;
                        continue;
                }
                if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r'){
                        // Tabs advance to the next multiple of eight; the loop
                        // increment supplies the final +1.
                        if (c == '\t')
                                col = (((col + 8) / 8) * 8) - 1;

                        continue;
                }

                // `@' lets the identifier that follows be a keyword.
                if (c == '@'){
                        allow_keyword_as_ident = true;
                        continue;
                }

                error_details = ((char)c).ToString ();

                return Token.ERROR;
        }

        return Token.EOF;
}
887         }\r
888 \r
//
// An immutable source position: a name together with a column and a
// row.  All three values are fixed by the constructor.
//
public struct Location {
        public readonly string Name;
        public readonly int    Col;
        public readonly int    Row;

        //
        // Stores the three arguments in the fields of the same name.
        //
        public Location (string name, int col, int row)
        {
                this.Row = row;
                this.Col = col;
                this.Name = name;
        }
}
901         \r
902 }\r