* support-test-*.cs: Rename from test-*-p2.cs.
[mono.git] / mcs / mbas / mb-tokenizer.cs
1 //
2 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
3 //
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
5 //       : Manjula GHM (mmanjula@novell.com)  
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
7 //
8 // Licensed under the terms of the GNU GPL
9 //
10 // Copyright (C) 2001 A Rafael D Teixeira
11 //
12
13 namespace Mono.MonoBASIC
14 {
15         using System;
16         using System.Text;
17         using System.Collections;
18         using System.IO;
19         using System.Globalization;
20         using Mono.Languages;
21         using Mono.MonoBASIC;
22         
23         /// <summary>
24         ///    Tokenizer for MonoBASIC source code. 
25         /// </summary>
26         
27         public class Tokenizer : yyParser.yyInput
28         {
29                 TextReader reader; // character source; assigned from the constructor's 'input' argument
30                 string file_name; // source name; written by the Source setter
31                 string ref_name; // effective source name; written by both the Source and EffectiveSource setters
32                 int ref_line = 0; // reported as "VirtLine" by 'location'; exposed through EffectiveLine and used to build Location
33                 int line = 0; // reported as "Line" by 'location'; exposed read-only through Line
34                 int col = 1; // reported as "Col" by 'location'; exposed read-only through Col
35                 public int current_token = Token.ERROR; // starts as Token.ERROR; 'location' appends the error detail while it has that value
36                 public int last_token = Token.ERROR; // starts as Token.ERROR; presumably the token seen before current_token -- TODO confirm
37                 bool handle_get_set = false; // toggled through 'properties' / PropertyParsing; read by is_keyword
38                 bool cant_have_a_type_character = false; // read by is_punct to choose between type-character tokens and operator/literal tokens
39
40                 public int ExpandedTabsSize = 4; // NOTE(review): looks like the column width of a tab; its use is not visible in this part of the file
41
42                 public string location {
43                         get {
44                                 string det;
45
46                                 if (current_token == Token.ERROR)
47                                         det = "detail: " + error_details;
48                                 else
49                                         det = "";
50                                 
51                                 return "Line:     "+line+" Col: "+col + "\n" +
52                                        "VirtLine: "+ref_line +
53                                        " Token: "+current_token + " " + det;
54                         }
55                 }
56
57                 public bool properties {
58                         get {
59                                 return handle_get_set;
60                         }
61
62                         set {
63                                 handle_get_set = value;
64                         }
65                 }
66                 
67                 //
68                 // Class variables
69                 // 
70                 static Hashtable keywords; // lower-case keyword text -> Token value; filled by initTokens
71                 static NumberStyles styles; // parse flags passed to Decimal.Parse / Double.Parse in adjust_real; set in the static constructor
72                 static NumberFormatInfo csharp_format_info; // format provider passed to the same Parse calls; set in the static constructor
73                 
74                 //
75                 // Values for the associated token returned
76                 //
77                 StringBuilder number; // text of the numeric literal being scanned; filled by decimal_digits, parsed by adjust_real
78                 int putback_char = -1; // NOTE(review): presumably the pushed-back character used by putback(), -1 when empty -- confirm
79                 Object val; // boxed value of the current literal; written by adjust_real and integer_type_suffix
80                 long lon = 0; // last value parsed by hex_digits; read by integer_type_suffix
81                 
82                 //
83                 // Details about the error encountered by the tokenizer
84                 //
85                 string error_details;
86                 
87                 public string error {
88                         get {
89                                 return error_details;
90                         }
91                 }
92
93                 
94                 public string Source {
95                         get {
96                                 return file_name;
97                         }
98
99                         set {
100                                 file_name = value;
101                                 ref_name = value;
102                                 Location.SetCurrentSource(file_name);
103                         }
104                 }
105
106                 public string EffectiveSource {
107                         get {
108                                 return ref_name;
109                         }
110                         set {
111                                 ref_name = value;
112                                 Location.SetCurrentSource(ref_name);
113                         }
114                 }
115
116                 public int Line {
117                         get {
118                                 return line;
119                         }
120                 }
121
122                 public int EffectiveLine {
123                         get {
124                                 return ref_line;
125                         }
126                         set {
127                                 ref_line = value;
128                         }
129                 }
130
131                 public int Col {
132                         get {
133                                 return col;
134                         }
135                 }
136                 
137                 static void initTokens ()
138                 {
139                         keywords = new Hashtable ();
140
141                         keywords.Add ("addhandler", Token.ADDHANDLER);
142                         keywords.Add ("addressof", Token.ADDRESSOF);
143                         keywords.Add ("alias", Token.ALIAS);
144                         keywords.Add ("and", Token.AND);
145                         keywords.Add ("andalso", Token.ANDALSO);
146                         keywords.Add ("ansi", Token.ANSI);
147                         keywords.Add ("as", Token.AS);
148                         keywords.Add ("assembly", Token.ASSEMBLY);
149                         keywords.Add ("auto", Token.AUTO);
150                         keywords.Add ("binary", Token.BINARY); // Not a VB.NET Keyword 
151                         keywords.Add ("boolean", Token.BOOLEAN);
152                         keywords.Add ("byref", Token.BYREF);
153                         keywords.Add ("byte", Token.BYTE);
154                         keywords.Add ("byval", Token.BYVAL);
155                         keywords.Add ("call", Token.CALL);
156                         keywords.Add ("case", Token.CASE);
157                         keywords.Add ("catch", Token.CATCH);
158                         keywords.Add ("cbool", Token.CBOOL);
159                         keywords.Add ("cbyte", Token.CBYTE);
160                         keywords.Add ("cchar", Token.CCHAR);
161                         keywords.Add ("cdate", Token.CDATE);
162                         keywords.Add ("cdec", Token.CDEC);
163                         keywords.Add ("cdbl", Token.CDBL);
164                         keywords.Add ("char", Token.CHAR);
165                         keywords.Add ("cint", Token.CINT);
166                         keywords.Add ("class", Token.CLASS);
167                         keywords.Add ("clng", Token.CLNG);
168                         keywords.Add ("cobj", Token.COBJ);
169                         keywords.Add ("compare", Token.COMPARE); // Not a VB.NET Keyword
170                         keywords.Add ("const", Token.CONST);
171                         keywords.Add ("cshort", Token.CSHORT);
172                         keywords.Add ("csng", Token.CSNG);
173                         keywords.Add ("cstr", Token.CSTR);
174                         keywords.Add ("ctype", Token.CTYPE);
175                         keywords.Add ("date", Token.DATE);
176                         keywords.Add ("decimal", Token.DECIMAL);
177                         keywords.Add ("declare", Token.DECLARE);
178                         keywords.Add ("default", Token.DEFAULT);
179                         keywords.Add ("delegate", Token.DELEGATE);
180                         keywords.Add ("dim", Token.DIM);
181                         keywords.Add ("directcast", Token.DIRECTCAST);                  
182                         keywords.Add ("do", Token.DO);
183                         keywords.Add ("double", Token.DOUBLE);
184                         keywords.Add ("each", Token.EACH);
185                         keywords.Add ("else", Token.ELSE);
186                         keywords.Add ("elseif", Token.ELSEIF);
187                         keywords.Add ("end", Token.END);
188                         keywords.Add ("endif", Token.ENDIF); // An unused VB.NET keyword
189                         keywords.Add ("enum", Token.ENUM);
190                         keywords.Add ("erase", Token.ERASE);
191                         keywords.Add ("error", Token.ERROR);
192                         keywords.Add ("event", Token.EVENT);
193                         keywords.Add ("exit", Token.EXIT);
194                         keywords.Add ("explicit", Token.EXPLICIT); // Not a VB.NET keyword 
195                         keywords.Add ("false", Token.FALSE);
196                         keywords.Add ("finally", Token.FINALLY);
197                         keywords.Add ("for", Token.FOR);
198                         keywords.Add ("friend", Token.FRIEND);
199                         keywords.Add ("function", Token.FUNCTION);
200                         keywords.Add ("get", Token.GET);
201                         keywords.Add ("gettype", Token.GETTYPE);
202                         keywords.Add ("gosub", Token.GOSUB); // An unused VB.NET keyword 
203                         keywords.Add ("goto", Token.GOTO);
204                         keywords.Add ("handles", Token.HANDLES);
205                         keywords.Add ("if", Token.IF);
206                         keywords.Add ("implements", Token.IMPLEMENTS);
207                         keywords.Add ("imports", Token.IMPORTS);
208                         keywords.Add ("in", Token.IN);
209                         keywords.Add ("inherits", Token.INHERITS);
210                         keywords.Add ("integer", Token.INTEGER);
211                         keywords.Add ("interface", Token.INTERFACE);
212                         keywords.Add ("is", Token.IS);
213                         keywords.Add ("let ", Token.LET ); // An unused VB.NET keyword
214                         keywords.Add ("lib ", Token.LIB );
215                         keywords.Add ("like", Token.LIKE );
216                         keywords.Add ("long", Token.LONG);
217                         keywords.Add ("loop", Token.LOOP);
218                         keywords.Add ("me", Token.ME);
219                         keywords.Add ("mod", Token.MOD);
220                         keywords.Add ("module", Token.MODULE);
221                         keywords.Add ("mustinherit", Token.MUSTINHERIT);
222                         keywords.Add ("mustoverride", Token.MUSTOVERRIDE);
223                         keywords.Add ("mybase", Token.MYBASE);
224                         keywords.Add ("myclass", Token.MYCLASS);
225                         keywords.Add ("namespace", Token.NAMESPACE);
226                         keywords.Add ("new", Token.NEW);
227                         keywords.Add ("next", Token.NEXT);
228                         keywords.Add ("not", Token.NOT);
229                         keywords.Add ("nothing", Token.NOTHING);
230                         keywords.Add ("notinheritable", Token.NOTINHERITABLE);
231                         keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
232                         keywords.Add ("object", Token.OBJECT);
233                         keywords.Add ("off", Token.OFF); // Not a VB.NET Keyword 
234                         keywords.Add ("on", Token.ON);
235                         keywords.Add ("option", Token.OPTION);
236                         keywords.Add ("optional", Token.OPTIONAL);
237                         keywords.Add ("or", Token.OR);
238                         keywords.Add ("orelse", Token.ORELSE);
239                         keywords.Add ("overloads", Token.OVERLOADS);
240                         keywords.Add ("overridable", Token.OVERRIDABLE);
241                         keywords.Add ("overrides", Token.OVERRIDES);
242                         keywords.Add ("paramarray", Token.PARAM_ARRAY);
243                         keywords.Add ("preserve", Token.PRESERVE);
244                         keywords.Add ("private", Token.PRIVATE);
245                         keywords.Add ("property", Token.PROPERTY);
246                         keywords.Add ("protected", Token.PROTECTED);
247                         keywords.Add ("public", Token.PUBLIC);
248                         keywords.Add ("raiseevent", Token.RAISEEVENT);
249                         keywords.Add ("readonly", Token.READONLY);
250                         keywords.Add ("redim", Token.REDIM);
251                         keywords.Add ("rem", Token.REM);
252                         keywords.Add ("removehandler", Token.REMOVEHANDLER);
253                         keywords.Add ("resume", Token.RESUME);
254                         keywords.Add ("return", Token.RETURN);
255                         keywords.Add ("select", Token.SELECT);
256                         keywords.Add ("set", Token.SET);
257                         keywords.Add ("shadows", Token.SHADOWS);
258                         keywords.Add ("shared", Token.SHARED);
259                         keywords.Add ("short", Token.SHORT);
260                         keywords.Add ("single", Token.SINGLE);
261                         keywords.Add ("sizeof", Token.SIZEOF); // Not a VB.NET Keyword 
262                         keywords.Add ("static", Token.STATIC);
263                         keywords.Add ("step", Token.STEP);
264                         keywords.Add ("stop", Token.STOP);
265                         keywords.Add ("strict", Token.STRICT); // Not a VB.NET Keyword 
266                         keywords.Add ("string", Token.STRING);
267                         keywords.Add ("structure", Token.STRUCTURE);
268                         keywords.Add ("sub", Token.SUB);
269                         keywords.Add ("synclock", Token.SYNCLOCK);
270                         keywords.Add ("text", Token.TEXT); // Not a VB.NET Keyword
271                         keywords.Add ("then", Token.THEN);
272                         keywords.Add ("throw", Token.THROW);
273                         keywords.Add ("to", Token.TO);
274                         keywords.Add ("true", Token.TRUE);
275                         keywords.Add ("try", Token.TRY);
276                         keywords.Add ("typeof", Token.TYPEOF);
277                         keywords.Add ("unicode", Token.UNICODE);
278                         keywords.Add ("until", Token.UNTIL);
279                         keywords.Add ("variant", Token.VARIANT); // An unused VB.NET keyword
280                         keywords.Add ("wend", Token.WEND); // An unused VB.NET keyword
281                         keywords.Add ("when", Token.WHEN);
282                         keywords.Add ("while", Token.WHILE);
283                         keywords.Add ("with", Token.WITH);
284                         keywords.Add ("withevents", Token.WITHEVENTS);
285                         keywords.Add ("writeonly", Token.WRITEONLY);
286                         keywords.Add ("xor", Token.XOR);
287
288                         if (Parser.UseExtendedSyntax){
289                                 keywords.Add ("yield", Token.YIELD);
290                         }
291
292                 }
293
294                 static Tokenizer ()
295                 {
296                         initTokens ();
297                         csharp_format_info = new NumberFormatInfo ();
298                         csharp_format_info.CurrencyDecimalSeparator = ".";
299                         styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
300                 }
301
302                 public Tokenizer (System.IO.TextReader input, string fname, ArrayList defines)
303                 {
304                         this.Source = fname;
305
306                         reader = input;
307
308                         // putback an EOL at the beginning of a stream. This is a convenience that 
309                         // allows pre-processor directives to be added to the beginning of a vb file.
310                         putback('\n');
311                 }
312
313                 bool is_keyword (string name)
314                 {
315                         bool res;
316                         name = name.ToLower();
317
318                         res = keywords.Contains(name);
319                         if ((name == "GET" || name == "SET") && handle_get_set == false)
320                                 return false;
321                         return res;
322                 }
323
324                 int getKeyword (string name)
325                 {
326                         return (int) (keywords [name.ToLower()]);
327                 }
328                 
329                 public Location Location {
330                         get {
331                                 return new Location (ref_line, col);
332                         }
333                 }
334                 
335                 public bool PropertyParsing {
336                         get {
337                                 return handle_get_set;
338                         }
339
340                         set {
341                                 handle_get_set = value;
342                         }
343                 }
344                                 
345                 bool is_identifier_start_character (char c)
346                 {
347                         return Char.IsLetter (c) || c == '_' ;
348                 }
349
350                 bool is_identifier_part_character (char c)
351                 {
352                         return (Char.IsLetter (c) || Char.IsDigit (c) || c == '_');
353                 }
354
355                 int is_punct (char c, ref bool doread)
356                 {
357                         int d;
358                         int t;
359
360                         doread = false;
361                         
362                         error_details = c.ToString();
363                         
364                         d = peekChar ();
365                         
366                         switch (c){
367                         case '[':
368                                 return Token.OPEN_BRACKET;
369                         case ']':
370                                 return Token.CLOSE_BRACKET;
371                         case '{':
372                                 return Token.OPEN_BRACE;
373                         case '}':
374                                 return Token.CLOSE_BRACE;                               
375                         case '(':
376                                 return Token.OPEN_PARENS;
377                         case ')':
378                                 return Token.CLOSE_PARENS;
379                         case ',':
380                                 return Token.COMMA;
381                         case '?':
382                                 return Token.INTERR;
383                         case '!':
384                                 if (is_identifier_start_character((char)d) || cant_have_a_type_character)
385                                         return Token.EXCLAMATION;
386                                 return Token.SINGLETYPECHAR;
387                         case '$':
388                                 if (cant_have_a_type_character)
389                                         return Token.ERROR;
390                                 return Token.DOLAR_SIGN;
391                         case '@':
392                                 if (cant_have_a_type_character)
393                                         return Token.ERROR;
394                                 return Token.AT_SIGN;
395                         case '%':
396                                 if (cant_have_a_type_character)
397                                         return Token.ERROR;
398                                 return Token.PERCENT;
399                         case '#':
400                                 if(tokens_seen)
401                                 {
402                                         if (cant_have_a_type_character) 
403                                                 return ExtractDateTimeLiteral();
404                                         else
405                                                 return Token.NUMBER_SIGN;
406                                 }
407                                 else 
408                                 {
409                                         tokens_seen = true;
410                                         return Token.HASH;
411                                 } 
412                         case '&':
413                                 if (!cant_have_a_type_character)
414                                         return Token.LONGTYPECHAR;
415                                 t = handle_integer_literal_in_other_bases(d);
416                                 if (t == Token.NONE) {
417                                         t = Token.OP_CONCAT;
418                                 }
419                                 return t;                       
420                         }
421
422                         if (c == '+'){
423                                 if (d == '+')
424                                         t = Token.OP_INC;
425                                 else 
426                                         return Token.PLUS;
427                                 doread = true;
428                                 return t;
429                         }
430                         if (c == '-'){
431                                 return Token.MINUS;
432                         }
433
434                         if (c == '='){
435                                 return Token.ASSIGN;
436                         }
437
438                         if (c == '*'){
439                                 return Token.STAR;
440                         }
441
442                         if (c == '/'){
443                                 return Token.DIV;
444                         }
445
446                         if (c == '\\'){
447                                 return Token.OP_IDIV;
448                         }
449
450                         if (c == '^'){
451                                 return Token.OP_EXP;
452                         }
453
454                         if (c == '<'){
455                                 if (d == '>')
456                                 {
457                                         doread = true;
458                                         return Token.OP_NE;
459                                 }
460                                 if (d == '='){
461                                         doread = true;
462                                         return Token.OP_LE;
463                                 }
464                                 if (d == '<')
465                                 {
466                                         doread = true;
467                                         return Token.OP_SHIFT_LEFT;
468                                 }
469                                 return Token.OP_LT;
470                         }
471
472                         if (c == '>'){
473                                 if (d == '='){
474                                         doread = true;
475                                         return Token.OP_GE;
476                                 }
477                                 if (d == '>')
478                                 {
479                                         doread = true;
480                                         return Token.OP_SHIFT_RIGHT;
481                                 }
482                                 return Token.OP_GT;
483                         }
484                         
485                         if (c == ':'){
486                                 if (d == '='){
487                                         doread = true;
488                                         return Token.ATTR_ASSIGN;
489                                 }
490                                 return Token.COLON;
491                         }                       
492                         
493                         return Token.ERROR;
494                 }
495
496                 bool decimal_digits (int c)
497                 {
498                         int d;
499                         bool seen_digits = false;
500                         
501                         if (c != -1)
502                                 number.Append ((char) c);
503                         while ((d = peekChar ()) != -1){
504                                 if (Char.IsDigit ((char)d)){
505                                         number.Append ((char) d);
506                                         getChar ();
507                                         seen_digits = true;
508                                 } else
509                                         break;
510                         }
511                         return seen_digits;
512                 }
513
514                 
515                 int real_type_suffix (int c)
516                 {
517                         int t;
518                         
519                         switch (c){
520                         case 'F': case 'f':
521                                 t =  Token.LITERAL_SINGLE;
522                                 break;
523                         case 'R': case 'r':
524                                 t = Token.LITERAL_DOUBLE;
525                                 break;
526                         case 'D': case 'd':
527                                  t= Token.LITERAL_DECIMAL;
528                                 break;
529                         default:
530                                 return Token.NONE;
531                         }
532                         getChar ();
533                         return t;
534                 }
535
536                 int integer_type_suffix (int c)
537                 {
538                         int t;
539                         
540                         try {
541                         
542                                 switch (c){
543                                 case 'S': case 's':
544                                         t =  Token.LITERAL_INTEGER; // SHORT ?
545                         
546                                 // hexadecimal literals - like &H8000S is "-32768" 
547                                 // and not an overflow exception 
548                                 // Check for other literals ???
549
550                                         if(lon == 32768) {
551                                                 val = (short) lon;
552                                         }
553                                         else 
554                                                 val = ((IConvertible)val).ToInt16(null);
555                                         break;
556                                 case 'I': case 'i':
557                                         t = Token.LITERAL_INTEGER;
558                                         val = ((IConvertible)val).ToInt32(null);
559                                         break;
560                                 case 'L': case 'l':
561                                          t= Token.LITERAL_INTEGER; // LONG ?
562                                          val = ((IConvertible)val).ToInt64(null);
563                                         break;
564                                 default:
565                                         if ((long)val <= System.Int32.MaxValue &&
566                                                 (long)val >= System.Int32.MinValue) {
567                                                 val = ((IConvertible)val).ToInt32(null);
568                                                 return Token.LITERAL_INTEGER;
569                                         } else {
570                                                 val = ((IConvertible)val).ToInt64(null);
571                                                 return Token.LITERAL_INTEGER; // LONG ?
572                                         }
573                                 }
574                                 getChar ();
575                                 return t;
576                         } catch (Exception e) {
577                                 val = e.ToString();
578                                 return Token.ERROR;
579                         }
580                 }
581                 
582                 int adjust_real (int t)
583                 {
584                         string s = number.ToString ();
585
586                         switch (t){
587                         case Token.LITERAL_DECIMAL:
588                                 val = new System.Decimal ();
589                                 val = System.Decimal.Parse (
590                                         s, styles, csharp_format_info);
591                                 break;
592                         case Token.LITERAL_DOUBLE:
593                                 val = new System.Double ();
594                                 val = System.Double.Parse (
595                                         s, styles, csharp_format_info);
596                                 break;
597                         case Token.LITERAL_SINGLE:
598                                 val = new System.Double ();
599                                 val = (float) System.Double.Parse (
600                                         s, styles, csharp_format_info);
601                                 break;
602
603                         case Token.NONE:
604                                 val = new System.Double ();
605                                 val = System.Double.Parse (
606                                         s, styles, csharp_format_info);
607                                 t = Token.LITERAL_DOUBLE;
608                                 break;
609                         }
610                         return t;
611                 }
612
613                 long hex_digits ()
614                 {
615                         StringBuilder hexNumber = new StringBuilder ();
616                         
617                         int d;
618
619                         while ((d = peekChar ()) != -1){
620                                 char e = Char.ToUpper ((char) d);
621                                 
622                                 if (Char.IsDigit (e) || (e >= 'A' && e <= 'F')){
623                                         hexNumber.Append (e);
624                                         getChar ();
625                                 } else
626                                         break;
627                         }
628                         lon = System.Int64.Parse (hexNumber.ToString(), NumberStyles.HexNumber);
629                         return lon;
630                 }
631
632                 long octal_digits ()
633                 {
634                         long valueToReturn = 0;
635                         
636                         int d;
637
638                         while ((d = peekChar ()) != -1){
639                                 char e = (char)d;                       
640                                 if (Char.IsDigit (e) && (e < '8')){
641                                         valueToReturn *= 8;
642                                         valueToReturn += (d - (int)'0');
643                                         getChar ();
644                                 } else
645                                         break;
646                         }
647                         
648                         return valueToReturn;
649                 }
650
651                 int handle_integer_literal_in_other_bases(int peek)
652                 {
653                         if (peek == 'h' || peek == 'H'){
654                                 getChar ();
655                                 val = hex_digits ();
656                                 return integer_type_suffix (peekChar ());
657                         }
658                         
659                         if (peek == 'o' || peek == 'O'){
660                                 getChar ();
661                                 val = octal_digits ();
662                                 return integer_type_suffix (peekChar ());
663                         }
664                         
665                         return Token.NONE;
666                 }
667                 
668                 //
669                 // Invoked if we know we have .digits or digits
670                 //
671                 int is_number (int c)
672                 {
673                         bool is_real = false;
674                         number = new StringBuilder ();
675                         int type;
676                         bool non_prefixdecimal = false; //To capture decimals like .50
677
678                         number.Length = 0;
679
680                         if (Char.IsDigit ((char)c)){
681                                 decimal_digits (c);
682                                 c = peekChar ();        
683                                 non_prefixdecimal = true;
684                         }
685
686                         //
687                         // We need to handle the case of
688                         // "1.1" vs "1.ToString()" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
689                         //
690                         if (c == '.'){
691                                 if (non_prefixdecimal == false)
692                                          putback ('.');
693                                 if (decimal_digits (getChar())){
694                                         is_real = true;
695                                         c = peekChar ();
696                                 } else {
697                                         putback ('.');
698                                         number.Length -= 1;
699                                         val = System.Int64.Parse(number.ToString());
700                                         return integer_type_suffix('.');
701                                 }
702                         }
703                         
704                         if (c == 'e' || c == 'E'){
705                                 is_real = true;
706                                 number.Append ("e");
707                                 getChar ();
708                                 
709                                 c = peekChar ();
710                                 if (c == '+'){
711                                         number.Append ((char) c);
712                                         getChar ();
713                                         c = peekChar ();
714                                 } else if (c == '-'){
715                                         number.Append ((char) c);
716                                         getChar ();
717                                         c = peekChar ();
718                                 }
719                                 decimal_digits (-1);
720                                 c = peekChar ();
721                         }
722
723                         type = real_type_suffix (c);
724                         if (type == Token.NONE && !is_real){
725                                 val = System.Int64.Parse(number.ToString());
726                                 return integer_type_suffix(c);
727                         }
728                         
729                         return adjust_real (type);
730                 }
731                         
732                 int getChar ()
733                 {
734                         if (putback_char != -1){
735                                 int x = putback_char;
736                                 putback_char = -1;
737
738                                 return x;
739                         }
740                         return reader.Read ();
741                 }
742
743                 int peekChar ()
744                 {
745                         if (putback_char != -1)
746                                 return putback_char;
747                         return reader.Peek ();
748                 }
749                 
750
751                 void putback (int c)
752                 {
753                         if (putback_char != -1)
754                                 throw new Exception ("This should not happen putback on putback");
755                         putback_char = c;
756                 }
757
758                 public bool advance ()
759                 {
760                         return current_token != Token.EOF ;
761                 }
762
763                 public Object Value {
764                         get {
765                                 return val;
766                         }
767                 }
768
769                 public Object value ()
770                 {
771                         return val;
772                 }
773
774                 private bool IsEOL(int currentChar)
775                 {
776                         bool retVal;
777                         
778                         if (currentChar ==  0x0D) {
779                                 if (peekChar() ==  0x0A) // if it is a CR-LF pair consume LF also
780                                         getChar();
781
782                                 retVal = true;
783                         }
784                         else {
785                                 retVal = (currentChar ==  -1 || currentChar ==  0x0A || currentChar ==  0x2028 || currentChar ==  0x2029);
786                         }
787
788                         if(retVal) {
789                                 nextLine();
790                         }
791
792                         return retVal;
793                 }
794
795                 private int DropComments()              
796                 {
797                         //int d;
798                         while (!IsEOL(/*d =*/ getChar ()))
799                                 col++;
800
801                         return Token.EOL;
802                 }       
803                 
804                 public bool putbacktoken = false;
805                 public bool flag = false;               
806                 int next_token;
807                         
808                 public int token ()
809                 {
810                         int before_last_token = last_token;
811                         last_token = current_token;
812                         do
813                         {
814                                 current_token = xtoken ();
815                                 if(current_token == Token.END) {
816                                         next_token = xtoken();
817                                         putbacktoken = true;
818                                         if (next_token == Token.EOL) 
819                                                 return Token.END_EOL;
820                                          else 
821                                                 return Token.END;
822                                 }       
823                                 if (current_token == Token.COLON) {
824                                         next_token = xtoken();
825                                         putbacktoken = true;
826                                         if (next_token == Token.EOL) {
827                                                 if (last_token != Token.LABELNAME && last_token != Token.LITERAL_INTEGER) {
828                                                         current_token = Token.EOL;
829                                                         putbacktoken = false;
830                                                 }
831                                                 else if (before_last_token == Token.GOTO) {
832                                                         current_token = Token.EOL;
833                                                         putbacktoken = false;
834                                                 }
835                                         }
836                                 }
837                                 if (current_token == 0) 
838                                         return Token.EOF;
839                                 if (current_token == Token.REM)
840                                         current_token = DropComments();
841                         } while (last_token == Token.EOL && current_token == Token.EOL);
842
843                         return current_token;
844                 }
845
846                 private string GetIdentifier()
847                 {
848                         int c = getChar();
849                         if (is_identifier_start_character ((char) c))
850                                 return GetIdentifier(c);
851                         else
852                                 return null;
853                 }
854
855                 private bool IsLabel ()
856                 {
857                         char c = (char) peekChar();
858                         //putback (c);
859                         return (c == ':');
860                 }
861
862                 private string GetIdentifier(int c)
863                 {
864                         StringBuilder id = new StringBuilder ();
865
866                         id.Append ((char) c);
867                                 
868                         while ((c = peekChar ()) != -1) 
869                         {
870                                 if (is_identifier_part_character ((char) c))
871                                 {
872                                         id.Append ((char)getChar ());
873                                         col++;
874                                 } 
875                                 else 
876                                         break;
877                         }
878                         
879                         cant_have_a_type_character = false;
880                         
881                         return id.ToString();
882                 }
883
884                 private bool is_doublequote(int currentChar)
885                 {
886                         return (currentChar == '"' || 
887                                         currentChar == 0x201C || // unicode left double-quote character
888                                         currentChar == 0x201D);  // unicode right double-quote character
889                 }
890                 
891                 private bool is_whitespace(int c)
892                 {
893                         return (c == ' ' || c == '\t' || c == '\v' || c == '\r' || c == 0xa0);
894                 }
895                 
896                 private bool tokens_seen = false;
897                 
898                 private void nextLine()
899                 {
900                         cant_have_a_type_character = true;
901                         line++;
902                         ref_line++;
903                         col = 0;
904                         tokens_seen = false;
905                 }
906
907                 public int xtoken ()
908                 {
909                         int t;
910                         bool doread = false;
911                         int c;
912
913                         if (putbacktoken == true) {
914                                 putbacktoken = false;
915                                 return next_token;
916                         }
917         
918                         val = null;
919                         for (;(c = getChar ()) != -1; col++) {
920                         
921                                 // Handle line continuation character
922                                 if (c == '_') 
923                                 {
924                                         int d = peekChar();
925                                         if (!is_identifier_part_character((char)d)) {
926                                                 while ((c = getChar ()) != -1 && !IsEOL(c)) {}
927                                                 c = getChar ();
928                                                 tokens_seen = true;
929                                         }
930                                 }
931                                         
932                                 
933                                 // white space
934                                 if (is_whitespace(c)) {
935                                         // expand tabs for location
936                                         if (c == '\t')
937                                                 col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
938                                         cant_have_a_type_character = true;
939                                         continue;
940                                 }
941                                 
942                                 // Handle line comments.
943                                 if (c == '\'')
944                                         return Token.REM;                                       
945                                 
946                                 // Handle EOL.
947                                 if (IsEOL(c))
948                                 {
949                                         if (current_token == Token.EOL) // if last token was also EOL keep skipping
950                                                 continue;
951                                         return Token.EOL;
952                                 }
953                                 
954                                 // Handle escaped identifiers
955                                 if (c == '[')
956                                 {
957                                         bool is_first_token_in_line = !tokens_seen;
958                                         if ((val = GetIdentifier()) == null)
959                                                 break;
960                                         if ((c = getChar()) != ']')
961                                                 break;
962                                         tokens_seen = true;
963                                         if (IsLabel() && is_first_token_in_line)
964                                                 return Token.LABELNAME;
965
966                                         if (last_token == Token.GOTO)
967                                                 return Token.LABELNAME;
968                                         return Token.IDENTIFIER;
969                                 }
970
971                                 // Handle unescaped identifiers
972                                 if (is_identifier_start_character ((char) c))
973                                 {
974                                         string id;
975                                         bool is_first_token_in_line = !tokens_seen;
976                                         if ((id = GetIdentifier(c)) == null)
977                                                 break;
978                                         val = id;
979                                         tokens_seen = true;
980                                         if (is_keyword(id) && (current_token != Token.DOT))
981                                                 return getKeyword(id);
982
983                                         if (IsLabel() && is_first_token_in_line)
984                                                 return Token.LABELNAME;
985
986                                         if (last_token == Token.GOTO)
987                                                 return Token.LABELNAME;
988                                         return Token.IDENTIFIER;
989                                 }
990
991                                 // Treat string literals
992                                 if (is_doublequote(c)) {
993                                         cant_have_a_type_character = true;
994                                         return ExtractStringOrCharLiteral(c);
995                                 }
996                         
997                                 // handle numeric literals
998
999                                 if (Char.IsDigit ((char) c))
1000                                 {
1001                                         cant_have_a_type_character = false;
1002                                         tokens_seen = true;
1003                                         return is_number (c);
1004                                 }
1005
1006                                 if (c == '.')
1007                                 {
1008                                         cant_have_a_type_character = true;
1009                                         tokens_seen = true;
1010                                         if (Char.IsDigit ((char) peekChar ()))
1011                                                 return is_number (c);
1012                                         return Token.DOT;
1013                                 }
1014                                 if ((t = is_punct ((char)c, ref doread)) != Token.ERROR) {
1015                                         cant_have_a_type_character = true;
1016
1017                                         if (t == Token.NONE)
1018                                                 continue;
1019                                                 
1020                                         if (doread){
1021                                                 getChar ();
1022                                                 col++;
1023                                         }
1024                                         tokens_seen = true;
1025                                         return t;
1026                                 }
1027                                 
1028                                 error_details = ((char)c).ToString ();
1029                                 return Token.ERROR;
1030                         }
1031
1032                         if (current_token != Token.EOL) // if last token wasn't EOL send it before EOF
1033                                 return Token.EOL;
1034                         
1035                         return Token.EOF;
1036                 }
1037
1038                 private int ExtractDateTimeLiteral()
1039                 {
1040                         int c;
1041                         
1042                         StringBuilder sb = new StringBuilder();
1043                         for (;(c = getChar ()) != -1; col++)
1044                         {
1045                                 if (c == '#') {
1046                                         val = ParseDateLiteral(sb);
1047                                         return Token.LITERAL_DATE;
1048                                 }
1049                                 if (IsEOL(c)) {
1050                                         break;
1051                                 } 
1052                                 if (c == '-')
1053                                         c = '/';
1054                                 sb.Append((char)c);
1055                         }
1056                         return Token.ERROR;
1057                 }
1058                 
1059                 private int ExtractStringOrCharLiteral(int c)
1060                 {
1061                         StringBuilder s = new StringBuilder ();
1062
1063                         tokens_seen = true;
1064
1065                         while ((c = getChar ()) != -1){
1066                                 if (is_doublequote(c)){
1067                                         if (is_doublequote(peekChar()))
1068                                                 getChar();
1069                                         else {
1070                                                 //handle Char Literals
1071                                                 if (peekChar() == 'C' || peekChar() == 'c') {
1072                                                         getChar();
1073                                                         if (s.Length == 1) {
1074                                                                 val = s[0];
1075                                                                 return Token.LITERAL_CHARACTER;
1076                                                         } else {
1077                                                                 val = "Incorrect length for a character literal";
1078                                                                 return Token.ERROR;
1079                                                         }                                                       
1080                                                 } else {
1081                                                         val = s.ToString ();
1082                                                         return Token.LITERAL_STRING;
1083                                                 }
1084                                         }
1085                                 }
1086
1087                                 if (IsEOL(c)) {
1088                                         return Token.ERROR;
1089                                 }
1090                         
1091                                 s.Append ((char) c);
1092                         }
1093                                         
1094                         return Token.ERROR;
1095                 }
1096
1097                 static IFormatProvider enUSculture = new CultureInfo("en-US", true);
1098
1099                 private DateTime ParseDateLiteral(StringBuilder value)
1100                 {
1101                         try
1102                         {
1103                                 return DateTime.Parse(value.ToString(),
1104                                                   enUSculture,
1105                                                   DateTimeStyles.NoCurrentDateDefault | DateTimeStyles.AllowWhiteSpaces);
1106                         }
1107                         catch (FormatException ex)
1108                         {
1109                                 //TODO: What is the correct error number and message?
1110                                 Report.Error (1, Location, string.Format("Invalid date literal '{0}'", value.ToString()) 
1111                                         + Environment.NewLine + ex.ToString());
1112                         }
1113                         catch (Exception)
1114                         {
1115                                 Report.Error (1, Location, "Error parsing date literal");       //TODO: What is the correct error number and message?
1116                         }
1117                         return new DateTime();
1118                 }
1119  
1120                 public void PositionCursorAtNextPreProcessorDirective()
1121                 {
1122                         int t;
1123                         
1124                         for(t = token(); t != Token.HASH && t != Token.EOF ; t = token()); 
1125
1126                         if(t == Token.EOF)
1127                                 throw new ApplicationException("Unexpected EOF while looking for a pre-processor directive");
1128                         
1129                         if(t == Token.HASH) {
1130                                 tokens_seen = false;
1131                                 putback('#');
1132                         }
1133                 }
1134
1135         }
1136 }