2004-08-03 Martin Baulig <martin@ximian.com>
[mono.git] / mcs / mbas / mb-tokenizer.cs
1 //
2 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
3 //
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
5 //         
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
7 //
8 // Licensed under the terms of the GNU GPL
9 //
10 // Copyright (C) 2001 A Rafael D Teixeira
11 //
12
13 namespace Mono.MonoBASIC
14 {
15         using System;
16         using System.Text;
17         using System.Collections;
18         using System.IO;
19         using System.Globalization;
20         using Mono.Languages;
21         using Mono.MonoBASIC;
22         
23         /// <summary>
24         ///    Tokenizer for MonoBASIC source code. 
25         /// </summary>
26         
27         public class Tokenizer : yyParser.yyInput
28         {
// Input stream handed to the constructor.
private TextReader reader;

// Name of the file being tokenized (see Source).
private string file_name;

// Name reported as the effective source (see EffectiveSource);
// starts out equal to file_name.
private string ref_name;

// Effective line number (see EffectiveLine); also used to build Location values.
private int ref_line;

// Physical line and column counters exposed through Line and Col.
private int line;
private int col = 1;

// Last token value; the 'location' property appends error details
// while this is Token.ERROR.
public int current_token = Token.ERROR;

// When false, the parser is not inside a property body
// (see the 'properties' / PropertyParsing accessors and is_keyword).
private bool handle_get_set;

// Consulted by is_punct to decide whether '!', '$', '@', '%', '#' and '&'
// are type characters or ordinary punctuation.
private bool cant_have_a_type_character;

// NOTE(review): presumably the number of columns a tab advances;
// its use is not visible in this part of the file.
public int ExpandedTabsSize = 4;
40
/// <summary>
///    Diagnostic text with the physical line and column, the effective
///    line, the current token value and, when that token is
///    Token.ERROR, the recorded error details.
/// </summary>
public string location {
        get {
                string detail = (current_token == Token.ERROR)
                        ? "detail: " + error_details
                        : "";

                StringBuilder text = new StringBuilder ();
                text.Append ("Line:     ").Append (line);
                text.Append (" Col: ").Append (col);
                text.Append ("\n");
                text.Append ("VirtLine: ").Append (ref_line);
                text.Append (" Token: ").Append (current_token);
                text.Append (" ").Append (detail);
                return text.ToString ();
        }
}
55
/// <summary>
///    Reads or writes the handle_get_set flag (same flag as PropertyParsing).
/// </summary>
public bool properties {
        get { return this.handle_get_set; }
        set { this.handle_get_set = value; }
}
65                 
//
// Class variables
//

// Keyword table filled by initTokens: lower-case keyword text -> token value.
private static Hashtable keywords;

// Number styles and format provider set up by the static constructor
// and used by adjust_real when parsing real literals.
private static NumberStyles styles;
private static NumberFormatInfo csharp_format_info;

//
// Values for the associated token returned
//

// Text of the numeric literal currently being scanned.
private StringBuilder number;

// NOTE(review): presumably -1 means "no character pushed back";
// the putback/getChar logic is not visible in this part of the file.
private int putback_char = -1;

// Value of the most recently scanned literal.
private object val;

//
// Details about the error encountered by the tokenizer
//
private string error_details;
84                 
/// <summary>
///    Details recorded for the last tokenizer error.
/// </summary>
public string error {
        get { return this.error_details; }
}
90
91                 
/// <summary>
///    Name of the file being tokenized.  Setting it also resets the
///    effective source name and tells Location about the new source.
/// </summary>
public string Source {
        get { return this.file_name; }

        set {
                this.ref_name = this.file_name = value;
                Location.SetCurrentSource (value);
        }
}
103
/// <summary>
///    Source name used for reporting.  Setting it leaves the real
///    file name untouched but tells Location about the new name.
/// </summary>
public string EffectiveSource {
        get { return this.ref_name; }

        set {
                this.ref_name = value;
                Location.SetCurrentSource (value);
        }
}
113
/// <summary>
///    Physical line counter of the tokenizer.
/// </summary>
public int Line {
        get { return this.line; }
}
119
/// <summary>
///    Effective line number; this is the line used when building
///    Location values.
/// </summary>
public int EffectiveLine {
        get { return this.ref_line; }
        set { this.ref_line = value; }
}
128
/// <summary>
///    Column counter of the tokenizer.
/// </summary>
public int Col {
        get { return this.col; }
}
134                 
/// <summary>
///    Builds the keyword table.  Keys are lower-case because lookups
///    (is_keyword / getKeyword) lower-case the identifier first.
///    Keys must not contain whitespace: an identifier never does, so
///    a padded key could never be matched.
/// </summary>
static void initTokens ()
{
        keywords = new Hashtable ();

        keywords.Add ("addhandler", Token.ADDHANDLER);
        keywords.Add ("addressof", Token.ADDRESSOF);
        keywords.Add ("alias", Token.ALIAS);
        keywords.Add ("and", Token.AND);
        keywords.Add ("andalso", Token.ANDALSO);
        keywords.Add ("ansi", Token.ANSI);
        keywords.Add ("as", Token.AS);
        keywords.Add ("assembly", Token.ASSEMBLY);
        keywords.Add ("auto", Token.AUTO);
        keywords.Add ("binary", Token.BINARY);
        keywords.Add ("boolean", Token.BOOLEAN);
        keywords.Add ("byref", Token.BYREF);
        keywords.Add ("byte", Token.BYTE);
        keywords.Add ("byval", Token.BYVAL);
        keywords.Add ("call", Token.CALL);
        keywords.Add ("case", Token.CASE);
        keywords.Add ("catch", Token.CATCH);
        keywords.Add ("cbool", Token.CBOOL);
        keywords.Add ("cbyte", Token.CBYTE);
        keywords.Add ("cchar", Token.CCHAR);
        keywords.Add ("cdate", Token.CDATE);
        keywords.Add ("cdec", Token.CDEC);
        keywords.Add ("cdbl", Token.CDBL);
        keywords.Add ("char", Token.CHAR);
        keywords.Add ("cint", Token.CINT);
        keywords.Add ("class", Token.CLASS);
        keywords.Add ("clng", Token.CLNG);
        keywords.Add ("cobj", Token.COBJ);
        keywords.Add ("compare", Token.COMPARE);
        keywords.Add ("const", Token.CONST);
        keywords.Add ("cshort", Token.CSHORT);
        keywords.Add ("csng", Token.CSNG);
        keywords.Add ("cstr", Token.CSTR);
        keywords.Add ("ctype", Token.CTYPE);
        keywords.Add ("date", Token.DATE);
        keywords.Add ("decimal", Token.DECIMAL);
        keywords.Add ("declare", Token.DECLARE);
        keywords.Add ("default", Token.DEFAULT);
        keywords.Add ("delegate", Token.DELEGATE);
        keywords.Add ("dim", Token.DIM);
        keywords.Add ("do", Token.DO);
        keywords.Add ("double", Token.DOUBLE);
        keywords.Add ("each", Token.EACH);
        keywords.Add ("else", Token.ELSE);
        keywords.Add ("elseif", Token.ELSEIF);
        keywords.Add ("end", Token.END);
        keywords.Add ("enum", Token.ENUM);
        keywords.Add ("erase", Token.ERASE);
        keywords.Add ("error", Token.ERROR);
        keywords.Add ("event", Token.EVENT);
        keywords.Add ("exit", Token.EXIT);
        keywords.Add ("explicit", Token.EXPLICIT);
        keywords.Add ("false", Token.FALSE);
        keywords.Add ("finally", Token.FINALLY);
        keywords.Add ("for", Token.FOR);
        keywords.Add ("friend", Token.FRIEND);
        keywords.Add ("function", Token.FUNCTION);
        keywords.Add ("get", Token.GET);
        //keywords.Add ("gettype", Token.GETTYPE);
        keywords.Add ("goto", Token.GOTO);
        keywords.Add ("handles", Token.HANDLES);
        keywords.Add ("if", Token.IF);
        keywords.Add ("implements", Token.IMPLEMENTS);
        keywords.Add ("imports", Token.IMPORTS);
        keywords.Add ("in", Token.IN);
        keywords.Add ("inherits", Token.INHERITS);
        keywords.Add ("integer", Token.INTEGER);
        keywords.Add ("interface", Token.INTERFACE);
        keywords.Add ("is", Token.IS);
        // These three keys used to carry a trailing space and were
        // therefore unreachable.
        keywords.Add ("let", Token.LET);
        keywords.Add ("lib", Token.LIB);
        keywords.Add ("like", Token.LIKE);
        keywords.Add ("long", Token.LONG);
        keywords.Add ("loop", Token.LOOP);
        keywords.Add ("me", Token.ME);
        keywords.Add ("mod", Token.MOD);
        keywords.Add ("module", Token.MODULE);
        keywords.Add ("mustinherit", Token.MUSTINHERIT);
        keywords.Add ("mustoverride", Token.MUSTOVERRIDE);
        keywords.Add ("mybase", Token.MYBASE);
        keywords.Add ("myclass", Token.MYCLASS);
        keywords.Add ("namespace", Token.NAMESPACE);
        keywords.Add ("new", Token.NEW);
        keywords.Add ("next", Token.NEXT);
        keywords.Add ("not", Token.NOT);
        keywords.Add ("nothing", Token.NOTHING);
        keywords.Add ("notinheritable", Token.NOTINHERITABLE);
        keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
        keywords.Add ("object", Token.OBJECT);
        keywords.Add ("off", Token.OFF);
        keywords.Add ("on", Token.ON);
        keywords.Add ("option", Token.OPTION);
        keywords.Add ("optional", Token.OPTIONAL);
        keywords.Add ("or", Token.OR);
        keywords.Add ("orelse", Token.ORELSE);
        keywords.Add ("overloads", Token.OVERLOADS);
        keywords.Add ("overridable", Token.OVERRIDABLE);
        keywords.Add ("overrides", Token.OVERRIDES);
        keywords.Add ("paramarray", Token.PARAM_ARRAY);
        keywords.Add ("preserve", Token.PRESERVE);
        keywords.Add ("private", Token.PRIVATE);
        keywords.Add ("property", Token.PROPERTY);
        keywords.Add ("protected", Token.PROTECTED);
        keywords.Add ("public", Token.PUBLIC);
        keywords.Add ("raiseevent", Token.RAISEEVENT);
        keywords.Add ("readonly", Token.READONLY);
        keywords.Add ("redim", Token.REDIM);
        keywords.Add ("rem", Token.REM);
        keywords.Add ("removehandler", Token.REMOVEHANDLER);
        keywords.Add ("resume", Token.RESUME);
        keywords.Add ("return", Token.RETURN);
        keywords.Add ("select", Token.SELECT);
        keywords.Add ("set", Token.SET);
        keywords.Add ("shadows", Token.SHADOWS);
        keywords.Add ("shared", Token.SHARED);
        keywords.Add ("short", Token.SHORT);
        keywords.Add ("single", Token.SINGLE);
        keywords.Add ("sizeof", Token.SIZEOF);
        keywords.Add ("static", Token.STATIC);
        keywords.Add ("step", Token.STEP);
        keywords.Add ("stop", Token.STOP);
        keywords.Add ("strict", Token.STRICT);
        keywords.Add ("string", Token.STRING);
        keywords.Add ("structure", Token.STRUCTURE);
        keywords.Add ("sub", Token.SUB);
        keywords.Add ("synclock", Token.SYNCLOCK);
        keywords.Add ("text", Token.TEXT);
        keywords.Add ("then", Token.THEN);
        keywords.Add ("throw", Token.THROW);
        keywords.Add ("to", Token.TO);
        keywords.Add ("true", Token.TRUE);
        keywords.Add ("try", Token.TRY);
        keywords.Add ("typeof", Token.TYPEOF);
        keywords.Add ("unicode", Token.UNICODE);
        keywords.Add ("until", Token.UNTIL);
        keywords.Add ("variant", Token.VARIANT);
        keywords.Add ("when", Token.WHEN);
        keywords.Add ("while", Token.WHILE);
        keywords.Add ("with", Token.WITH);
        keywords.Add ("withevents", Token.WITHEVENTS);
        keywords.Add ("writeonly", Token.WRITEONLY);
        keywords.Add ("xor", Token.XOR);

        // 'yield' is only a keyword when the extended syntax is enabled.
        if (Parser.UseExtendedSyntax) {
                keywords.Add ("yield", Token.YIELD);
        }
}
287
/// <summary>
///    One-time setup: fills the keyword table and prepares the
///    culture-independent format used to parse real literals.
/// </summary>
static Tokenizer ()
{
        initTokens ();

        csharp_format_info = new NumberFormatInfo ();
        // With AllowDecimalPoint and no currency flag, the parsers
        // consult NumberDecimalSeparator, so pin that one explicitly.
        // The currency separator is kept as before.
        csharp_format_info.NumberDecimalSeparator = ".";
        csharp_format_info.CurrencyDecimalSeparator = ".";

        styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
}
295
/// <summary>
///    Creates a tokenizer over <paramref name="input"/>.
/// </summary>
/// <param name="input">Stream the characters are read from.</param>
/// <param name="fname">Name assigned to Source.</param>
/// <param name="defines">Not consulted by this constructor.</param>
public Tokenizer (System.IO.TextReader input, string fname, ArrayList defines)
{
        Source = fname;
        this.reader = input;

        // Start the stream with an artificial end-of-line.  This is a
        // convenience that allows pre-processor directives to appear at
        // the very beginning of a vb file.
        putback ('\n');
}
306
/// <summary>
///    Tells whether <paramref name="name"/> is a keyword, ignoring case.
///    "get" and "set" only count as keywords while handle_get_set is on.
/// </summary>
/// <param name="name">Identifier text as read from the source.</param>
/// <returns>true when the lower-cased name is in the keyword table.</returns>
bool is_keyword (string name)
{
        string lowered = name.ToLower ();

        // The name has just been lower-cased, so the comparison has to
        // use lower-case literals.  Comparing against "GET"/"SET" could
        // never match and left this gate dead.
        if (!handle_get_set && (lowered == "get" || lowered == "set"))
                return false;

        return keywords.Contains (lowered);
}
317
/// <summary>
///    Returns the token value stored for <paramref name="name"/>
///    (looked up case-insensitively).  The name must be a keyword:
///    the table entry is unboxed without a null check.
/// </summary>
int getKeyword (string name)
{
        string lowered = name.ToLower ();
        object token = keywords [lowered];
        return (int) token;
}
322                 
/// <summary>
///    A new Location built from the effective line and the current column.
/// </summary>
public Location Location {
        get {
                Location here = new Location (ref_line, col);
                return here;
        }
}
328                 
/// <summary>
///    Reads or writes the handle_get_set flag (same flag as 'properties').
/// </summary>
public bool PropertyParsing {
        get { return this.handle_get_set; }
        set { this.handle_get_set = value; }
}
338                                 
/// <summary>
///    An identifier may start with an underscore or any letter.
/// </summary>
bool is_identifier_start_character (char c)
{
        if (c == '_')
                return true;

        return Char.IsLetter (c);
}
343
/// <summary>
///    After its first character an identifier may contain letters,
///    digits and underscores.
/// </summary>
bool is_identifier_part_character (char c)
{
        if (c == '_')
                return true;

        return Char.IsLetter (c) || Char.IsDigit (c);
}
348
/// <summary>
///    Classifies the punctuation character <paramref name="c"/>,
///    looking one character ahead for two-character operators.
/// </summary>
/// <param name="c">Character already taken from the input.</param>
/// <param name="doread">
///    Set to true when the look-ahead character belongs to the
///    returned token, false otherwise.
/// </param>
/// <returns>The token value, or Token.ERROR when c is not recognised.</returns>
int is_punct (char c, ref bool doread)
{
        int token;

        doread = false;

        // Recorded up front so it is available if this turns out to be an error.
        error_details = c.ToString ();

        int next = peekChar ();

        switch (c) {
        case '[':
                return Token.OPEN_BRACKET;
        case ']':
                return Token.CLOSE_BRACKET;
        case '{':
                return Token.OPEN_BRACE;
        case '}':
                return Token.CLOSE_BRACE;
        case '(':
                return Token.OPEN_PARENS;
        case ')':
                return Token.CLOSE_PARENS;
        case ',':
                return Token.COMMA;
        case '?':
                return Token.INTERR;

        case '!':
                // Followed by an identifier, or where no type character
                // is allowed, '!' is the exclamation token.
                if (is_identifier_start_character ((char) next) || cant_have_a_type_character)
                        return Token.EXCLAMATION;
                return Token.SINGLETYPECHAR;

        case '$':
                return cant_have_a_type_character ? Token.ERROR : Token.DOLAR_SIGN;
        case '@':
                return cant_have_a_type_character ? Token.ERROR : Token.AT_SIGN;
        case '%':
                return cant_have_a_type_character ? Token.ERROR : Token.PERCENT;

        case '#':
                if (!tokens_seen) {
                        tokens_seen = true;
                        return Token.HASH;
                }
                if (cant_have_a_type_character)
                        return ExtractDateTimeLiteral ();
                return Token.NUMBER_SIGN;

        case '&':
                if (!cant_have_a_type_character)
                        return Token.LONGTYPECHAR;

                // "&H..." / "&O..." literals take precedence over concatenation.
                token = handle_integer_literal_in_other_bases (next);
                if (token != Token.NONE)
                        return token;
                if (next != '=')
                        return Token.OP_CONCAT;
                doread = true;
                return Token.OP_CONCAT_ASSIGN;

        case '+':
                if (next == '+')
                        token = Token.OP_INC;
                else if (next == '=')
                        token = Token.OP_ADD_ASSIGN;
                else
                        return Token.PLUS;
                doread = true;
                return token;

        case '-':
                if (next != '=')
                        return Token.MINUS;
                doread = true;
                return Token.OP_SUB_ASSIGN;

        case '=':
                return Token.ASSIGN;

        case '*':
                if (next != '=')
                        return Token.STAR;
                doread = true;
                return Token.OP_MULT_ASSIGN;

        case '/':
                if (next != '=')
                        return Token.DIV;
                doread = true;
                return Token.OP_DIV_ASSIGN;

        case '\\':
                if (next != '=')
                        return Token.OP_IDIV;
                doread = true;
                return Token.OP_IDIV_ASSIGN;

        case '^':
                if (next != '=')
                        return Token.OP_EXP;
                doread = true;
                return Token.OP_EXP_ASSIGN;

        case '<':
                if (next == '>')
                        token = Token.OP_NE;
                else if (next == '=')
                        token = Token.OP_LE;
                else if (next == '<')
                        token = Token.OP_SHIFT_LEFT;
                else
                        return Token.OP_LT;
                doread = true;
                return token;

        case '>':
                if (next == '=')
                        token = Token.OP_GE;
                else if (next == '>')
                        token = Token.OP_SHIFT_RIGHT;
                else
                        return Token.OP_GT;
                doread = true;
                return token;

        case ':':
                if (next != '=')
                        return Token.COLON;
                doread = true;
                return Token.ATTR_ASSIGN;

        default:
                return Token.ERROR;
        }
}
516
/// <summary>
///    Appends <paramref name="c"/> (unless it is -1) and then every
///    following digit of the input to the 'number' buffer.
/// </summary>
/// <param name="c">Character to append first, or -1 for none.</param>
/// <returns>true when at least one digit was consumed from the input.</returns>
bool decimal_digits (int c)
{
        if (c != -1)
                number.Append ((char) c);

        bool found = false;

        for (int next = peekChar ();
             next != -1 && Char.IsDigit ((char) next);
             next = peekChar ()) {
                number.Append ((char) next);
                getChar ();
                found = true;
        }

        return found;
}
535
536                 
/// <summary>
///    Maps a real-literal suffix (F, R or D, either case) to its
///    token and consumes it.  Any other character is left in the
///    input and yields Token.NONE.
/// </summary>
int real_type_suffix (int c)
{
        int kind;

        if (c == 'F' || c == 'f')
                kind = Token.LITERAL_SINGLE;
        else if (c == 'R' || c == 'r')
                kind = Token.LITERAL_DOUBLE;
        else if (c == 'D' || c == 'd')
                kind = Token.LITERAL_DECIMAL;
        else
                return Token.NONE;

        // The suffix was only peeked so far; take it off the input.
        getChar ();
        return kind;
}
557
/// <summary>
///    Narrows 'val' according to an integer suffix (S, I or L,
///    either case) and consumes the suffix.  Without a suffix the
///    value becomes an Int32 when it fits and an Int64 otherwise.
/// </summary>
/// <param name="c">Character following the digits (peeked, not yet read).</param>
/// <returns>
///    Token.LITERAL_INTEGER, or Token.ERROR when a conversion fails;
///    in that case 'val' holds the exception text.
/// </returns>
int integer_type_suffix (int c)
{
        try {
                switch (c) {
                case 'S': case 's':
                        val = ((IConvertible) val).ToInt16 (null); // SHORT ?
                        break;

                case 'I': case 'i':
                        val = ((IConvertible) val).ToInt32 (null);
                        break;

                case 'L': case 'l':
                        val = ((IConvertible) val).ToInt64 (null); // LONG ?
                        break;

                default:
                        // No suffix: nothing to consume, pick the narrowest type.
                        long wide = (long) val;
                        bool fits_in_int = wide <= System.Int32.MaxValue &&
                                           wide >= System.Int32.MinValue;

                        if (fits_in_int)
                                val = ((IConvertible) val).ToInt32 (null);
                        else
                                val = ((IConvertible) val).ToInt64 (null); // LONG ?
                        return Token.LITERAL_INTEGER;
                }

                getChar ();
                return Token.LITERAL_INTEGER;
        } catch (Exception e) {
                val = e.ToString ();
                return Token.ERROR;
        }
}
594                 
/// <summary>
///    Parses the text collected in 'number' as the real type
///    selected by <paramref name="t"/> and stores the result in 'val'.
/// </summary>
/// <param name="t">
///    Token.LITERAL_DECIMAL, LITERAL_DOUBLE or LITERAL_SINGLE; Token.NONE
///    (no suffix) is treated as a double.  Any other value leaves
///    'val' untouched.
/// </param>
/// <returns>The token for the literal; Token.NONE becomes LITERAL_DOUBLE.</returns>
int adjust_real (int t)
{
        string s = number.ToString ();

        switch (t) {
        case Token.LITERAL_DECIMAL:
                val = System.Decimal.Parse (s, styles, csharp_format_info);
                break;

        case Token.LITERAL_DOUBLE:
                val = System.Double.Parse (s, styles, csharp_format_info);
                break;

        case Token.LITERAL_SINGLE:
                // Parsed as a double and then narrowed.
                val = (float) System.Double.Parse (s, styles, csharp_format_info);
                break;

        case Token.NONE:
                val = System.Double.Parse (s, styles, csharp_format_info);
                t = Token.LITERAL_DOUBLE;
                break;
        }

        return t;
}
625
/// <summary>
///    Consumes a run of hexadecimal digits (0-9, A-F, either case)
///    from the input and returns their value.
/// </summary>
/// <returns>
///    The parsed value, or 0 when no hexadecimal digit follows
///    (the same result octal_digits gives for an empty run).
/// </returns>
long hex_digits ()
{
        StringBuilder hexNumber = new StringBuilder ();
        int d;

        while ((d = peekChar ()) != -1) {
                char e = Char.ToUpper ((char) d);

                // Only ASCII digits: Char.IsDigit also accepts other
                // Unicode digits, which Int64.Parse would then reject.
                bool is_hex = (e >= '0' && e <= '9') || (e >= 'A' && e <= 'F');
                if (!is_hex)
                        break;

                hexNumber.Append (e);
                getChar ();
        }

        // Int64.Parse throws FormatException on an empty string, which
        // used to escape from the tokenizer for input such as "&Hx".
        if (hexNumber.Length == 0)
                return 0;

        return System.Int64.Parse (hexNumber.ToString (), NumberStyles.HexNumber);
}
643
/// <summary>
///    Consumes a run of octal digits (0-7) from the input and
///    returns their value; an empty run yields 0.
/// </summary>
long octal_digits ()
{
        long total = 0;

        for (int next = peekChar (); next != -1; next = peekChar ()) {
                char digit = (char) next;
                if (digit < '0' || digit > '7')
                        break;

                total *= 8;
                total += (next - (int) '0');
                getChar ();
        }

        return total;
}
662
663                 int handle_integer_literal_in_other_bases(int peek)
664                 {
665                         if (peek == 'h' || peek == 'H'){
666                                 getChar ();
667                                 val = hex_digits ();
668                                 return integer_type_suffix (peekChar ());
669                         }
670                         
671                         if (peek == 'o' || peek == 'O'){
672                                 getChar ();
673                                 val = octal_digits ();
674                                 return integer_type_suffix (peekChar ());
675                         }
676                         
677                         return Token.NONE;
678                 }
679                 
680                 //
681                 // Invoked if we know we have .digits or digits
682                 //
683                 int is_number (int c)
684                 {
685                         bool is_real = false;
686                         number = new StringBuilder ();
687                         int type;
688
689                         number.Length = 0;
690
691                         if (Char.IsDigit ((char)c)){
692                                 decimal_digits (c);
693                                 c = peekChar ();
694                         }
695
696                         //
697                         // We need to handle the case of
698                         // "1.1" vs "1.ToString()" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
699                         //
700                         if (c == '.'){
701                                 if (decimal_digits (getChar())){
702                                         is_real = true;
703                                         c = peekChar ();
704                                 } else {
705                                         putback ('.');
706                                         number.Length -= 1;
707                                         val = System.Int64.Parse(number.ToString());
708                                         return integer_type_suffix('.');
709                                 }
710                         }
711                         
712                         if (c == 'e' || c == 'E'){
713                                 is_real = true;
714                                 number.Append ("e");
715                                 getChar ();
716                                 
717                                 c = peekChar ();
718                                 if (c == '+'){
719                                         number.Append ((char) c);
720                                         getChar ();
721                                         c = peekChar ();
722                                 } else if (c == '-'){
723                                         number.Append ((char) c);
724                                         getChar ();
725                                         c = peekChar ();
726                                 }
727                                 decimal_digits (-1);
728                                 c = peekChar ();
729                         }
730
731                         type = real_type_suffix (c);
732                         if (type == Token.NONE && !is_real){
733                                 val = System.Int64.Parse(number.ToString());
734                                 return integer_type_suffix(c);
735                         }
736                         
737                         return adjust_real (type);
738                 }
739                         
740                 int getChar ()
741                 {
742                         if (putback_char != -1){
743                                 int x = putback_char;
744                                 putback_char = -1;
745
746                                 return x;
747                         }
748                         return reader.Read ();
749                 }
750
751                 int peekChar ()
752                 {
753                         if (putback_char != -1)
754                                 return putback_char;
755                         return reader.Peek ();
756                 }
757
758                 void putback (int c)
759                 {
760                         if (putback_char != -1)
761                                 throw new Exception ("This should not happen putback on putback");
762                         putback_char = c;
763                 }
764
765                 public bool advance ()
766                 {
767                         return current_token != Token.EOF ;
768                 }
769
770                 public Object Value {
771                         get {
772                                 return val;
773                         }
774                 }
775
776                 public Object value ()
777                 {
778                         return val;
779                 }
780
781                 private bool IsEOL(int currentChar)
782                 {
783                         bool retVal;
784                         
785                         if (currentChar ==  0x0D) {
786                                 if (peekChar() ==  0x0A) // if it is a CR-LF pair consume LF also
787                                         getChar();
788
789                                 retVal = true;
790                         }
791                         else {
792                                 retVal = (currentChar ==  -1 || currentChar ==  0x0A || currentChar ==  0x2028 || currentChar ==  0x2029);
793                         }
794
795                         if(retVal) {
796                                 nextLine();
797                         }
798
799                         return retVal;
800                 }
801
802                 private int DropComments()              
803                 {
804                         int d;
805                         while (!IsEOL(d = getChar ()))
806                                 col++;
807
808                         return Token.EOL;
809                 }       
810                         
811                 public int token ()
812                 {
813                         int lastToken = current_token;
814                         do
815                         {
816                                 current_token = xtoken ();
817                                 if (current_token == 0) 
818                                         return Token.EOF;
819                                 if (current_token == Token.REM)
820                                         current_token = DropComments();
821                         } while (lastToken == Token.EOL && current_token == Token.EOL);
822
823                         return current_token;
824                 }
825
826                 private string GetIdentifier()
827                 {
828                         int c = getChar();
829                         if (is_identifier_start_character ((char) c))
830                                 return GetIdentifier(c);
831                         else
832                                 return null;
833                 }
834
835                 private string GetIdentifier(int c)
836                 {
837                         StringBuilder id = new StringBuilder ();
838
839                         id.Append ((char) c);
840                                 
841                         while ((c = peekChar ()) != -1) 
842                         {
843                                 if (is_identifier_part_character ((char) c))
844                                 {
845                                         id.Append ((char)getChar ());
846                                         col++;
847                                 } 
848                                 else 
849                                         break;
850                         }
851                         
852                         cant_have_a_type_character = false;
853                         
854                         return id.ToString();
855                 }
856
857                 private bool is_doublequote(int currentChar)
858                 {
859                         return (currentChar == '"' || 
860                                         currentChar == 0x201C || // unicode left double-quote character
861                                         currentChar == 0x201D);  // unicode right double-quote character
862                 }
863                 
864                 private bool is_whitespace(int c)
865                 {
866                         return (c == ' ' || c == '\t' || c == '\v' || c == '\r' || c == 0xa0);
867                 }
868                 
869                 private bool tokens_seen = false;
870                 
871                 private void nextLine()
872                 {
873                         cant_have_a_type_character = true;
874                         line++;
875                         ref_line++;
876                         col = 0;
877                         tokens_seen = false;
878                 }
879
880                 public int xtoken ()
881                 {
882                         int t;
883                         bool doread = false;
884                         int c;
885
886                         val = null;
887                         for (;(c = getChar ()) != -1; col++) {
888                         
889                                 // Handle line continuation character
890                                 if (c == '_') 
891                                 {
892                                         int d = peekChar();
893                                         if (!is_identifier_part_character((char)d)) {
894                                                 while ((c = getChar ()) != -1 && !IsEOL(c)) {}
895                                                 c = getChar ();                 
896                                         }               
897                                 }
898
899                                 // white space
900                                 if (is_whitespace(c)) {
901                                         // expand tabs for location
902                                         if (c == '\t')
903                                                 col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
904                                         cant_have_a_type_character = true;
905                                         continue;
906                                 }
907                                 
908                                 // Handle line comments.
909                                 if (c == '\'')
910                                         return Token.REM;                                       
911                                 
912                                 // Handle EOL.
913                                 if (IsEOL(c))
914                                 {
915                                         if (current_token == Token.EOL) // if last token was also EOL keep skipping
916                                                 continue;
917                                         return Token.EOL;
918                                 }
919                                 
920                                 // Handle escaped identifiers
921                                 if (c == '[')
922                                 {
923                                         if ((val = GetIdentifier()) == null)
924                                                 break;
925                                         if ((c = getChar()) != ']')
926                                                 break;
927                                         tokens_seen = true;
928                                         return Token.IDENTIFIER;
929                                 }
930
931                                 // Handle unescaped identifiers
932                                 if (is_identifier_start_character ((char) c))
933                                 {
934                                         string id;
935                                         if ((id = GetIdentifier(c)) == null)
936                                                 break;
937                                         val = id;
938                                         tokens_seen = true;
939                                         if (is_keyword(id) && (current_token != Token.DOT))
940                                                 return getKeyword(id);
941                                         return Token.IDENTIFIER;
942                                 }
943
944                                 // Treat string literals
945                                 if (is_doublequote(c)) {
946                                         cant_have_a_type_character = true;
947                                         return ExtractStringOrCharLiteral(c);
948                                 }
949                         
950                                 // handle numeric literals
951                                 if (c == '.')
952                                 {
953                                         cant_have_a_type_character = true;
954                                         tokens_seen = true;
955                                         if (Char.IsDigit ((char) peekChar ()))
956                                                 return is_number (c);
957                                         return Token.DOT;
958                                 }
959                                 
960                                 if (Char.IsDigit ((char) c))
961                                 {
962                                         cant_have_a_type_character = true;
963                                         tokens_seen = true;
964                                         return is_number (c);
965                                 }
966
967                                 if ((t = is_punct ((char)c, ref doread)) != Token.ERROR) {
968                                         cant_have_a_type_character = true;
969
970                                         if (t == Token.NONE)
971                                                 continue;
972                                                 
973                                         if (doread){
974                                                 getChar ();
975                                                 col++;
976                                         }
977                                         tokens_seen = true;
978                                         return t;
979                                 }
980                                 
981                                 error_details = ((char)c).ToString ();
982                                 return Token.ERROR;
983                         }
984
985                         if (current_token != Token.EOL) // if last token wasn't EOL send it before EOF
986                                 return Token.EOL;
987                         
988                         return Token.EOF;
989                 }
990
991                 private int ExtractDateTimeLiteral()
992                 {
993                         int c;
994                         
995                         StringBuilder sb = new StringBuilder();
996                         for (;(c = getChar ()) != -1; col++)
997                         {
998                                 if (c == '#') {
999                                         val = ParseDateLiteral(sb);
1000                                         return Token.LITERAL_DATE;
1001                                 }
1002                                 if (IsEOL(c)) {
1003                                         break;
1004                                 } 
1005                                 if (c == '-')
1006                                         c = '/';
1007                                 sb.Append((char)c);
1008                         }
1009                         return Token.ERROR;
1010                 }
1011                 
1012                 private int ExtractStringOrCharLiteral(int c)
1013                 {
1014                         StringBuilder s = new StringBuilder ();
1015
1016                         tokens_seen = true;
1017
1018                         while ((c = getChar ()) != -1){
1019                                 if (is_doublequote(c)){
1020                                         if (is_doublequote(peekChar()))
1021                                                 getChar();
1022                                         else {
1023                                                 //handle Char Literals
1024                                                 if (peekChar() == 'C' || peekChar() == 'c') {
1025                                                         getChar();
1026                                                         if (s.Length == 1) {
1027                                                                 val = s[0];
1028                                                                 return Token.LITERAL_CHARACTER;
1029                                                         } else {
1030                                                                 val = "Incorrect length for a character literal";
1031                                                                 return Token.ERROR;
1032                                                         }                                                       
1033                                                 } else {
1034                                                         val = s.ToString ();
1035                                                         return Token.LITERAL_STRING;
1036                                                 }
1037                                         }
1038                                 }
1039
1040                                 if (IsEOL(c)) {
1041                                         return Token.ERROR;
1042                                 }
1043                         
1044                                 s.Append ((char) c);
1045                         }
1046                                         
1047                         return Token.ERROR;
1048                 }
1049
1050                 static IFormatProvider enUSculture = new CultureInfo("en-US", true);
1051
1052                 private DateTime ParseDateLiteral(StringBuilder value)
1053                 {
1054                         try
1055                         {
1056                                 return DateTime.Parse(value.ToString(),
1057                                                   enUSculture,
1058                                                   DateTimeStyles.NoCurrentDateDefault | DateTimeStyles.AllowWhiteSpaces);
1059                         }
1060                         catch (FormatException ex)
1061                         {
1062                                 //TODO: What is the correct error number and message?
1063                                 Report.Error (1, Location, string.Format("Invalid date literal '{0}'", value.ToString()) 
1064                                         + Environment.NewLine + ex.ToString());
1065                         }
1066                         catch (Exception)
1067                         {
1068                                 Report.Error (1, Location, "Error parsing date literal");       //TODO: What is the correct error number and message?
1069                         }
1070                         return new DateTime();
1071                 }
1072  
1073                 public void PositionCursorAtNextPreProcessorDirective()
1074                 {
1075                         int t;
1076                         
1077                         for(t = token(); t != Token.HASH && t != Token.EOF; t = token());
1078
1079                         if(t == Token.EOF)
1080                                 throw new ApplicationException("Unexpected EOF while looking for a pre-processor directive");
1081                         
1082                         if(t == Token.HASH) {
1083                                 tokens_seen = false;
1084                                 putback('#');
1085                         }
1086                 }
1087
1088         }
1089 }