2005-07-13 Maverson Eduardo Schulze Rosa <maverson@gmail.com>
[mono.git] / mcs / mbas / mb-tokenizer.cs
1 //
2 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
3 //
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
5 //       : Manjula GHM (mmanjula@novell.com)  
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
7 //
8 // Licensed under the terms of the GNU GPL
9 //
10 // Copyright (C) 2001 A Rafael D Teixeira
11 //
12
13 namespace Mono.MonoBASIC
14 {
15         using System;
16         using System.Text;
17         using System.Collections;
18         using System.IO;
19         using System.Globalization;
20         using Mono.Languages;
21         using Mono.MonoBASIC;
22         
23         /// <summary>
24         ///    Tokenizer for MonoBASIC source code. 
25         /// </summary>
26         
27         public class Tokenizer : yyParser.yyInput
28         {
29                 TextReader reader;
30                 string file_name;
31                 string ref_name;
32                 int ref_line = 0;
33                 int line = 0;
34                 int col = 1;
35                 public int current_token = Token.ERROR;
36                 bool handle_get_set = false;
37                 bool cant_have_a_type_character = false;
38
39                 public int ExpandedTabsSize = 4; 
40
41                 public string location {
42                         get {
43                                 string det;
44
45                                 if (current_token == Token.ERROR)
46                                         det = "detail: " + error_details;
47                                 else
48                                         det = "";
49                                 
50                                 return "Line:     "+line+" Col: "+col + "\n" +
51                                        "VirtLine: "+ref_line +
52                                        " Token: "+current_token + " " + det;
53                         }
54                 }
55
56                 public bool properties {
57                         get {
58                                 return handle_get_set;
59                         }
60
61                         set {
62                                 handle_get_set = value;
63                         }
64                 }
65                 
66                 //
67                 // Class variables
68                 // 
69                 static Hashtable keywords;
70                 static NumberStyles styles;
71                 static NumberFormatInfo csharp_format_info;
72                 
73                 //
74                 // Values for the associated token returned
75                 //
76                 StringBuilder number;
77                 int putback_char = -1;
78                 Object val;
79                 long lon = 0;
80                 
81                 //
82                 // Details about the error encoutered by the tokenizer
83                 //
84                 string error_details;
85                 
86                 public string error {
87                         get {
88                                 return error_details;
89                         }
90                 }
91
92                 
93                 public string Source {
94                         get {
95                                 return file_name;
96                         }
97
98                         set {
99                                 file_name = value;
100                                 ref_name = value;
101                                 Location.SetCurrentSource(file_name);
102                         }
103                 }
104
105                 public string EffectiveSource {
106                         get {
107                                 return ref_name;
108                         }
109                         set {
110                                 ref_name = value;
111                                 Location.SetCurrentSource(ref_name);
112                         }
113                 }
114
115                 public int Line {
116                         get {
117                                 return line;
118                         }
119                 }
120
121                 public int EffectiveLine {
122                         get {
123                                 return ref_line;
124                         }
125                         set {
126                                 ref_line = value;
127                         }
128                 }
129
130                 public int Col {
131                         get {
132                                 return col;
133                         }
134                 }
135                 
136                 static void initTokens ()
137                 {
138                         keywords = new Hashtable ();
139
140                         keywords.Add ("addhandler", Token.ADDHANDLER);
141                         keywords.Add ("addressof", Token.ADDRESSOF);
142                         keywords.Add ("alias", Token.ALIAS);
143                         keywords.Add ("and", Token.AND);
144                         keywords.Add ("andalso", Token.ANDALSO);
145                         keywords.Add ("ansi", Token.ANSI);
146                         keywords.Add ("as", Token.AS);
147                         keywords.Add ("assembly", Token.ASSEMBLY);
148                         keywords.Add ("auto", Token.AUTO);
149                         keywords.Add ("binary", Token.BINARY); // Not a VB.NET Keyword 
150                         keywords.Add ("boolean", Token.BOOLEAN);
151                         keywords.Add ("byref", Token.BYREF);
152                         keywords.Add ("byte", Token.BYTE);
153                         keywords.Add ("byval", Token.BYVAL);
154                         keywords.Add ("call", Token.CALL);
155                         keywords.Add ("case", Token.CASE);
156                         keywords.Add ("catch", Token.CATCH);
157                         keywords.Add ("cbool", Token.CBOOL);
158                         keywords.Add ("cbyte", Token.CBYTE);
159                         keywords.Add ("cchar", Token.CCHAR);
160                         keywords.Add ("cdate", Token.CDATE);
161                         keywords.Add ("cdec", Token.CDEC);
162                         keywords.Add ("cdbl", Token.CDBL);
163                         keywords.Add ("char", Token.CHAR);
164                         keywords.Add ("cint", Token.CINT);
165                         keywords.Add ("class", Token.CLASS);
166                         keywords.Add ("clng", Token.CLNG);
167                         keywords.Add ("cobj", Token.COBJ);
168                         keywords.Add ("compare", Token.COMPARE); // Not a VB.NET Keyword
169                         keywords.Add ("const", Token.CONST);
170                         keywords.Add ("cshort", Token.CSHORT);
171                         keywords.Add ("csng", Token.CSNG);
172                         keywords.Add ("cstr", Token.CSTR);
173                         keywords.Add ("ctype", Token.CTYPE);
174                         keywords.Add ("date", Token.DATE);
175                         keywords.Add ("decimal", Token.DECIMAL);
176                         keywords.Add ("declare", Token.DECLARE);
177                         keywords.Add ("default", Token.DEFAULT);
178                         keywords.Add ("delegate", Token.DELEGATE);
179                         keywords.Add ("dim", Token.DIM);
180                         keywords.Add ("directcast", Token.DIRECTCAST);                  
181                         keywords.Add ("do", Token.DO);
182                         keywords.Add ("double", Token.DOUBLE);
183                         keywords.Add ("each", Token.EACH);
184                         keywords.Add ("else", Token.ELSE);
185                         keywords.Add ("elseif", Token.ELSEIF);
186                         keywords.Add ("end", Token.END);
187                         keywords.Add ("endif", Token.ENDIF); // An unused VB.NET keyword
188                         keywords.Add ("enum", Token.ENUM);
189                         keywords.Add ("erase", Token.ERASE);
190                         keywords.Add ("error", Token.ERROR);
191                         keywords.Add ("event", Token.EVENT);
192                         keywords.Add ("exit", Token.EXIT);
193                         keywords.Add ("explicit", Token.EXPLICIT); // Not a VB.NET keyword 
194                         keywords.Add ("false", Token.FALSE);
195                         keywords.Add ("finally", Token.FINALLY);
196                         keywords.Add ("for", Token.FOR);
197                         keywords.Add ("friend", Token.FRIEND);
198                         keywords.Add ("function", Token.FUNCTION);
199                         keywords.Add ("get", Token.GET);
200                         keywords.Add ("gettype", Token.GETTYPE);
201                         keywords.Add ("gosub", Token.GOSUB); // An unused VB.NET keyword 
202                         keywords.Add ("goto", Token.GOTO);
203                         keywords.Add ("handles", Token.HANDLES);
204                         keywords.Add ("if", Token.IF);
205                         keywords.Add ("implements", Token.IMPLEMENTS);
206                         keywords.Add ("imports", Token.IMPORTS);
207                         keywords.Add ("in", Token.IN);
208                         keywords.Add ("inherits", Token.INHERITS);
209                         keywords.Add ("integer", Token.INTEGER);
210                         keywords.Add ("interface", Token.INTERFACE);
211                         keywords.Add ("is", Token.IS);
212                         keywords.Add ("let ", Token.LET ); // An unused VB.NET keyword
213                         keywords.Add ("lib ", Token.LIB );
214                         keywords.Add ("like", Token.LIKE );
215                         keywords.Add ("long", Token.LONG);
216                         keywords.Add ("loop", Token.LOOP);
217                         keywords.Add ("me", Token.ME);
218                         keywords.Add ("mod", Token.MOD);
219                         keywords.Add ("module", Token.MODULE);
220                         keywords.Add ("mustinherit", Token.MUSTINHERIT);
221                         keywords.Add ("mustoverride", Token.MUSTOVERRIDE);
222                         keywords.Add ("mybase", Token.MYBASE);
223                         keywords.Add ("myclass", Token.MYCLASS);
224                         keywords.Add ("namespace", Token.NAMESPACE);
225                         keywords.Add ("new", Token.NEW);
226                         keywords.Add ("next", Token.NEXT);
227                         keywords.Add ("not", Token.NOT);
228                         keywords.Add ("nothing", Token.NOTHING);
229                         keywords.Add ("notinheritable", Token.NOTINHERITABLE);
230                         keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
231                         keywords.Add ("object", Token.OBJECT);
232                         keywords.Add ("off", Token.OFF); // Not a VB.NET Keyword 
233                         keywords.Add ("on", Token.ON);
234                         keywords.Add ("option", Token.OPTION);
235                         keywords.Add ("optional", Token.OPTIONAL);
236                         keywords.Add ("or", Token.OR);
237                         keywords.Add ("orelse", Token.ORELSE);
238                         keywords.Add ("overloads", Token.OVERLOADS);
239                         keywords.Add ("overridable", Token.OVERRIDABLE);
240                         keywords.Add ("overrides", Token.OVERRIDES);
241                         keywords.Add ("paramarray", Token.PARAM_ARRAY);
242                         keywords.Add ("preserve", Token.PRESERVE);
243                         keywords.Add ("private", Token.PRIVATE);
244                         keywords.Add ("property", Token.PROPERTY);
245                         keywords.Add ("protected", Token.PROTECTED);
246                         keywords.Add ("public", Token.PUBLIC);
247                         keywords.Add ("raiseevent", Token.RAISEEVENT);
248                         keywords.Add ("readonly", Token.READONLY);
249                         keywords.Add ("redim", Token.REDIM);
250                         keywords.Add ("rem", Token.REM);
251                         keywords.Add ("removehandler", Token.REMOVEHANDLER);
252                         keywords.Add ("resume", Token.RESUME);
253                         keywords.Add ("return", Token.RETURN);
254                         keywords.Add ("select", Token.SELECT);
255                         keywords.Add ("set", Token.SET);
256                         keywords.Add ("shadows", Token.SHADOWS);
257                         keywords.Add ("shared", Token.SHARED);
258                         keywords.Add ("short", Token.SHORT);
259                         keywords.Add ("single", Token.SINGLE);
260                         keywords.Add ("sizeof", Token.SIZEOF); // Not a VB.NET Keyword 
261                         keywords.Add ("static", Token.STATIC);
262                         keywords.Add ("step", Token.STEP);
263                         keywords.Add ("stop", Token.STOP);
264                         keywords.Add ("strict", Token.STRICT); // Not a VB.NET Keyword 
265                         keywords.Add ("string", Token.STRING);
266                         keywords.Add ("structure", Token.STRUCTURE);
267                         keywords.Add ("sub", Token.SUB);
268                         keywords.Add ("synclock", Token.SYNCLOCK);
269                         keywords.Add ("text", Token.TEXT); // Not a VB.NET Keyword
270                         keywords.Add ("then", Token.THEN);
271                         keywords.Add ("throw", Token.THROW);
272                         keywords.Add ("to", Token.TO);
273                         keywords.Add ("true", Token.TRUE);
274                         keywords.Add ("try", Token.TRY);
275                         keywords.Add ("typeof", Token.TYPEOF);
276                         keywords.Add ("unicode", Token.UNICODE);
277                         keywords.Add ("until", Token.UNTIL);
278                         keywords.Add ("variant", Token.VARIANT); // An unused VB.NET keyword
279                         keywords.Add ("wend", Token.WEND); // An unused VB.NET keyword
280                         keywords.Add ("when", Token.WHEN);
281                         keywords.Add ("while", Token.WHILE);
282                         keywords.Add ("with", Token.WITH);
283                         keywords.Add ("withevents", Token.WITHEVENTS);
284                         keywords.Add ("writeonly", Token.WRITEONLY);
285                         keywords.Add ("xor", Token.XOR);
286
287                         if (Parser.UseExtendedSyntax){
288                                 keywords.Add ("yield", Token.YIELD);
289                         }
290
291                 }
292
293                 static Tokenizer ()
294                 {
295                         initTokens ();
296                         csharp_format_info = new NumberFormatInfo ();
297                         csharp_format_info.CurrencyDecimalSeparator = ".";
298                         styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
299                 }
300
301                 public Tokenizer (System.IO.TextReader input, string fname, ArrayList defines)
302                 {
303                         this.Source = fname;
304
305                         reader = input;
306
307                         // putback an EOL at the beginning of a stream. This is a convenience that 
308                         // allows pre-processor directives to be added to the beginning of a vb file.
309                         putback('\n');
310                 }
311
312                 bool is_keyword (string name)
313                 {
314                         bool res;
315                         name = name.ToLower();
316
317                         res = keywords.Contains(name);
318                         if ((name == "GET" || name == "SET") && handle_get_set == false)
319                                 return false;
320                         return res;
321                 }
322
323                 int getKeyword (string name)
324                 {
325                         return (int) (keywords [name.ToLower()]);
326                 }
327                 
328                 public Location Location {
329                         get {
330                                 return new Location (ref_line, col);
331                         }
332                 }
333                 
334                 public bool PropertyParsing {
335                         get {
336                                 return handle_get_set;
337                         }
338
339                         set {
340                                 handle_get_set = value;
341                         }
342                 }
343                                 
344                 bool is_identifier_start_character (char c)
345                 {
346                         return Char.IsLetter (c) || c == '_' ;
347                 }
348
349                 bool is_identifier_part_character (char c)
350                 {
351                         return (Char.IsLetter (c) || Char.IsDigit (c) || c == '_');
352                 }
353
354                 int is_punct (char c, ref bool doread)
355                 {
356                         int d;
357                         int t;
358
359                         doread = false;
360                         
361                         error_details = c.ToString();
362                         
363                         d = peekChar ();
364                         
365                         switch (c){
366                         case '[':
367                                 return Token.OPEN_BRACKET;
368                         case ']':
369                                 return Token.CLOSE_BRACKET;
370                         case '{':
371                                 return Token.OPEN_BRACE;
372                         case '}':
373                                 return Token.CLOSE_BRACE;                               
374                         case '(':
375                                 return Token.OPEN_PARENS;
376                         case ')':
377                                 return Token.CLOSE_PARENS;
378                         case ',':
379                                 return Token.COMMA;
380                         case '?':
381                                 return Token.INTERR;
382                         case '!':
383                                 if (is_identifier_start_character((char)d) || cant_have_a_type_character)
384                                         return Token.EXCLAMATION;
385                                 return Token.SINGLETYPECHAR;
386                         case '$':
387                                 if (cant_have_a_type_character)
388                                         return Token.ERROR;
389                                 return Token.DOLAR_SIGN;
390                         case '@':
391                                 if (cant_have_a_type_character)
392                                         return Token.ERROR;
393                                 return Token.AT_SIGN;
394                         case '%':
395                                 if (cant_have_a_type_character)
396                                         return Token.ERROR;
397                                 return Token.PERCENT;
398                         case '#':
399                                 if(tokens_seen)
400                                 {
401                                         if (cant_have_a_type_character) 
402                                                 return ExtractDateTimeLiteral();
403                                         else
404                                                 return Token.NUMBER_SIGN;
405                                 }
406                                 else 
407                                 {
408                                         tokens_seen = true;
409                                         return Token.HASH;
410                                 } 
411                         case '&':
412                                 if (!cant_have_a_type_character)
413                                         return Token.LONGTYPECHAR;
414                                 t = handle_integer_literal_in_other_bases(d);
415                                 if (t == Token.NONE) {
416                                         t = Token.OP_CONCAT;
417                                 }
418                                 return t;                       
419                         }
420
421                         if (c == '+'){
422                                 if (d == '+')
423                                         t = Token.OP_INC;
424                                 else 
425                                         return Token.PLUS;
426                                 doread = true;
427                                 return t;
428                         }
429                         if (c == '-'){
430                                 return Token.MINUS;
431                         }
432
433                         if (c == '='){
434                                 return Token.ASSIGN;
435                         }
436
437                         if (c == '*'){
438                                 return Token.STAR;
439                         }
440
441                         if (c == '/'){
442                                 return Token.DIV;
443                         }
444
445                         if (c == '\\'){
446                                 return Token.OP_IDIV;
447                         }
448
449                         if (c == '^'){
450                                 return Token.OP_EXP;
451                         }
452
453                         if (c == '<'){
454                                 if (d == '>')
455                                 {
456                                         doread = true;
457                                         return Token.OP_NE;
458                                 }
459                                 if (d == '='){
460                                         doread = true;
461                                         return Token.OP_LE;
462                                 }
463                                 if (d == '<')
464                                 {
465                                         doread = true;
466                                         return Token.OP_SHIFT_LEFT;
467                                 }
468                                 return Token.OP_LT;
469                         }
470
471                         if (c == '>'){
472                                 if (d == '='){
473                                         doread = true;
474                                         return Token.OP_GE;
475                                 }
476                                 if (d == '>')
477                                 {
478                                         doread = true;
479                                         return Token.OP_SHIFT_RIGHT;
480                                 }
481                                 return Token.OP_GT;
482                         }
483                         
484                         if (c == ':'){
485                                 if (d == '='){
486                                         doread = true;
487                                         return Token.ATTR_ASSIGN;
488                                 }
489                                 return Token.COLON;
490                         }                       
491                         
492                         return Token.ERROR;
493                 }
494
495                 bool decimal_digits (int c)
496                 {
497                         int d;
498                         bool seen_digits = false;
499                         
500                         if (c != -1)
501                                 number.Append ((char) c);
502                         while ((d = peekChar ()) != -1){
503                                 if (Char.IsDigit ((char)d)){
504                                         number.Append ((char) d);
505                                         getChar ();
506                                         seen_digits = true;
507                                 } else
508                                         break;
509                         }
510                         return seen_digits;
511                 }
512
513                 
514                 int real_type_suffix (int c)
515                 {
516                         int t;
517                         
518                         switch (c){
519                         case 'F': case 'f':
520                                 t =  Token.LITERAL_SINGLE;
521                                 break;
522                         case 'R': case 'r':
523                                 t = Token.LITERAL_DOUBLE;
524                                 break;
525                         case 'D': case 'd':
526                                  t= Token.LITERAL_DECIMAL;
527                                 break;
528                         default:
529                                 return Token.NONE;
530                         }
531                         getChar ();
532                         return t;
533                 }
534
535                 int integer_type_suffix (int c)
536                 {
537                         int t;
538                         
539                         try {
540                         
541                                 switch (c){
542                                 case 'S': case 's':
543                                         t =  Token.LITERAL_INTEGER; // SHORT ?
544                         
545                                 // hexadecimal literals - like &H8000S is "-32768" 
546                                 // and not an overflow exception 
547                                 // Check for other literals ???
548
549                                         if(lon == 32768) {
550                                                 val = (short) lon;
551                                         }
552                                         else 
553                                                 val = ((IConvertible)val).ToInt16(null);
554                                         break;
555                                 case 'I': case 'i':
556                                         t = Token.LITERAL_INTEGER;
557                                         val = ((IConvertible)val).ToInt32(null);
558                                         break;
559                                 case 'L': case 'l':
560                                          t= Token.LITERAL_INTEGER; // LONG ?
561                                          val = ((IConvertible)val).ToInt64(null);
562                                         break;
563                                 default:
564                                         if ((long)val <= System.Int32.MaxValue &&
565                                                 (long)val >= System.Int32.MinValue) {
566                                                 val = ((IConvertible)val).ToInt32(null);
567                                                 return Token.LITERAL_INTEGER;
568                                         } else {
569                                                 val = ((IConvertible)val).ToInt64(null);
570                                                 return Token.LITERAL_INTEGER; // LONG ?
571                                         }
572                                 }
573                                 getChar ();
574                                 return t;
575                         } catch (Exception e) {
576                                 val = e.ToString();
577                                 return Token.ERROR;
578                         }
579                 }
580                 
581                 int adjust_real (int t)
582                 {
583                         string s = number.ToString ();
584
585                         switch (t){
586                         case Token.LITERAL_DECIMAL:
587                                 val = new System.Decimal ();
588                                 val = System.Decimal.Parse (
589                                         s, styles, csharp_format_info);
590                                 break;
591                         case Token.LITERAL_DOUBLE:
592                                 val = new System.Double ();
593                                 val = System.Double.Parse (
594                                         s, styles, csharp_format_info);
595                                 break;
596                         case Token.LITERAL_SINGLE:
597                                 val = new System.Double ();
598                                 val = (float) System.Double.Parse (
599                                         s, styles, csharp_format_info);
600                                 break;
601
602                         case Token.NONE:
603                                 val = new System.Double ();
604                                 val = System.Double.Parse (
605                                         s, styles, csharp_format_info);
606                                 t = Token.LITERAL_DOUBLE;
607                                 break;
608                         }
609                         return t;
610                 }
611
612                 long hex_digits ()
613                 {
614                         StringBuilder hexNumber = new StringBuilder ();
615                         
616                         int d;
617
618                         while ((d = peekChar ()) != -1){
619                                 char e = Char.ToUpper ((char) d);
620                                 
621                                 if (Char.IsDigit (e) || (e >= 'A' && e <= 'F')){
622                                         hexNumber.Append (e);
623                                         getChar ();
624                                 } else
625                                         break;
626                         }
627                         lon = System.Int64.Parse (hexNumber.ToString(), NumberStyles.HexNumber);
628                         return lon;
629                 }
630
631                 long octal_digits ()
632                 {
633                         long valueToReturn = 0;
634                         
635                         int d;
636
637                         while ((d = peekChar ()) != -1){
638                                 char e = (char)d;                       
639                                 if (Char.IsDigit (e) && (e < '8')){
640                                         valueToReturn *= 8;
641                                         valueToReturn += (d - (int)'0');
642                                         getChar ();
643                                 } else
644                                         break;
645                         }
646                         
647                         return valueToReturn;
648                 }
649
//
// Handles the base marker of a non-decimal integer literal.
//
// `peek' is the not yet consumed character that follows the literal prefix:
// 'h'/'H' selects hexadecimal, 'o'/'O' selects octal. The marker is consumed,
// the digits are stored in `val', and the token chosen by
// integer_type_suffix is returned. Any other character yields Token.NONE
// and nothing is consumed.
//
int handle_integer_literal_in_other_bases (int peek)
{
        switch (peek) {
        case 'h':
        case 'H':
                getChar ();
                val = hex_digits ();
                break;

        case 'o':
        case 'O':
                getChar ();
                val = octal_digits ();
                break;

        default:
                return Token.NONE;
        }

        return integer_type_suffix (peekChar ());
}
666                 
//
// Invoked if we know we have .digits or digits
//
// `c' is the first character of the literal, already consumed by the caller.
// The literal text is accumulated in the `number' field. Integer literals
// are parsed into `val' here and classified by integer_type_suffix; anything
// with a fraction, an exponent or a real type suffix is handed to adjust_real.
//
int is_number (int c)
{
        bool isReal = false;
        bool sawLeadingDigits = false;  // stays false for literals such as .50

        number = new StringBuilder ();

        if (Char.IsDigit ((char) c)) {
                decimal_digits (c);
                sawLeadingDigits = true;
                c = peekChar ();
        }

        //
        // We need to handle the case of
        // "1.1" vs "1.ToString()" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
        //
        if (c == '.') {
                // For ".50" the caller has already read the dot; hand it back
                // so the getChar below sees it again.
                if (!sawLeadingDigits)
                        putback ('.');

                if (!decimal_digits (getChar ())) {
                        // No digits after the dot: it is a member access on an
                        // integer literal. Return the dot to the input and drop
                        // the last character of `number' (presumably the '.'
                        // appended by decimal_digits -- verify against that helper).
                        putback ('.');
                        number.Length -= 1;
                        val = System.Int64.Parse (number.ToString ());
                        return integer_type_suffix ('.');
                }

                isReal = true;
                c = peekChar ();
        }

        if (c == 'e' || c == 'E') {
                isReal = true;
                number.Append ("e");
                getChar ();

                // Optional exponent sign.
                c = peekChar ();
                if (c == '+' || c == '-') {
                        number.Append ((char) c);
                        getChar ();
                        c = peekChar ();
                }

                decimal_digits (-1);
                c = peekChar ();
        }

        int suffix = real_type_suffix (c);
        if (suffix == Token.NONE && !isReal) {
                val = System.Int64.Parse (number.ToString ());
                return integer_type_suffix (c);
        }

        return adjust_real (suffix);
}
730                         
//
// Reads and consumes the next character: the pushed-back character if
// there is one (clearing it), otherwise the next character from `reader'.
//
int getChar ()
{
        if (putback_char == -1)
                return reader.Read ();

        int pending = putback_char;
        putback_char = -1;
        return pending;
}
741
//
// Returns the next character without consuming it: the pushed-back
// character if there is one, otherwise whatever `reader.Peek' reports.
//
int peekChar ()
{
        return putback_char != -1 ? putback_char : reader.Peek ();
}
748                 
749
//
// Pushes `c' back so that the next getChar/peekChar returns it.
// Only a single character can be pending at a time; a second putback
// before the first one is read again is a tokenizer bug and throws
// InvalidOperationException.
//
void putback (int c)
{
        if (putback_char != -1)
                throw new InvalidOperationException ("This should not happen putback on putback");
        putback_char = c;
}
756
//
// Returns false once the current token is Token.EOF, true otherwise.
//
public bool advance ()
{
        if (current_token == Token.EOF)
                return false;

        return true;
}
761
//
// The value stored in `val' by the tokenizer for the most recent token.
//
public Object Value {
        get { return val; }
}
767
//
// Method form of the Value property; returns the same object.
//
public Object value ()
{
        return Value;
}
772
//
// Tells whether `currentChar' ends a line. CR, LF, U+2028, U+2029 and
// end of input (-1) all count. For a CR followed by LF the LF is
// consumed as well. Whenever the answer is true, nextLine () is called.
//
private bool IsEOL (int currentChar)
{
        bool endsLine;

        switch (currentChar) {
        case 0x0D:
                // if it is a CR-LF pair consume LF also
                if (peekChar () == 0x0A)
                        getChar ();
                endsLine = true;
                break;

        case -1:
        case 0x0A:
        case 0x2028:
        case 0x2029:
                endsLine = true;
                break;

        default:
                endsLine = false;
                break;
        }

        if (endsLine)
                nextLine ();

        return endsLine;
}
793
//
// Skips the rest of the current line, advancing `col' for every skipped
// character, and returns Token.EOL. token () calls this when xtoken ()
// reports Token.REM.
//
private int DropComments ()
{
        for (;;) {
                int skipped = getChar ();
                if (IsEOL (skipped))
                        break;
                col++;
        }

        return Token.EOL;
}
802                 
// True while `next_token' holds a token that was read ahead by token ()
// and must be returned by the next call to xtoken ().
public bool putbacktoken;

// Not used in this part of the file -- NOTE(review): confirm who reads it.
public bool flag;

// The token read ahead after Token.END; see putbacktoken.
int next_token;
806                         
//
// Returns the next token for the parser.
//
// On top of xtoken () this:
//   * reads one token ahead after Token.END and reports Token.END_EOL when
//     an EOL follows (the token read ahead is kept for the next call);
//   * maps a raw token of 0 to Token.EOF;
//   * turns a comment (Token.REM) into the Token.EOL that ends it;
//   * collapses an EOL that follows an EOL returned by the previous call.
//
public int token ()
{
        int previous = current_token;

        for (;;) {
                current_token = xtoken ();

                if (current_token == Token.END) {
                        next_token = xtoken ();
                        putbacktoken = true;
                        return next_token == Token.EOL ? Token.END_EOL : Token.END;
                }

                if (current_token == 0)
                        return Token.EOF;

                if (current_token == Token.REM)
                        current_token = DropComments ();

                // Keep scanning only while both the previous and this token are EOL.
                if (previous != Token.EOL || current_token != Token.EOL)
                        return current_token;
        }
}
829
//
// Consumes one character and, if it can start an identifier, reads the
// whole identifier. Returns null otherwise (the character stays consumed).
//
private string GetIdentifier ()
{
        int first = getChar ();

        return is_identifier_start_character ((char) first) ? GetIdentifier (first) : null;
}
838
//
// Reads an identifier whose first character `c' has already been consumed.
// Following identifier-part characters are consumed, advancing `col' for
// each one. Clears `cant_have_a_type_character' before returning.
//
private string GetIdentifier (int c)
{
        StringBuilder name = new StringBuilder ();
        name.Append ((char) c);

        for (;;) {
                c = peekChar ();
                if (c == -1 || !is_identifier_part_character ((char) c))
                        break;

                name.Append ((char) getChar ());
                col++;
        }

        cant_have_a_type_character = false;

        return name.ToString ();
}
860
//
// True for the ASCII double quote and for the Unicode left (U+201C) and
// right (U+201D) double quotation marks.
//
private bool is_doublequote (int currentChar)
{
        switch (currentChar) {
        case '"':
        case 0x201C:    // unicode left double-quote character
        case 0x201D:    // unicode right double-quote character
                return true;
        default:
                return false;
        }
}
867                 
//
// True for the characters skipped between tokens: space, tab, vertical
// tab, carriage return and the no-break space (U+00A0).
//
private bool is_whitespace (int c)
{
        switch (c) {
        case ' ':
        case '\t':
        case '\v':
        case '\r':
        case 0xa0:
                return true;
        default:
                return false;
        }
}
872                 
// Set when a token is produced and cleared again by nextLine ().
private bool tokens_seen;
874                 
//
// Book-keeping for a line break: advances both line counters, resets the
// column and the per-line token flag, and sets `cant_have_a_type_character'.
//
private void nextLine ()
{
        line++;
        ref_line++;
        col = 0;
        tokens_seen = false;
        cant_have_a_type_character = true;
}
883
//
// Scans the input and returns the next raw token; token () post-processes
// the result for the parser.
//
// A token previously read ahead by token () (putbacktoken) is returned first.
// Otherwise whitespace is skipped and one of the following is produced:
// Token.REM for a comment start, Token.EOL, Token.IDENTIFIER or a keyword
// token (the name is stored in `val'), a literal token, Token.DOT, a
// punctuation token from is_punct, or Token.ERROR with `error_details' set.
// At end of input a final Token.EOL is sent if the last token was not one,
// then Token.EOF.
//
// A malformed escaped identifier ('[' not followed by an identifier and a
// closing ']') is reported as Token.ERROR. It must not fall through to the
// end-of-input code, which would report EOL/EOF in the middle of the file.
//
public int xtoken ()
{
        int t;
        bool doread = false;
        int c;

        if (putbacktoken) {
                putbacktoken = false;
                return next_token;
        }

        val = null;
        for (; (c = getChar ()) != -1; col++) {

                // Handle line continuation character: a '_' that is not part of
                // an identifier makes the rest of the line be skipped, and
                // scanning resumes with the first character of the next line.
                if (c == '_') {
                        int d = peekChar ();
                        if (!is_identifier_part_character ((char) d)) {
                                while ((c = getChar ()) != -1 && !IsEOL (c)) {}
                                c = getChar ();
                        }
                }

                // white space
                if (is_whitespace (c)) {
                        // expand tabs for location
                        if (c == '\t')
                                col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
                        cant_have_a_type_character = true;
                        continue;
                }

                // Handle line comments.
                if (c == '\'')
                        return Token.REM;

                // Handle EOL.
                if (IsEOL (c)) {
                        if (current_token == Token.EOL) // if last token was also EOL keep skipping
                                continue;
                        return Token.EOL;
                }

                // Handle escaped identifiers
                if (c == '[') {
                        string escaped = GetIdentifier ();
                        if (escaped == null || getChar () != ']') {
                                error_details = "malformed escaped identifier";
                                return Token.ERROR;
                        }
                        val = escaped;
                        tokens_seen = true;
                        return Token.IDENTIFIER;
                }

                // Handle unescaped identifiers
                if (is_identifier_start_character ((char) c)) {
                        string id = GetIdentifier (c);
                        val = id;
                        tokens_seen = true;
                        // A keyword right after a dot is a member name, not a keyword.
                        if (is_keyword (id) && (current_token != Token.DOT))
                                return getKeyword (id);
                        return Token.IDENTIFIER;
                }

                // Treat string literals
                if (is_doublequote (c)) {
                        cant_have_a_type_character = true;
                        return ExtractStringOrCharLiteral (c);
                }

                // handle numeric literals
                if (Char.IsDigit ((char) c)) {
                        cant_have_a_type_character = true;
                        tokens_seen = true;
                        return is_number (c);
                }

                if (c == '.') {
                        cant_have_a_type_character = true;
                        tokens_seen = true;
                        if (Char.IsDigit ((char) peekChar ()))
                                return is_number (c);
                        return Token.DOT;
                }

                if ((t = is_punct ((char) c, ref doread)) != Token.ERROR) {
                        cant_have_a_type_character = true;

                        if (t == Token.NONE)
                                continue;

                        // is_punct asks for the following character to be consumed too.
                        if (doread) {
                                getChar ();
                                col++;
                        }
                        tokens_seen = true;
                        return t;
                }

                error_details = ((char) c).ToString ();
                return Token.ERROR;
        }

        if (current_token != Token.EOL) // if last token wasn't EOL send it before EOF
                return Token.EOL;

        return Token.EOF;
}
1000
//
// Reads the text of a date literal up to the closing '#', replacing every
// '-' with '/', and stores the parsed DateTime in `val'.
// Returns Token.LITERAL_DATE, or Token.ERROR when the line or the input
// ends before the closing '#'.
//
private int ExtractDateTimeLiteral ()
{
        StringBuilder text = new StringBuilder ();
        int c = getChar ();

        while (c != -1) {
                if (c == '#') {
                        val = ParseDateLiteral (text);
                        return Token.LITERAL_DATE;
                }

                if (IsEOL (c))
                        break;

                text.Append (c == '-' ? '/' : (char) c);

                col++;
                c = getChar ();
        }

        return Token.ERROR;
}
1021                 
//
// Reads a string or character literal whose opening quote has already been
// consumed. The incoming value of `c' is not used.
//
// A doubled quote inside the literal stands for one quote character. A
// closing quote followed by 'c' or 'C' makes a character literal, which must
// contain exactly one character. The literal is stored in `val'.
// Returns Token.LITERAL_STRING, Token.LITERAL_CHARACTER, or Token.ERROR
// (wrong character literal length, or line/input end before the closing quote).
//
private int ExtractStringOrCharLiteral (int c)
{
        StringBuilder text = new StringBuilder ();

        tokens_seen = true;

        for (c = getChar (); c != -1; c = getChar ()) {
                if (is_doublequote (c)) {
                        if (!is_doublequote (peekChar ())) {
                                // Closing quote.
                                int suffix = peekChar ();
                                if (suffix != 'C' && suffix != 'c') {
                                        val = text.ToString ();
                                        return Token.LITERAL_STRING;
                                }

                                //handle Char Literals
                                getChar ();
                                if (text.Length != 1) {
                                        val = "Incorrect length for a character literal";
                                        return Token.ERROR;
                                }
                                val = text [0];
                                return Token.LITERAL_CHARACTER;
                        }

                        // Doubled quote: drop the second one, the first is appended below.
                        getChar ();
                }

                if (IsEOL (c))
                        return Token.ERROR;

                text.Append ((char) c);
        }

        return Token.ERROR;
}
1059
// Culture used to parse date literals. useUserOverride is false so that the
// accepted format does not depend on the regional settings of the user who
// runs the compiler.
static IFormatProvider enUSculture = new CultureInfo ("en-US", false);
1061
//
// Parses the text of a date literal with the en-US culture.
// On failure an error is reported through Report.Error and a default
// DateTime is returned.
//
private DateTime ParseDateLiteral (StringBuilder value)
{
        DateTime parsed = new DateTime ();

        try {
                DateTimeStyles styles = DateTimeStyles.NoCurrentDateDefault | DateTimeStyles.AllowWhiteSpaces;
                parsed = DateTime.Parse (value.ToString (), enUSculture, styles);
        } catch (FormatException ex) {
                //TODO: What is the correct error number and message?
                Report.Error (1, Location, string.Format ("Invalid date literal '{0}'", value.ToString ())
                        + Environment.NewLine + ex.ToString ());
        } catch (Exception) {
                //TODO: What is the correct error number and message?
                Report.Error (1, Location, "Error parsing date literal");
        }

        return parsed;
}
1082  
//
// Discards tokens up to the next Token.HASH, then pushes a '#' back into
// the input and clears `tokens_seen'.
// Throws ApplicationException if the end of input is reached first.
//
public void PositionCursorAtNextPreProcessorDirective ()
{
        int t;

        do {
                t = token ();
        } while (t != Token.HASH && t != Token.EOF);

        if (t == Token.EOF)
                throw new ApplicationException ("Unexpected EOF while looking for a pre-processor directive");

        // The loop only stops on HASH or EOF, so this is the HASH case.
        tokens_seen = false;
        putback ('#');
}
1097
1098         }
1099 }