2005-05-09 Sebastien Pouliot <sebastien@ximian.com>
[mono.git] / mcs / bmcs / mb-tokenizer.cs
1 //
2 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
3 //
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
5 //         
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
7 //
8 // Licensed under the terms of the GNU GPL
9 //
10 // Copyright (C) 2001 A Rafael D Teixeira
11 //
12
13 namespace Mono.CSharp
14 {
15         using System;
16         using System.Text;
17         using System.Collections;
18         using System.IO;
19         using System.Globalization;
20         //      using Mono.Languages;
21         using Mono.CSharp;
22         
23         /// <summary>
24         ///    Tokenizer for MonoBASIC source code. 
25         /// </summary>
26         
27         public class Tokenizer : yyParser.yyInput
28         {
29                 SeekableStreamReader reader;
30                 SourceFile file_name;
31                 SourceFile ref_name;
32                 int ref_line = 0;
33                 int line = 0;
34                 int col = 1;
35                 public int current_token = Token.ERROR;
36                 bool handle_get_set = false;
37                 bool cant_have_a_type_character = false;
38
39                 public int ExpandedTabsSize = 4; 
40
/// <summary>
///    Diagnostic description of the current tokenizer position.
/// </summary>
/// <remarks>
///    Contains the physical line and column, the effective line (ref_line)
///    and the numeric value of the current token. The error detail text is
///    appended only while the current token is Token.ERROR.
/// </remarks>
public string location {
        get {
                string detail = (current_token == Token.ERROR) ? "detail: " + error_details : "";

                StringBuilder text = new StringBuilder ();
                text.Append ("Line:     ").Append (line);
                text.Append (" Col: ").Append (col);
                text.Append ("\n");
                text.Append ("VirtLine: ").Append (ref_line);
                text.Append (" Token: ").Append (current_token);
                text.Append (" ").Append (detail);
                return text.ToString ();
        }
}
55
/// <summary>
///    Gets or sets the handle_get_set flag (the same field that
///    PropertyParsing exposes).
/// </summary>
public bool properties {
        get { return handle_get_set; }
        set { handle_get_set = value; }
}
65                 
66                 //
67                 // Class variables
68                 // 
69                 static Hashtable keywords;
70                 static NumberStyles styles;
71                 static NumberFormatInfo csharp_format_info;
72                 
73                 //
74                 // Values for the associated token returned
75                 //
76                 StringBuilder number;
77                 int putback_char = -1;
78                 Object val;
79                 
80                 //
81                 // Details about the error encountered by the tokenizer
82                 //
83                 string error_details;
84                 
/// <summary>
///    Read-only view of error_details, the text recorded for the most
///    recent tokenizer error.
/// </summary>
public string error {
        get { return error_details; }
}
90
91 //              public string Source {
92 //                      get {
93 //                              return file_name;
94 //                      }
95
96 //                      set {
97 //                              file_name = value;
98 //                              ref_name = value;
99 //                              //Location.SetCurrentSource(file_name);
100 //                      }
101 //              }
102
103 //              public string EffectiveSource {
104 //                      get {
105 //                              return ref_name;
106 //                      }
107 //                      set {
108 //                              ref_name = value;
109 //                              //Location.SetCurrentSource(ref_name);
110 //                      }
111 //              }
112
/// <summary>
///    Current value of the line counter.
/// </summary>
public int Line {
        get { return line; }
}
118
/// <summary>
///    Gets or sets ref_line, the line number reported through Location
///    and shown as "VirtLine" by the location property.
/// </summary>
public int EffectiveLine {
        get { return ref_line; }
        set { ref_line = value; }
}
127
/// <summary>
///    Current value of the column counter.
/// </summary>
public int Col {
        get { return col; }
}
133                 
/// <summary>
///    Builds the static keyword table that maps each lower-case keyword
///    spelling to its Token value.
/// </summary>
/// <remarks>
///    The entries are listed as (spelling, token) pairs and registered in
///    the order written. "yield" is intentionally absent: its registration
///    was commented out in the original code (it was guarded by
///    Parser.UseExtendedSyntax).
/// </remarks>
static void initTokens ()
{
        object [] table = new object [] {
                "addhandler", Token.ADDHANDLER,
                "addressof", Token.ADDRESSOF,
                "alias", Token.ALIAS,
                "and", Token.AND,
                "andalso", Token.ANDALSO,
                "ansi", Token.ANSI,
                "as", Token.AS,
                "assembly", Token.ASSEMBLY,
                "auto", Token.AUTO,
                "binary", Token.BINARY,                 // Not a VB.NET Keyword
                "boolean", Token.BOOLEAN,
                "byref", Token.BYREF,
                "byte", Token.BYTE,
                "byval", Token.BYVAL,
                "call", Token.CALL,
                "case", Token.CASE,
                "catch", Token.CATCH,
                "cbool", Token.CBOOL,
                "cbyte", Token.CBYTE,
                "cchar", Token.CCHAR,
                "cdate", Token.CDATE,
                "cdec", Token.CDEC,
                "cdbl", Token.CDBL,
                "char", Token.CHAR,
                "cint", Token.CINT,
                "class", Token.CLASS,
                "clng", Token.CLNG,
                "cobj", Token.COBJ,
                "compare", Token.COMPARE,               // Not a VB.NET Keyword
                "const", Token.CONST,
                "cshort", Token.CSHORT,
                "csng", Token.CSNG,
                "cstr", Token.CSTR,
                "ctype", Token.CTYPE,
                "date", Token.DATE,
                "decimal", Token.DECIMAL,
                "declare", Token.DECLARE,
                "default", Token.DEFAULT,
                "delegate", Token.DELEGATE,
                "dim", Token.DIM,
                "directcast", Token.DIRECTCAST,
                "do", Token.DO,
                "double", Token.DOUBLE,
                "each", Token.EACH,
                "else", Token.ELSE,
                "elseif", Token.ELSEIF,
                "end", Token.END,
                "endif", Token.ENDIF,                   // An unused VB.NET keyword
                "enum", Token.ENUM,
                "erase", Token.ERASE,
                "error", Token.ERROR,
                "event", Token.EVENT,
                "exit", Token.EXIT,
                "explicit", Token.EXPLICIT,             // Not a VB.NET keyword
                "false", Token.FALSE,
                "finally", Token.FINALLY,
                "for", Token.FOR,
                "friend", Token.FRIEND,
                "function", Token.FUNCTION,
                "get", Token.GET,
                "gettype", Token.GETTYPE,
                "gosub", Token.GOSUB,                   // An unused VB.NET keyword
                "goto", Token.GOTO,
                "handles", Token.HANDLES,
                "if", Token.IF,
                "implements", Token.IMPLEMENTS,
                "imports", Token.IMPORTS,
                "in", Token.IN,
                "inherits", Token.INHERITS,
                "integer", Token.INTEGER,
                "interface", Token.INTERFACE,
                "is", Token.IS,
                "let", Token.LET,                       // An unused VB.NET keyword
                "lib", Token.LIB,
                "like", Token.LIKE,
                "long", Token.LONG,
                "loop", Token.LOOP,
                "me", Token.ME,
                "mod", Token.MOD,
                "module", Token.MODULE,
                "mustinherit", Token.MUSTINHERIT,
                "mustoverride", Token.MUSTOVERRIDE,
                "mybase", Token.MYBASE,
                "myclass", Token.MYCLASS,
                "namespace", Token.NAMESPACE,
                "new", Token.NEW,
                "next", Token.NEXT,
                "not", Token.NOT,
                "nothing", Token.NOTHING,
                "notinheritable", Token.NOTINHERITABLE,
                "notoverridable", Token.NOTOVERRIDABLE,
                "object", Token.OBJECT,
                "off", Token.OFF,                       // Not a VB.NET Keyword
                "on", Token.ON,
                "option", Token.OPTION,
                "optional", Token.OPTIONAL,
                "or", Token.OR,
                "orelse", Token.ORELSE,
                "overloads", Token.OVERLOADS,
                "overridable", Token.OVERRIDABLE,
                "overrides", Token.OVERRIDES,
                "paramarray", Token.PARAM_ARRAY,
                "preserve", Token.PRESERVE,
                "private", Token.PRIVATE,
                "property", Token.PROPERTY,
                "protected", Token.PROTECTED,
                "public", Token.PUBLIC,
                "raiseevent", Token.RAISEEVENT,
                "readonly", Token.READONLY,
                "redim", Token.REDIM,
                "rem", Token.REM,
                "removehandler", Token.REMOVEHANDLER,
                "resume", Token.RESUME,
                "return", Token.RETURN,
                "select", Token.SELECT,
                "set", Token.SET,
                "shadows", Token.SHADOWS,
                "shared", Token.SHARED,
                "short", Token.SHORT,
                "single", Token.SINGLE,
                "sizeof", Token.SIZEOF,                 // Not a VB.NET Keyword
                "static", Token.STATIC,
                "step", Token.STEP,
                "stop", Token.STOP,
                "strict", Token.STRICT,                 // Not a VB.NET Keyword
                "string", Token.STRING,
                "structure", Token.STRUCTURE,
                "sub", Token.SUB,
                "synclock", Token.SYNCLOCK,
                "text", Token.TEXT,                     // Not a VB.NET Keyword
                "then", Token.THEN,
                "throw", Token.THROW,
                "to", Token.TO,
                "true", Token.TRUE,
                "try", Token.TRY,
                "typeof", Token.TYPEOF,
                "unicode", Token.UNICODE,
                "until", Token.UNTIL,
                "variant", Token.VARIANT,               // An unused VB.NET keyword
                "wend", Token.WEND,                     // An unused VB.NET keyword
                "when", Token.WHEN,
                "while", Token.WHILE,
                "with", Token.WITH,
                "withevents", Token.WITHEVENTS,
                "writeonly", Token.WRITEONLY,
                "xor", Token.XOR
        };

        keywords = new Hashtable ();

        // The table is a flat list: even slots hold the spelling, odd slots the token.
        for (int i = 0; i < table.Length; i += 2)
                keywords.Add (table [i], table [i + 1]);
}
294
/// <summary>
///    Type initializer: fills the keyword table and prepares the number
///    format settings used when parsing real literals.
/// </summary>
static Tokenizer ()
{
        initTokens ();

        csharp_format_info = new NumberFormatInfo ();

        // The styles below do not include AllowCurrencySymbol, so the Parse
        // calls that receive this format provider look at NumberDecimalSeparator,
        // not at the currency separator. Set it explicitly so real literals
        // are always read with '.' as the decimal point.
        csharp_format_info.NumberDecimalSeparator = ".";

        // Kept from the original code so the provider state stays a superset
        // of what it was before.
        csharp_format_info.CurrencyDecimalSeparator = ".";

        styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
}
302
/// <summary>
///    Creates a tokenizer that reads from <paramref name="input"/>.
/// </summary>
/// <param name="input">Reader supplying the source text.</param>
/// <param name="fname">Source file; stored as both file_name and ref_name.</param>
/// <param name="defines">Not used by this constructor.</param>
public Tokenizer (SeekableStreamReader input, SourceFile fname, ArrayList defines)
{
        reader = input;
        file_name = fname;
        ref_name = fname;

        // Start the stream with a pushed-back end-of-line. This convenience
        // lets pre-processor directives appear at the very beginning of a
        // vb file.
        putback ('\n');
}
314
/// <summary>
///    Tells whether <paramref name="name"/> is a keyword, ignoring case.
/// </summary>
/// <param name="name">Candidate identifier text; must not be null.</param>
/// <returns>true when the lower-cased name is present in the keyword table.</returns>
bool is_keyword (string name)
{
        // Keywords are stored in ASCII lower case, so the lookup key must be
        // produced with the invariant culture. A culture-sensitive ToLower ()
        // maps 'I' to a dotless i under e.g. Turkish, and "IF" or "INTEGER"
        // would then miss the table.
        name = name.ToLower (CultureInfo.InvariantCulture);

        bool res = keywords.Contains (name);

        // NOTE(review): name is lower-case at this point, so the comparison
        // with "GET"/"SET" below can never be true and get/set are always
        // reported as keywords, whatever handle_get_set says. The test is left
        // untouched on purpose: making it effective changes which tokens the
        // parser receives, and that must be checked against the grammar first.
        if ((name == "GET" || name == "SET") && handle_get_set == false)
                return false;
        return res;
}
325
/// <summary>
///    Returns the Token value registered for a keyword, ignoring case.
/// </summary>
/// <param name="name">Keyword text; must not be null.</param>
/// <returns>The token stored in the keyword table for the lower-cased name.</returns>
/// <remarks>
///    The table lookup yields null for a name that is not registered, and
///    the cast to int then throws; callers are expected to pass keywords only.
/// </remarks>
int getKeyword (string name)
{
        // Invariant culture: the table keys are ASCII lower case, and a
        // culture-sensitive ToLower () (e.g. Turkish dotless i) would not
        // reproduce them for names containing 'I'.
        return (int) (keywords [name.ToLower (CultureInfo.InvariantCulture)]);
}
330                 
/// <summary>
///    A new Location built from the current effective line (ref_line).
/// </summary>
public Location Location {
        get {
                Location here = new Location (ref_line);
                return here;
        }
}
336                 
/// <summary>
///    Gets or sets the handle_get_set flag (the same field that the
///    lower-case "properties" accessor exposes).
/// </summary>
public bool PropertyParsing {
        get { return handle_get_set; }
        set { handle_get_set = value; }
}
346                                 
/// <summary>
///    Tells whether <paramref name="c"/> may begin an identifier:
///    an underscore or any character Char.IsLetter accepts.
/// </summary>
static bool is_identifier_start_character (char c)
{
        if (c == '_')
                return true;

        return Char.IsLetter (c);
}
351
/// <summary>
///    Tells whether <paramref name="c"/> may appear inside an identifier:
///    an underscore, a letter or a digit.
/// </summary>
static bool is_identifier_part_character (char c)
{
        if (c == '_')
                return true;
        if (Char.IsLetter (c))
                return true;

        return Char.IsDigit (c);
}
356
/// <summary>
///    Checks that a string has the shape of an identifier.
/// </summary>
/// <param name="s">Text to check; null and empty strings are rejected.</param>
/// <returns>
///    true when the first character is an identifier start character and
///    every following character is an identifier part character.
/// </returns>
public static bool IsValidIdentifier (string s)
{
        if (s == null || s.Length == 0 || !is_identifier_start_character (s [0]))
                return false;

        int pos = 1;
        while (pos < s.Length) {
                if (!is_identifier_part_character (s [pos]))
                        return false;
                pos++;
        }

        return true;
}
371
/// <summary>
///    Classifies a punctuation or operator character.
/// </summary>
/// <param name="c">The character already read from the input.</param>
/// <param name="doread">
///    Always reset to false on entry; set to true when the peeked next
///    character forms a two-character operator together with
///    <paramref name="c"/> (presumably so the caller consumes it).
/// </param>
/// <returns>The token for the operator, or Token.ERROR when it is not recognised.</returns>
/// <remarks>
///    error_details is set to the character itself before anything else.
///    Several characters ('!', '$', '@', '%', '#', '&amp;') are decided by
///    the cant_have_a_type_character flag; '#' additionally depends on
///    tokens_seen, which this method sets on the first '#'.
/// </remarks>
int is_punct (char c, ref bool doread)
{
        doread = false;
        error_details = c.ToString ();

        int next = peekChar ();

        switch (c) {
        case '[':
                return Token.OPEN_BRACKET;
        case ']':
                return Token.CLOSE_BRACKET;
        case '{':
                return Token.OPEN_BRACE;
        case '}':
                return Token.CLOSE_BRACE;
        case '(':
                return Token.OPEN_PARENS;
        case ')':
                return Token.CLOSE_PARENS;
        case ',':
                return Token.COMMA;
        case '?':
                return Token.INTERR;

        case '!':
                if (is_identifier_start_character ((char) next) || cant_have_a_type_character)
                        return Token.EXCLAMATION;
                return Token.SINGLETYPECHAR;

        case '$':
                return cant_have_a_type_character ? Token.ERROR : Token.DOLAR_SIGN;
        case '@':
                return cant_have_a_type_character ? Token.ERROR : Token.AT_SIGN;
        case '%':
                return cant_have_a_type_character ? Token.ERROR : Token.PERCENT;

        case '#':
                // The first '#' met while tokens_seen is false is reported as HASH.
                if (!tokens_seen) {
                        tokens_seen = true;
                        return Token.HASH;
                }
                if (cant_have_a_type_character)
                        return ParseDateLiteral ();
                return Token.NUMBER_SIGN;

        case '&':
                if (!cant_have_a_type_character)
                        return Token.LONGTYPECHAR;

                // Either the start of an &H / &O literal or the concatenation operator.
                int literal = handle_integer_literal_in_other_bases (next);
                if (literal == Token.NONE)
                        literal = Token.OP_CONCAT;
                return literal;

        case '+':
                if (next != '+')
                        return Token.PLUS;
                doread = true;
                return Token.OP_INC;

        case '-':
                return Token.MINUS;
        case '=':
                return Token.ASSIGN;
        case '*':
                return Token.STAR;
        case '/':
                return Token.DIV;
        case '\\':
                return Token.OP_IDIV;
        case '^':
                return Token.OP_EXP;

        case '<':
                switch (next) {
                case '>':
                        doread = true;
                        return Token.OP_NE;
                case '=':
                        doread = true;
                        return Token.OP_LE;
                case '<':
                        doread = true;
                        return Token.OP_SHIFT_LEFT;
                default:
                        return Token.OP_LT;
                }

        case '>':
                switch (next) {
                case '=':
                        doread = true;
                        return Token.OP_GE;
                case '>':
                        doread = true;
                        return Token.OP_SHIFT_RIGHT;
                default:
                        return Token.OP_GT;
                }

        case ':':
                if (next != '=')
                        return Token.COLON;
                doread = true;
                return Token.ATTR_ASSIGN;

        default:
                return Token.ERROR;
        }
}
512
/// <summary>
///    Appends a run of decimal digits from the input to the number buffer.
/// </summary>
/// <param name="c">
///    A character to append first without inspecting it, or -1 to append
///    nothing up front.
/// </param>
/// <returns>
///    true when at least one digit was consumed from the input;
///    <paramref name="c"/> itself does not count.
/// </returns>
bool decimal_digits (int c)
{
        if (c != -1)
                number.Append ((char) c);

        bool consumed_any = false;

        // Peek first and consume only digits, so the character that ends the run stays unread.
        for (int next = peekChar (); next != -1 && Char.IsDigit ((char) next); next = peekChar ()) {
                number.Append ((char) next);
                getChar ();
                consumed_any = true;
        }

        return consumed_any;
}
531
532                 
/// <summary>
///    Maps a real-literal suffix character to its literal token.
/// </summary>
/// <param name="c">Candidate suffix: F, R or D in either case.</param>
/// <returns>
///    LITERAL_SINGLE for F, LITERAL_DOUBLE for R, LITERAL_DECIMAL for D;
///    Token.NONE for anything else. getChar () is called only when a suffix
///    is recognised.
/// </returns>
int real_type_suffix (int c)
{
        int kind;

        if (c == 'F' || c == 'f')
                kind = Token.LITERAL_SINGLE;
        else if (c == 'R' || c == 'r')
                kind = Token.LITERAL_DOUBLE;
        else if (c == 'D' || c == 'd')
                kind = Token.LITERAL_DECIMAL;
        else
                return Token.NONE;

        getChar ();
        return kind;
}
553
/// <summary>
///    Converts the integer held in val according to an optional type
///    suffix and returns the literal token.
/// </summary>
/// <param name="c">Candidate suffix: S (Int16), I (Int32) or L (Int64), in either case.</param>
/// <returns>
///    Token.LITERAL_INTEGER on success. Token.ERROR when the conversion
///    throws; val then holds the exception text.
/// </returns>
/// <remarks>
///    With a recognised suffix, getChar () is called after the conversion.
///    Without one, nothing is consumed and val becomes an Int32 when it
///    fits the Int32 range, otherwise an Int64.
/// </remarks>
int integer_type_suffix (int c)
{
        try {
                int kind;

                switch (c) {
                case 'S': case 's':
                        val = ((IConvertible) val).ToInt16 (null);
                        kind = Token.LITERAL_INTEGER; // SHORT ?
                        break;
                case 'I': case 'i':
                        val = ((IConvertible) val).ToInt32 (null);
                        kind = Token.LITERAL_INTEGER;
                        break;
                case 'L': case 'l':
                        val = ((IConvertible) val).ToInt64 (null);
                        kind = Token.LITERAL_INTEGER; // LONG ?
                        break;
                default:
                        // No suffix: choose the narrowest of Int32 / Int64 that holds the value.
                        long magnitude = (long) val;
                        bool fits_int32 = magnitude <= System.Int32.MaxValue &&
                                          magnitude >= System.Int32.MinValue;
                        if (fits_int32)
                                val = ((IConvertible) val).ToInt32 (null);
                        else
                                val = ((IConvertible) val).ToInt64 (null); // LONG ?
                        return Token.LITERAL_INTEGER;
                }

                getChar ();
                return kind;
        } catch (Exception failure) {
                val = failure.ToString ();
                return Token.ERROR;
        }
}
590                 
/// <summary>
///    Parses the text in the number buffer as a real literal of the
///    requested kind and stores the result in val.
/// </summary>
/// <param name="t">
///    LITERAL_DECIMAL, LITERAL_DOUBLE, LITERAL_SINGLE, or Token.NONE when
///    no suffix was given.
/// </param>
/// <returns>
///    <paramref name="t"/>, except that Token.NONE becomes LITERAL_DOUBLE.
///    Any other token is returned unchanged and val is left alone.
/// </returns>
/// <remarks>
///    val is set to zero before each parse, so it holds zero if the parse throws.
/// </remarks>
int adjust_real (int t)
{
        string text = number.ToString ();

        switch (t) {
        case Token.LITERAL_DECIMAL:
                val = Decimal.Zero;
                val = Decimal.Parse (text, styles, csharp_format_info);
                break;

        case Token.LITERAL_SINGLE:
                // Parsed as a double first, then narrowed to float.
                val = 0.0;
                val = (float) Double.Parse (text, styles, csharp_format_info);
                break;

        case Token.LITERAL_DOUBLE:
        case Token.NONE:
                // A literal without a suffix is a double.
                val = 0.0;
                val = Double.Parse (text, styles, csharp_format_info);
                t = Token.LITERAL_DOUBLE;
                break;
        }

        return t;
}
621
/// <summary>
///    Consumes a run of hexadecimal digits from the input and returns
///    their value.
/// </summary>
/// <returns>The digits parsed with Int64.Parse and NumberStyles.HexNumber.</returns>
/// <remarks>
///    Nothing is caught here: whatever Int64.Parse throws for the
///    collected text (including an empty run) propagates to the caller.
/// </remarks>
long hex_digits ()
{
        StringBuilder digits = new StringBuilder ();

        for (;;) {
                int next = peekChar ();
                if (next == -1)
                        break;

                char upper = Char.ToUpper ((char) next);
                bool is_hex = Char.IsDigit (upper) || (upper >= 'A' && upper <= 'F');
                if (!is_hex)
                        break;

                digits.Append (upper);
                getChar ();
        }

        return Int64.Parse (digits.ToString (), NumberStyles.HexNumber);
}
639
/// <summary>
///    Consumes a run of octal digits ('0' to '7') from the input and
///    returns the accumulated value.
/// </summary>
/// <returns>The value of the digits read; 0 when there are none.</returns>
long octal_digits ()
{
        long total = 0;

        for (;;) {
                int next = peekChar ();
                if (next == -1)
                        break;

                char digit = (char) next;
                if (!Char.IsDigit (digit) || digit >= '8')
                        break;

                total = total * 8 + (next - (int) '0');
                getChar ();
        }

        return total;
}
658
/// <summary>
///    Reads a hexadecimal (H) or octal (O) integer literal whose base
///    letter is the character just peeked.
/// </summary>
/// <param name="peek">The peeked character that follows the '&amp;'.</param>
/// <returns>
///    Token.NONE when <paramref name="peek"/> is not a base letter (nothing
///    is consumed); otherwise the result of integer_type_suffix, or
///    Token.ERROR when the hexadecimal digits cannot be parsed.
/// </returns>
int handle_integer_literal_in_other_bases (int peek)
{
        if (peek == 'h' || peek == 'H') {
                getChar ();
                try {
                        val = hex_digits ();
                } catch (FormatException) {
                        // hex_digits hands the collected text to Int64.Parse, which
                        // rejects an empty run. Report a token error instead of
                        // letting the exception escape the tokenizer.
                        error_details = "Hexadecimal literal has no digits";
                        val = error_details;
                        return Token.ERROR;
                } catch (OverflowException) {
                        error_details = "Hexadecimal literal does not fit in 64 bits";
                        val = error_details;
                        return Token.ERROR;
                }
                return integer_type_suffix (peekChar ());
        }

        if (peek == 'o' || peek == 'O') {
                getChar ();
                val = octal_digits ();
                return integer_type_suffix (peekChar ());
        }

        return Token.NONE;
}
675                 
//
// Invoked if we know we have .digits or digits
//
// Collects the literal's text into a fresh `number' buffer and returns the
// token produced by integer_type_suffix (integer literals, value stored in
// `val') or by adjust_real (literals with a fraction, an exponent or a real
// type suffix).
//
int is_number (int c)
{
        bool saw_real_part = false;

        number = new StringBuilder ();

        if (Char.IsDigit ((char) c)) {
                decimal_digits (c);
                c = peekChar ();
        }

        //
        // We need to handle the case of
        // "1.1" vs "1.ToString()" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
        //
        if (c == '.') {
                if (!decimal_digits (getChar ())) {
                        // No digits after the dot: hand the dot back and
                        // finish the literal as an integer.
                        putback ('.');
                        number.Length -= 1;
                        val = System.Int64.Parse (number.ToString ());
                        return integer_type_suffix ('.');
                }

                saw_real_part = true;
                c = peekChar ();
        }

        if (c == 'e' || c == 'E') {
                saw_real_part = true;
                number.Append ("e");
                getChar ();

                // Optional sign of the exponent.
                int sign = peekChar ();
                if (sign == '+' || sign == '-') {
                        number.Append ((char) sign);
                        getChar ();
                }

                decimal_digits (-1);
                c = peekChar ();
        }

        int suffix = real_type_suffix (c);
        if (suffix == Token.NONE && !saw_real_part) {
                val = System.Int64.Parse (number.ToString ());
                return integer_type_suffix (c);
        }

        return adjust_real (suffix);
}
735                         
/// <summary>
///   Returns the next input character and consumes it.  A character
///   pending in putback_char is returned (and cleared) first; otherwise
///   the character comes from reader.Read ().
/// </summary>
int getChar ()
{
        if (putback_char == -1)
                return reader.Read ();

        int pending = putback_char;
        putback_char = -1;
        return pending;
}
746
/// <summary>
///   Returns the next input character without consuming it: the pending
///   putback character when there is one, reader.Peek () otherwise.
/// </summary>
int peekChar ()
{
        return putback_char != -1 ? putback_char : reader.Peek ();
}
753
/// <summary>
///   Pushes <paramref name="c"/> back so that the next peekChar or
///   getChar returns it.  Only one character can be pending at a time.
/// </summary>
/// <param name="c">The character to hand back to the input.</param>
/// <exception cref="System.InvalidOperationException">
///   A character is already pending.  This replaces the bare
///   System.Exception thrown previously; handlers catching Exception
///   still catch it.
/// </exception>
void putback (int c)
{
        if (putback_char != -1)
                throw new InvalidOperationException ("This should not happen putback on putback");

        putback_char = c;
}
760
/// <summary>
///   Reports whether tokens remain: false once current_token is
///   Token.EOF, true otherwise.  Does not read any input itself.
/// </summary>
public bool advance ()
{
        if (current_token == Token.EOF)
                return false;

        return true;
}
765
/// <summary>
///   The value stored in val by the tokenizer (xtoken resets it to null
///   before scanning each token).
/// </summary>
public object Value {
        get { return val; }
}
771
/// <summary>
///   Method form of the value accessor: returns the current contents
///   of val.
/// </summary>
public object value ()
{
        return val;
}
776
/// <summary>
///   Tests whether <paramref name="currentChar"/> ends a line and, if
///   so, advances the line bookkeeping through nextLine ().
/// </summary>
/// <param name="currentChar">
///   The character just read.  CR, LF, U+2028, U+2029 and -1 (end of
///   input) all count as end of line.
/// </param>
/// <returns>true when the character ends a line.</returns>
private bool IsEOL (int currentChar)
{
        bool atLineEnd;

        switch (currentChar) {
        case 0x0D:
                // A CR-LF pair is one line break: swallow the LF too.
                if (peekChar () == 0x0A)
                        getChar ();
                atLineEnd = true;
                break;

        case -1:
        case 0x0A:
        case 0x2028:
        case 0x2029:
                atLineEnd = true;
                break;

        default:
                atLineEnd = false;
                break;
        }

        if (atLineEnd)
                nextLine ();

        return atLineEnd;
}
797
/// <summary>
///   Discards the rest of the current line, counting one column per
///   discarded character, up to and including the end of line detected
///   by IsEOL.
/// </summary>
/// <returns>Always Token.EOL.</returns>
private int DropComments ()
{
        while (!IsEOL (getChar ()))
                col++;

        return Token.EOL;
}
806                         
/// <summary>
///   Fetches the next token from xtoken into current_token and
///   returns it.
/// </summary>
/// <remarks>
///   A REM token is replaced by the result of DropComments ().  When the
///   token that was current before this call was EOL, further EOL tokens
///   are skipped.  A raw token of 0 is reported as Token.EOF (leaving
///   current_token at 0).
/// </remarks>
public int token ()
{
        int previous = current_token;

        while (true) {
                current_token = xtoken ();

                if (current_token == 0)
                        return Token.EOF;

                if (current_token == Token.REM)
                        current_token = DropComments ();

                // Only a run of EOLs following an EOL is collapsed.
                if (previous != Token.EOL || current_token != Token.EOL)
                        break;
        }

        // Console.WriteLine ("Token = " + val);

        return current_token;
}
823
/// <summary>
///   Consumes one character and, if it can start an identifier, reads
///   the identifier that begins with it.
/// </summary>
/// <returns>
///   The identifier text, or null when the consumed character cannot
///   start an identifier (that character is not put back).
/// </returns>
private string GetIdentifier ()
{
        int first = getChar ();

        return is_identifier_start_character ((char) first)
                ? GetIdentifier (first)
                : null;
}
832
/// <summary>
///   Reads an identifier whose first character, <paramref name="c"/>,
///   has already been consumed.  Following identifier-part characters
///   are consumed (advancing col by one each) and appended.
/// </summary>
/// <param name="c">The already consumed first character.</param>
/// <returns>The identifier text; never null.</returns>
private string GetIdentifier (int c)
{
        StringBuilder id = new StringBuilder ();
        id.Append ((char) c);

        for (int next = peekChar ();
             next != -1 && is_identifier_part_character ((char) next);
             next = peekChar ()) {
                id.Append ((char) getChar ());
                col++;
        }

        cant_have_a_type_character = false;

        return id.ToString ();
}
854
/// <summary>
///   Tells whether <paramref name="currentChar"/> is a double quote:
///   the ASCII quote or one of the Unicode curly double quotes.
/// </summary>
private bool is_doublequote (int currentChar)
{
        switch (currentChar) {
        case '"':
        case 0x201C:    // unicode left double-quote character
        case 0x201D:    // unicode right double-quote character
                return true;

        default:
                return false;
        }
}
861                 
/// <summary>
///   Tells whether <paramref name="c"/> is one of the characters this
///   tokenizer skips as blank space: space, tab, vertical tab, carriage
///   return or no-break space (0xA0).  Line feed is not included.
/// </summary>
private bool is_whitespace (int c)
{
        switch (c) {
        case ' ':
        case '\t':
        case '\v':
        case '\r':
        case 0xa0:
                return true;

        default:
                return false;
        }
}
866
/// <summary>
///   Consumes input for as long as the next character satisfies
///   is_whitespace; the first other character is left unread.
/// </summary>
private void GobbleWhiteSpaces ()
{
        while (is_whitespace (peekChar ()))
                getChar ();
}
876                 
// Set to true by the scanner whenever it produces a token on the current
// line; cleared again by nextLine ().
private bool tokens_seen = false;

/// <summary>
///   Bookkeeping for a line break: bumps both line counters and resets
///   the per-line state (column, tokens_seen, type-character flag).
/// </summary>
private void nextLine ()
{
        line++;
        ref_line++;
        col = 0;

        tokens_seen = false;
        cant_have_a_type_character = true;
}
887
/// <summary>
///   Scans the input and returns the next raw token.  val is reset to
///   null first and receives the token's value where there is one.
/// </summary>
/// <remarks>
///   In order, each character read is checked for: a line continuation
///   ('_' not followed by an identifier character), white space, a
///   comment start ('), end of line, an escaped identifier ([name]), a
///   plain identifier or keyword, a string/character literal, a number
///   or dot, and finally punctuation via is_punct.
///
///   A malformed escaped identifier ('[' not followed by an identifier
///   and a closing ']') is reported as Token.ERROR with error_details
///   set.  It used to fall out of the scanning loop and be reported as
///   Token.EOL, or as Token.EOF when the previous token was an EOL,
///   which silently ended tokenization in the middle of the input.
/// </remarks>
/// <returns>
///   The token code.  Token.REM is returned for a comment start (the
///   caller is expected to drop the rest of the line).  At end of input
///   Token.EOL is returned once if the previous token was not EOL, then
///   Token.EOF.
/// </returns>
public int xtoken ()
{
        int t;
        bool doread = false;
        int c;

        val = null;
        for (;(c = getChar ()) != -1; col++) {

                // Handle line continuation character
                if (c == '_') {
                        int d = peekChar ();
                        if (!is_identifier_part_character ((char) d)) {
                                // Skip the rest of the line, then resume
                                // with the first character of the next one.
                                while ((c = getChar ()) != -1 && !IsEOL (c)) {}
                                c = getChar ();
                        }
                }

                // white space
                if (is_whitespace (c)) {
                        // expand tabs for location
                        if (c == '\t')
                                col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
                        cant_have_a_type_character = true;
                        continue;
                }

                // Handle line comments.
                if (c == '\'')
                        return Token.REM;

                // Handle EOL.
                if (IsEOL (c)) {
                        if (current_token == Token.EOL) // if last token was also EOL keep skipping
                                continue;
                        return Token.EOL;
                }

                // Handle escaped identifiers
                if (c == '[') {
                        string escaped = GetIdentifier ();
                        if (escaped == null || getChar () != ']') {
                                error_details = "Invalid escaped identifier";
                                return Token.ERROR;
                        }
                        val = escaped;
                        tokens_seen = true;
                        return Token.IDENTIFIER;
                }

                // Handle unescaped identifiers
                if (is_identifier_start_character ((char) c)) {
                        // GetIdentifier (int) always returns a string, so
                        // no null check is needed here.
                        string id = GetIdentifier (c);
                        val = id;
                        tokens_seen = true;
                        // A keyword right after a dot is a member name.
                        if (is_keyword (id) && (current_token != Token.DOT))
                                return getKeyword (id);
                        return Token.IDENTIFIER;
                }

                // Treat string literals
                if (is_doublequote (c)) {
                        cant_have_a_type_character = true;
                        return ExtractStringOrCharLiteral (c);
                }

                // handle numeric literals
                if (c == '.') {
                        cant_have_a_type_character = true;
                        tokens_seen = true;
                        if (Char.IsDigit ((char) peekChar ()))
                                return is_number (c);
                        return Token.DOT;
                }

                if (Char.IsDigit ((char) c)) {
                        cant_have_a_type_character = true;
                        tokens_seen = true;
                        return is_number (c);
                }

                if ((t = is_punct ((char) c, ref doread)) != Token.ERROR) {
                        cant_have_a_type_character = true;

                        if (t == Token.NONE)
                                continue;

                        // is_punct asks for the second character of a
                        // two-character token to be consumed.
                        if (doread) {
                                getChar ();
                                col++;
                        }
                        tokens_seen = true;
                        return t;
                }

                error_details = ((char) c).ToString ();
                return Token.ERROR;
        }

        if (current_token != Token.EOL) // if last token wasn't EOL send it before EOF
                return Token.EOL;

        return Token.EOF;
}
998
/// <summary>
///   Parses the remainder of a date literal up to and including its
///   closing '#'.  Accepted shapes are a date (month, day and year
///   separated by the same '/' or '-'), a time (hours, optionally
///   :minutes, optionally :seconds, optionally AM or PM), or a date
///   followed by a time.
/// </summary>
/// <remarks>
///   A time must carry at least minutes or an AM/PM marker.  With AM/PM
///   the hour may not exceed 12 and is converted to the 24-hour clock:
///   12 AM is hour 0 and 12 PM is hour 12.  (Previously 12 was added
///   blindly for PM, so "12:30 PM" produced hour 24 and was rejected,
///   while "12:30 AM" was taken as 12:30 in the afternoon.)
/// </remarks>
/// <returns>
///   Token.LITERAL_DATE with the DateTime stored in val, or Token.ERROR
///   when the text is malformed or does not denote a valid DateTime.
/// </returns>
private int ParseDateLiteral ()
{
        int d;
        object temp;
        int month = 1, day = 1, year = 1, hours = 0, minutes = 0, seconds = 0;
        bool minutes_specified = false, seconds_specified = false;
        bool am_specified = false, pm_specified = false;
        bool time_present = true;

        GobbleWhiteSpaces ();
        if (peekChar () == '#')         // empty literal
                return Token.ERROR;

        temp = ParseIntLiteral ();
        if (temp == null)
                return Token.ERROR;

        d = peekChar ();
        if (d == '/' || d == '-') {
                // The first number was the month of a date.
                int date_separator = getChar ();

                month = (int) temp;

                temp = ParseIntLiteral ();
                if (temp == null)
                        return Token.ERROR;
                day = (int) temp;

                // Both separators must be the same character.
                if (getChar () != date_separator)
                        return Token.ERROR;

                temp = ParseIntLiteral ();
                if (temp == null)
                        return Token.ERROR;
                year = (int) temp;

                GobbleWhiteSpaces ();
                if (peekChar () == '#') {
                        // Date only.
                        getChar ();
                        time_present = false;
                } else {
                        temp = ParseIntLiteral ();
                        if (temp == null)
                                return Token.ERROR;
                        d = peekChar ();
                }
        }

        if (time_present) {
                hours = (int) temp;

                if (d == ':') {
                        getChar ();

                        temp = ParseIntLiteral ();
                        if (temp == null)
                                return Token.ERROR;
                        minutes = (int) temp;
                        minutes_specified = true;

                        if (peekChar () == ':') {
                                getChar ();

                                temp = ParseIntLiteral ();
                                if (temp == null)
                                        return Token.ERROR;
                                seconds = (int) temp;
                                seconds_specified = true;
                        }
                }

                GobbleWhiteSpaces ();
                d = peekChar ();
                if (d == 'A' || d == 'P') {
                        getChar ();

                        if (peekChar () != 'M')
                                return Token.ERROR;
                        getChar ();

                        if (d == 'A')
                                am_specified = true;
                        else
                                pm_specified = true;
                }

                GobbleWhiteSpaces ();
                if (peekChar () != '#')
                        return Token.ERROR;
                getChar ();

                // A bare number is not a time.
                if (!minutes_specified && !seconds_specified && !am_specified && !pm_specified)
                        return Token.ERROR;

                if (am_specified || pm_specified) {
                        if (hours > 12)
                                return Token.ERROR;

                        // 12-hour to 24-hour clock: 12 AM -> 0, 12 PM -> 12.
                        if (hours == 12)
                                hours = 0;
                        if (pm_specified)
                                hours += 12;
                }
        }

        try {
                val = new DateTime (year, month, day, hours, minutes, seconds);
        } catch (ArgumentException) {
                // Some component is out of range (e.g. month 13).
                return Token.ERROR;
        }

        return Token.LITERAL_DATE;
}
1138                 
/// <summary>
///   Reads a string or character literal whose opening quote has already
///   been consumed.
/// </summary>
/// <remarks>
///   Two consecutive quotes inside the literal stand for one quote
///   character.  A closing quote followed by 'C' or 'c' makes the literal
///   a character literal, which must contain exactly one character.
/// </remarks>
/// <param name="c">
///   The opening quote; its value is not used, the parameter only serves
///   as the read cursor.
/// </param>
/// <returns>
///   Token.LITERAL_STRING (val holds the string), Token.LITERAL_CHARACTER
///   (val holds the char), or Token.ERROR for a wrongly sized character
///   literal (val holds a message) and for a literal cut short by an end
///   of line or the end of the input.
/// </returns>
private int ExtractStringOrCharLiteral (int c)
{
        StringBuilder s = new StringBuilder ();

        tokens_seen = true;

        while ((c = getChar ()) != -1) {
                if (is_doublequote (c)) {
                        int next = peekChar ();

                        if (!is_doublequote (next)) {
                                // This quote closes the literal.
                                if (next != 'C' && next != 'c') {
                                        val = s.ToString ();
                                        return Token.LITERAL_STRING;
                                }

                                //handle Char Literals
                                getChar ();
                                if (s.Length != 1) {
                                        val = "Incorrect length for a character literal";
                                        return Token.ERROR;
                                }
                                val = s[0];
                                return Token.LITERAL_CHARACTER;
                        }

                        // Doubled quote: drop the second one, keep the
                        // first as literal text below.
                        getChar ();
                }

                if (IsEOL (c))
                        return Token.ERROR;

                s.Append ((char) c);
        }

        return Token.ERROR;
}
1176
/// <summary>
///   Reads a run of decimal digits, if the input is positioned at one,
///   and converts it to a 32-bit integer.
/// </summary>
/// <returns>
///   The boxed Int32 value, or null when the next character is not a
///   digit (nothing is consumed) or when the digits cannot be converted
///   (they have been consumed by then).
/// </returns>
private object ParseIntLiteral ()
{
        if (!Char.IsDigit ((char) peekChar ()))
                return null;

        number = new StringBuilder ();
        decimal_digits (getChar ());

        try {
                return System.Int32.Parse (number.ToString ());
        } catch (Exception) {
                return null;
        }
}
1198  
/// <summary>
///   Discards tokens until a Token.HASH is found, then pushes the '#'
///   back and clears tokens_seen so that scanning resumes at it.
/// </summary>
/// <exception cref="System.ApplicationException">
///   The end of the input was reached before any Token.HASH.
/// </exception>
public void PositionCursorAtNextPreProcessorDirective ()
{
        int t = token ();

        while (t != Token.HASH && t != Token.EOF)
                t = token ();

        if (t == Token.EOF)
                throw new ApplicationException("Unexpected EOF while looking for a pre-processor directive");

        // Only Token.HASH can get here.
        tokens_seen = false;
        putback ('#');
}
1213
/// <summary>
///   Per-instance cleanup hook.  Currently does nothing.
/// </summary>
public void cleanup ()
{
        // FIXME: nothing is released here yet.
}
1218
/// <summary>
///   Static cleanup hook.  Currently does nothing.
/// </summary>
public static void Cleanup ()
{
}
1222
1223         }
1224 }