2003/03/03 Rafael Teixeira <rafaelteixeirabr@hotmail.com>
[mono.git] / mcs / mbas / mb-tokenizer.cs
1 //
2 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
3 //
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
5 //         
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
7 //
8 // Licensed under the terms of the GNU GPL
9 //
10 // Copyright (C) 2001 A Rafael D Teixeira
11 //
12
13 namespace Mono.MonoBASIC
14 {
15         using System;
16         using System.Text;
17         using System.Collections;
18         using System.IO;
19         using System.Globalization;
20         using Mono.Languages;
21         using Mono.CSharp;
22         
23         /// <summary>
24         ///    Tokenizer for MonoBASIC source code. 
25         /// </summary>
26         
27         public class Tokenizer : yyParser.yyInput
28         {
29                 TextReader reader;
30                 public string ref_name;
31                 public int ref_line = 1;
32                 public int line = 1;
33                 public int col = 1;
34                 public int current_token;
35                 bool handle_get_set = false;
36
37                 public int ExpandedTabsSize = 4; 
38
39                 public string location {
40                         get {
41                                 string det;
42
43                                 if (current_token == Token.ERROR)
44                                         det = "detail: " + error_details;
45                                 else
46                                         det = "";
47                                 
48                                 return "Line:     "+line+" Col: "+col + "\n" +
49                                        "VirtLine: "+ref_line +
50                                        " Token: "+current_token + " " + det;
51                         }
52                 }
53
54                 public bool properties {
55                         get {
56                                 return handle_get_set;
57                         }
58
59                         set {
60                                 handle_get_set = value;
61                         }
62                 }
63                 
64                 //
65                 // Class variables
66                 // 
67                 static Hashtable keywords;
68                 static NumberStyles styles;
69                 static NumberFormatInfo csharp_format_info;
70                 
71                 //
72                 // Values for the associated token returned
73                 //
74                 System.Text.StringBuilder number;
75                 int putback_char;
76                 Object val;
77                 
78                 //
79                 // Details about the error encountered by the tokenizer
80                 //
81                 string error_details;
82                 
83                 public string error {
84                         get {
85                                 return error_details;
86                         }
87                 }
88                 
89                 public int Line {
90                         get {
91                                 return line;
92                         }
93                 }
94
95                 public int Col {
96                         get {
97                                 return col;
98                         }
99                 }
100                 
101                 static void initTokens ()
102                 {
103                         keywords = new Hashtable ();
104
105                         keywords.Add ("addhandler", Token.ADDHANDLER);
106                         keywords.Add ("addressof", Token.ADDRESSOF);
107                         keywords.Add ("alias", Token.ALIAS);
108                         keywords.Add ("and", Token.AND);
109                         keywords.Add ("andalso", Token.ANDALSO);
110                         keywords.Add ("ansi", Token.ANSI);
111                         keywords.Add ("as", Token.AS);
112                         keywords.Add ("assembly", Token.ASSEMBLY);
113                         keywords.Add ("auto", Token.AUTO);
114                         keywords.Add ("binary", Token.BINARY);
115                         keywords.Add ("boolean", Token.BOOLEAN);
116                         keywords.Add ("byref", Token.BYREF);
117                         keywords.Add ("byte", Token.BYTE);
118                         keywords.Add ("byval", Token.BYVAL);
119                         keywords.Add ("call", Token.CALL);
120                         keywords.Add ("case", Token.CASE);
121                         keywords.Add ("catch", Token.CATCH);
122                         keywords.Add ("cbool", Token.CBOOL);
123                         keywords.Add ("cbyte", Token.CBYTE);
124                         keywords.Add ("cchar", Token.CCHAR);
125                         keywords.Add ("cdate", Token.CDATE);
126                         keywords.Add ("cdec", Token.CDEC);
127                         keywords.Add ("cdbl", Token.CDBL);
128                         keywords.Add ("char", Token.CHAR);
129                         keywords.Add ("cint", Token.CINT);
130                         keywords.Add ("class", Token.CLASS);
131                         keywords.Add ("clng", Token.CLNG);
132                         keywords.Add ("cobj", Token.COBJ);
133                         keywords.Add ("compare", Token.COMPARE);
134                         keywords.Add ("const", Token.CONST);
135                         keywords.Add ("cshort", Token.CSHORT);
136                         keywords.Add ("csng", Token.CSNG);
137                         keywords.Add ("cstr", Token.CSTR);
138                         keywords.Add ("ctype", Token.CTYPE);
139                         keywords.Add ("date", Token.DATE);
140                         keywords.Add ("decimal", Token.DECIMAL);
141                         keywords.Add ("declare", Token.DECLARE);
142                         keywords.Add ("default", Token.DEFAULT);
143                         keywords.Add ("delegate", Token.DELEGATE);
144                         keywords.Add ("dim", Token.DIM);
145                         keywords.Add ("do", Token.DO);
146                         keywords.Add ("double", Token.DOUBLE);
147                         keywords.Add ("each", Token.EACH);
148                         keywords.Add ("else", Token.ELSE);
149                         keywords.Add ("elseif", Token.ELSEIF);
150                         keywords.Add ("end", Token.END);
151                         keywords.Add ("enum", Token.ENUM);
152                         keywords.Add ("erase", Token.ERASE);
153                         keywords.Add ("error", Token.ERROR);
154                         keywords.Add ("event", Token.EVENT);
155                         keywords.Add ("exit", Token.EXIT);
156                         keywords.Add ("explicit", Token.EXPLICIT);
157                         keywords.Add ("false", Token.FALSE);
158                         keywords.Add ("finally", Token.FINALLY);
159                         keywords.Add ("for", Token.FOR);
160                         keywords.Add ("friend", Token.FRIEND);
161                         keywords.Add ("function", Token.FUNCTION);
162                         keywords.Add ("get", Token.GET);
163                         //keywords.Add ("gettype", Token.GETTYPE);
164                         keywords.Add ("goto", Token.GOTO);
165                         keywords.Add ("handles", Token.HANDLES);
166                         keywords.Add ("if", Token.IF);
167                         keywords.Add ("implements", Token.IMPLEMENTS);
168                         keywords.Add ("imports", Token.IMPORTS);
169                         keywords.Add ("in", Token.IN);
170                         keywords.Add ("inherits", Token.INHERITS);
171                         keywords.Add ("integer", Token.INTEGER);
172                         keywords.Add ("interface", Token.INTERFACE);
173                         keywords.Add ("is", Token.IS);
174                         keywords.Add ("let ", Token.LET );
175                         keywords.Add ("lib ", Token.LIB );
176                         keywords.Add ("like ", Token.LIKE );
177                         keywords.Add ("long", Token.LONG);
178                         keywords.Add ("loop", Token.LOOP);
179                         keywords.Add ("me", Token.ME);
180                         keywords.Add ("mod", Token.MOD);
181                         keywords.Add ("module", Token.MODULE);
182                         keywords.Add ("mustinherit", Token.MUSTINHERIT);
183                         keywords.Add ("mustoverride", Token.MUSTOVERRIDE);
184                         keywords.Add ("mybase", Token.MYBASE);
185                         keywords.Add ("myclass", Token.MYCLASS);
186                         keywords.Add ("namespace", Token.NAMESPACE);
187                         keywords.Add ("new", Token.NEW);
188                         keywords.Add ("next", Token.NEXT);
189                         keywords.Add ("not", Token.NOT);
190                         keywords.Add ("nothing", Token.NOTHING);
191                         keywords.Add ("notinheritable", Token.NOTINHERITABLE);
192                         keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
193                         keywords.Add ("object", Token.OBJECT);
194                         keywords.Add ("off", Token.OFF);
195                         keywords.Add ("on", Token.ON);
196                         keywords.Add ("option", Token.OPTION);
197                         keywords.Add ("optional", Token.OPTIONAL);
198                         keywords.Add ("or", Token.OR);
199                         keywords.Add ("orelse", Token.ORELSE);
200                         keywords.Add ("overloads", Token.OVERLOADS);
201                         keywords.Add ("overridable", Token.OVERRIDABLE);
202                         keywords.Add ("overrides", Token.OVERRIDES);
203                         keywords.Add ("paramarray", Token.PARAM_ARRAY);
204                         keywords.Add ("preserve", Token.PRESERVE);
205                         keywords.Add ("private", Token.PRIVATE);
206                         keywords.Add ("property", Token.PROPERTY);
207                         keywords.Add ("protected", Token.PROTECTED);
208                         keywords.Add ("public", Token.PUBLIC);
209                         keywords.Add ("raiseevent", Token.RAISEEVENT);
210                         keywords.Add ("readonly", Token.READONLY);
211                         keywords.Add ("redim", Token.REDIM);
212                         keywords.Add ("rem", Token.REM);
213                         keywords.Add ("removehandler", Token.REMOVEHANDLER);
214                         keywords.Add ("resume", Token.RESUME);
215                         keywords.Add ("return", Token.RETURN);
216                         keywords.Add ("select", Token.SELECT);
217                         keywords.Add ("set", Token.SET);
218                         keywords.Add ("shadows", Token.SHADOWS);
219                         keywords.Add ("shared", Token.SHARED);
220                         keywords.Add ("short", Token.SHORT);
221                         keywords.Add ("single", Token.SINGLE);
222                         keywords.Add ("sizeof", Token.SIZEOF);
223                         keywords.Add ("static", Token.STATIC);
224                         keywords.Add ("step", Token.STEP);
225                         keywords.Add ("stop", Token.STOP);
226                         keywords.Add ("strict", Token.STRICT);
227                         keywords.Add ("string", Token.STRING);
228                         keywords.Add ("structure", Token.STRUCTURE);
229                         keywords.Add ("sub", Token.SUB);
230                         keywords.Add ("synclock", Token.SYNCLOCK);
231                         keywords.Add ("text", Token.TEXT);
232                         keywords.Add ("then", Token.THEN);
233                         keywords.Add ("throw", Token.THROW);
234                         keywords.Add ("to", Token.TO);
235                         keywords.Add ("true", Token.TRUE);
236                         keywords.Add ("try", Token.TRY);
237                         keywords.Add ("typeof", Token.TYPEOF);
238                         keywords.Add ("unicode", Token.UNICODE);
239                         keywords.Add ("until", Token.UNTIL);
240                         keywords.Add ("variant", Token.VARIANT);
241                         keywords.Add ("when", Token.WHEN);
242                         keywords.Add ("while", Token.WHILE);
243                         keywords.Add ("with", Token.WITH);
244                         keywords.Add ("withevents", Token.WITHEVENTS);
245                         keywords.Add ("writeonly", Token.WRITEONLY);
246                         keywords.Add ("xor", Token.XOR);
247                 }
248
249                 //
250                 // Class initializer
251                 // 
252                 static Tokenizer ()
253                 {
254                         initTokens ();
255                         csharp_format_info = new NumberFormatInfo ();
256                         csharp_format_info.CurrencyDecimalSeparator = ".";
257                         styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
258                 }
259
260                 bool is_keyword (string name)
261                 {
262                         bool res;
263
264                         res = keywords.Contains(name.ToLower());
265                         if ((name == "get" || name == "set") && handle_get_set == false)
266                                 return false;
267                         return res;
268                 }
269
270                 int getKeyword (string name)
271                 {
272                         return (int) (keywords [name.ToLower()]);
273                 }
274                 
275                 public Location Location {
276                         get {
277                                 return new Location (ref_line);
278                         }
279                 }
280                 
281                 public bool PropertyParsing {
282                         get {
283                                 return handle_get_set;
284                         }
285
286                         set {
287                                 handle_get_set = value;
288                         }
289                 }
290                                 
291                 bool is_identifier_start_character (char c)
292                 {
293                         return Char.IsLetter (c) || c == '_' ;
294                 }
295
296                 bool is_identifier_part_character (char c)
297                 {
298                         return (Char.IsLetter (c) || Char.IsDigit (c) || c == '_');
299                 }
300
301                 int is_punct (char c, ref bool doread)
302                 {
303                         int idx = "{}[](),:;~+-*/%&|^!=<>?".IndexOf (c);
304                         int d;
305                         int t;
306
307                         doread = false;
308
309                         switch (c){
310                         case '[':
311                                 return Token.OPEN_BRACKET;
312                         case ']':
313                                 return Token.CLOSE_BRACKET;
314                         case '{':
315                                 return Token.OPEN_BRACE;
316                         case '}':
317                                 return Token.CLOSE_BRACE;                               
318                         case '(':
319                                 return Token.OPEN_PARENS;
320                         case ')':
321                                 return Token.CLOSE_PARENS;
322                         case ',':
323                                 return Token.COMMA;
324                         //case ':':
325                         //      return Token.COLON;
326                         case '?':
327                                 return Token.INTERR;
328                         case '&':
329                                 return Token.OP_CONCAT;                         
330                         }
331
332                         d = peekChar ();
333                         if (c == '+'){
334                                 
335                                 if (d == '+')
336                                         t = Token.OP_INC;
337                                 else if (d == '=')
338                                         t = Token.OP_ADD_ASSIGN;
339                                 else
340                                         return Token.PLUS;
341                                 doread = true;
342                                 return t;
343                         }
344                         if (c == '-'){
345                                 if (d == '=')
346                                         t = Token.OP_SUB_ASSIGN;
347                                 else
348                                         return Token.MINUS;
349                                 doread = true;
350                                 return t;
351                         }
352
353                         if (c == '='){
354                                 /*if (d == '='){
355                                         doread = true;
356                                         return Token.OP_EQ;
357                                 }*/
358                                 return Token.ASSIGN;
359                         }
360
361                         if (c == '*'){
362                                 if (d == '='){
363                                         doread = true;
364                                         return Token.OP_MULT_ASSIGN;
365                                 }
366                                 return Token.STAR;
367                         }
368
369                         if (c == '/'){
370                                 if (d == '='){
371                                         doread = true;
372                                         return Token.OP_DIV_ASSIGN;
373                                 }
374                                 return Token.DIV;
375                         }
376
377                         if (c == '\\'){
378                                 if (d == '='){
379                                         doread = true;
380                                         return Token.OP_IDIV_ASSIGN;
381                                 }
382                                 return Token.OP_IDIV;
383                         }
384
385                         if (c == '^'){
386                                 if (d == '='){
387                                         doread = true;
388                                         return Token.OP_EXP_ASSIGN;
389                                 }
390                                 return Token.OP_EXP;
391                         }
392
393                         if (c == '<'){
394                                 if (d == '>')
395                                 {
396                                         doread = true;
397                                         return Token.OP_NE;
398                                 }
399                                 if (d == '='){
400                                         doread = true;
401                                         return Token.OP_LE;
402                                 }
403                                 return Token.OP_LT;
404                         }
405
406                         if (c == '>'){
407                                 if (d == '='){
408                                         doread = true;
409                                         return Token.OP_GE;
410                                 }
411                                 return Token.OP_GT;
412                         }
413                         if (c == ':'){
414                                 if (d == '='){
415                                         doread = true;
416                                         return Token.ATTR_ASSIGN;
417                                 }
418                                 return Token.COLON;
419                         }                       
420                         return Token.ERROR;
421                 }
422
423                 bool decimal_digits (int c)
424                 {
425                         int d;
426                         bool seen_digits = false;
427                         
428                         if (c != -1)
429                                 number.Append ((char) c);
430                         
431                         while ((d = peekChar ()) != -1){
432                                 if (Char.IsDigit ((char)d)){
433                                         number.Append ((char) d);
434                                         getChar ();
435                                         seen_digits = true;
436                                 } else
437                                         break;
438                         }
439                         return seen_digits;
440                 }
441
442                 void hex_digits (int c)
443                 {
444                         int d;
445
446                         if (c != -1)
447                                 number.Append ((char) c);
448                         while ((d = peekChar ()) != -1){
449                                 char e = Char.ToUpper ((char) d);
450                                 
451                                 if (Char.IsDigit (e) ||
452                                     (e >= 'A' && e <= 'F')){
453                                         number.Append ((char) e);
454                                         getChar ();
455                                 } else
456                                         break;
457                         }
458                 }
459                 
460                 int real_type_suffix (int c)
461                 {
462                         int t;
463                         
464                         switch (c){
465                         case 'F': case 'f':
466                                 t =  Token.LITERAL_SINGLE;
467                                 break;
468                         case 'D': case 'd':
469                                 t = Token.LITERAL_DOUBLE;
470                                 break;
471                         case 'M': case 'm':
472                                  t= Token.LITERAL_DECIMAL;
473                                 break;
474                         default:
475                                 return Token.NONE;
476                         }
477                         getChar ();
478                         return t;
479                 }
480
481                 int integer_type_suffix (int c)
482                 {
483                         // FIXME: Handle U and L suffixes.
484                         // We also need to see in which kind of
485                         // Int the thing fits better according to the spec.
486                         return Token.LITERAL_INTEGER;
487                 }
488                 
489                 void adjust_int (int t)
490                 {
491                         val = new System.Int32();
492                         val = System.Int32.Parse (number.ToString (), 0);
493                 }
494
495                 int adjust_real (int t)
496                 {
497                         string s = number.ToString ();
498
499                         Console.WriteLine (s);
500                         switch (t){
501                         case Token.LITERAL_DECIMAL:
502                                 val = new System.Decimal ();
503                                 val = System.Decimal.Parse (
504                                         s, styles, csharp_format_info);
505                                 break;
506                         case Token.LITERAL_DOUBLE:
507                                 val = new System.Double ();
508                                 val = System.Double.Parse (
509                                         s, styles, csharp_format_info);
510                                 break;
511                         case Token.LITERAL_SINGLE:
512                                 val = new System.Double ();
513                                 val = (float) System.Double.Parse (
514                                         s, styles, csharp_format_info);
515                                 break;
516
517                         case Token.NONE:
518                                 val = new System.Double ();
519                                 val = System.Double.Parse (
520                                         s, styles, csharp_format_info);
521                                 t = Token.LITERAL_DOUBLE;
522                                 break;
523                         }
524                         return t;
525                 }
526
527                 //
528                 // Invoked if we know we have .digits or digits
529                 //
530                 int is_number (int c)
531                 {
532                         bool is_real = false;
533                         number = new System.Text.StringBuilder ();
534                         int type;
535
536                         number.Length = 0;
537
538                         if (Char.IsDigit ((char)c)){
539                                 if (c == '0' && peekChar () == 'x' || peekChar () == 'X'){
540                                         getChar ();
541                                         hex_digits (-1);
542                                         val = new System.Int32 ();
543                                         val = System.Int32.Parse (number.ToString (), NumberStyles.HexNumber);
544                                         return integer_type_suffix (peekChar ());
545                                 }
546                                 decimal_digits (c);
547                                 c = getChar ();
548                         }
549
550                         //
551                         // We need to handle the case of
552                         // "1.1" vs "1.string" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
553                         //
554                         if (c == '.'){
555                                 if (decimal_digits ('.')){
556                                         is_real = true;
557                                         c = peekChar ();
558                                 } else {
559                                         putback ('.');
560                                         number.Length -= 1;
561                                         adjust_int (Token.LITERAL_INTEGER);
562                                         return Token.LITERAL_INTEGER;
563                                 }
564                         }
565                         
566                         if (c == 'e' || c == 'E'){
567                                 is_real = true;
568                                 number.Append ("e");
569                                 getChar ();
570                                 
571                                 c = peekChar ();
572                                 if (c == '+'){
573                                         number.Append ((char) c);
574                                         getChar ();
575                                         c = peekChar ();
576                                 } else if (c == '-'){
577                                         number.Append ((char) c);
578                                         getChar ();
579                                         c = peekChar ();
580                                 }
581                                 decimal_digits (-1);
582                                 c = peekChar ();
583                         }
584
585                         type = real_type_suffix (c);
586                         if (type == Token.NONE && !is_real){
587                                 type = integer_type_suffix (c);
588                                 adjust_int (type);
589                                 putback (c);
590                                 return type;
591                         } else
592                                 is_real = true;
593
594                         if (is_real)
595                                 return adjust_real (type);
596
597                         Console.WriteLine ("This should not be reached");
598                         throw new Exception ("Is Number should never reach this point");
599                 }
600                         
601                 int escape (int c)
602                 {
603                         int d;
604                         int v;
605
606                         d = peekChar ();
607                         if (c != '\\')
608                                 return c;
609                         
610                         switch (d){
611                         case 'a':
612                                 v = '\a'; break;
613                         case 'b':
614                                 v = '\b'; break;
615                         case 'n':
616                                 v = '\n'; break;
617                         case 't':
618                                 v = '\t'; break;
619                         case 'v':
620                                 v = '\v'; break;
621                         case 'r':
622                                 v = 'c'; break;
623                         case '\\':
624                                 v = '\\'; break;
625                         case 'f':
626                                 v = '\f'; break;
627                         case '0':
628                                 v = 0; break;
629                         case '"':
630                                 v = '"'; break;
631                         case '\'':
632                                 v = '\''; break;
633                         default:
634                                 error_details = "cs1009: Unrecognized escape sequence " + (char)d;
635                                 return -1;
636                         }
637                         getChar ();
638                         return v;
639                 }
640
641                 int getChar ()
642                 {
643                         if (putback_char != -1){
644                                 int x = putback_char;
645                                 putback_char = -1;
646
647                                 return x;
648                         }
649                         return reader.Read ();
650                 }
651
652                 int peekChar ()
653                 {
654                         if (putback_char != -1)
655                                 return putback_char;
656                         return reader.Peek ();
657                 }
658
659                 void putback (int c)
660                 {
661                         if (putback_char != -1)
662                                 throw new Exception ("This should not happen putback on putback");
663                         putback_char = c;
664                 }
665
666                 public bool advance ()
667                 {
668                         return current_token != Token.EOF ;
669                 }
670
671                 public Object Value {
672                         get {
673                                 return val;
674                         }
675                 }
676
677                 public Object value ()
678                 {
679                         return val;
680                 }
681
682                 private bool IsEOL(int currentChar)
683                 {
684                         if (currentChar ==  0x0D)
685                         {
686                                 if (peekChar() ==  0x0A) // if it is a CR-LF pair consume LF also
687                                         getChar();
688
689                                 return true;
690                         }
691                         return (currentChar ==  -1 || currentChar ==  0x0A || currentChar ==  0x2028 || currentChar ==  0x2029);
692                 }
693
694                 private int DropComments()              
695                 {
696                         int d;
697                         while (!IsEOL(d = getChar ()))
698                                 col++;
699                         line++;
700                         ref_line++;
701                         col = 0;
702
703                         return Token.EOL;
704                 }       
705                         
706                 public int token ()
707                 {
708                         int lastToken = current_token;
709                         do
710                         {
711                                 current_token = xtoken ();
712                                 if (current_token == 0) 
713                                         return Token.EOF;
714                                 if (current_token == Token.REM)
715                                         current_token = DropComments();
716                         } while (lastToken == Token.EOL && current_token == Token.EOL);
717
718                         return current_token;
719                 }
720
721                 private string GetIdentifier()
722                 {
723                         int c = getChar();
724                         if (is_identifier_start_character ((char) c))
725                                 return GetIdentifier(c);
726                         else
727                                 return null;
728                 }
729
730                 private string GetIdentifier(int c)
731                 {
732                         System.Text.StringBuilder id = new System.Text.StringBuilder ();
733
734                         id.Append ((char) c);
735                                 
736                         while ((c = peekChar ()) != -1) 
737                         {
738                                 if (is_identifier_part_character ((char) c))
739                                 {
740                                         id.Append ((char)getChar ());
741                                         col++;
742                                 } 
743                                 else 
744                                         break;
745                         }
746
747                         return id.ToString ();
748                 }
749
750                 public int xtoken ()
751                 {
752                         int t;
753                         bool doread = false;
754                         int c;
755
756                         val = null;
757                         for (;(c = getChar ()) != -1; col++) {
758                         
759                                 // Handle line comments.
760                                 if (c == '\'')
761                                         return Token.REM;
762                                         
763                                 // Handle line continuation character
764                                 if (c == '_') {
765                                         while ((c = getChar ()) != -1 && (c != '\n')){}
766                                         c = getChar ();                                 
767                                 }
768                                 // Handle EOL.
769                                 if (IsEOL(c))
770                                 {
771                                         line++;
772                                         ref_line++;
773                                         col = 0;
774                                         if (current_token == Token.EOL) // if last token was also EOL keep skipping
775                                                 continue;
776                                         return Token.EOL;
777                                 }
778                                 
779                                 // Handle escaped identifiers
780                                 if (c == '[')
781                                 {
782                                         if ((val = GetIdentifier()) == null)
783                                                 break;
784                                         if ((c = getChar()) != ']')
785                                                 break;
786                                         return Token.IDENTIFIER;
787                                 }
788
789                                 // Handle unescaped identifiers
790                                 if (is_identifier_start_character ((char) c))
791                                 {
792                                         string id;
793                                         if ((id = GetIdentifier(c)) == null)
794                                                 break;
795                                         val = id;
796                                         if (is_keyword(id))
797                                                 return getKeyword(id);
798                                         return Token.IDENTIFIER;
799                                 }
800
801                                 // handle numeric literals
802                                 if (c == '.'){
803                                         if (Char.IsDigit ((char) peekChar ()))
804                                                 return is_number (c);
805                                         return Token.DOT;
806                                 }
807                                 
808                                 if (Char.IsDigit ((char) c))
809                                         return is_number (c);
810
811                                 /* For now, limited support for pre-processor commands */
812                                 if (col == 1 && c == '#'){
813                                         System.Text.StringBuilder s = new System.Text.StringBuilder ();
814                                         
815                                         while ((c = getChar ()) != -1 && (c != '\n')){
816                                                 s.Append ((char) c);
817                                         }
818                                         if (String.Compare (s.ToString (), 0, "line", 0, 4) == 0){
819                                                 string arg = s.ToString ().Substring (5);
820                                                 int pos;
821
822                                                 if ((pos = arg.IndexOf (' ')) != -1 && pos != 0){
823                                                         ref_line = System.Int32.Parse (arg.Substring (0, pos));
824                                                         pos++;
825
826                                                         char [] quotes = { '\"' };
827
828                                                         ref_name = arg.Substring (pos);
829                                                         ref_name.TrimStart (quotes);
830                                                         ref_name.TrimEnd (quotes);
831                                                 } else
832                                                         ref_line = System.Int32.Parse (arg);
833                                         }
834                                         line++;
835                                         ref_line++;
836                                         continue;
837                                 }
838                                 
839                                 if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){
840                                         if (doread){
841                                                 getChar ();
842                                                 col++;
843                                         }
844                                         return t;
845                                 }
846                                 
847                                 // Treat string literals
848                                 if (c == '"'){
849                                         System.Text.StringBuilder s = new System.Text.StringBuilder ();
850
851                                         while ((c = getChar ()) != -1){
852                                                 if (c == '"'){ // TODO: treat double-doublequotes
853                                                         val = s.ToString ();
854                                                         return Token.LITERAL_STRING;
855                                                 }
856
857                                                 c = escape (c);
858                                                 if (c == -1)
859                                                         return Token.ERROR;
860                                                 s.Append ((char) c);
861                                         }
862                                 }
863                         
864                                 // expand tabs for location and ignore it as whitespace
865                                 if (c == '\t')
866                                 {
867                                         col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
868                                         continue;
869                                 }
870
871                                 // white space
872                                 if (c == ' ' || c == '\f' || c == '\v')
873                                         continue;
874
875                                 error_details = ((char)c).ToString ();
876                                 
877                                 return Token.ERROR;
878                         }
879
880                         if (current_token != Token.EOL) // if last token wasn´t EOL send it before EOF
881                                 return Token.EOL;
882                         
883                         return Token.EOF;
884                 }
885
886                 public void cleanup ()
887                 {
888 /* borrowed from mcs - have to work it to have preprocessing in mbas
889
890                         if (ifstack != null && ifstack.Count >= 1) {
891                                 int state = (int) ifstack.Pop ();
892                                 if ((state & REGION) != 0)
893                                         Report.Error (1038, "#endregion directive expected");
894                                 else 
895                                         Report.Error (1027, "#endif directive expected");
896                         }
897 */                              
898                 }
899
900                 public Tokenizer (System.IO.TextReader input, string fname, ArrayList defines)
901                 {
902                         this.ref_name = fname;
903                         reader = input;
904                         putback_char = -1;
905                         
906                         Location.Push (fname);
907                 }
908
909         }
910 }