To fix make test failure
[mono.git] / mcs / mbas / mb-tokenizer.cs
1 //
2 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
3 //
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
5 //       : Manjula GHM (mmanjula@novell.com)  
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
7 //
8 // Licensed under the terms of the GNU GPL
9 //
10 // Copyright (C) 2001 A Rafael D Teixeira
11 //
12
13 namespace Mono.MonoBASIC
14 {
15         using System;
16         using System.Text;
17         using System.Collections;
18         using System.IO;
19         using System.Globalization;
20         using Mono.Languages;
21         using Mono.MonoBASIC;
22         
23         /// <summary>
24         ///    Tokenizer for MonoBASIC source code. 
25         /// </summary>
26         
27         public class Tokenizer : yyParser.yyInput
28         {
29                 TextReader reader;
30                 string file_name;
31                 string ref_name;
32                 int ref_line = 0;
33                 int line = 0;
34                 int col = 1;
35                 public int current_token = Token.ERROR;
36                 bool handle_get_set = false;
37                 bool cant_have_a_type_character = false;
38
39                 public int ExpandedTabsSize = 4; 
40
41                 public string location {
42                         get {
43                                 string det;
44
45                                 if (current_token == Token.ERROR)
46                                         det = "detail: " + error_details;
47                                 else
48                                         det = "";
49                                 
50                                 return "Line:     "+line+" Col: "+col + "\n" +
51                                        "VirtLine: "+ref_line +
52                                        " Token: "+current_token + " " + det;
53                         }
54                 }
55
56                 public bool properties {
57                         get {
58                                 return handle_get_set;
59                         }
60
61                         set {
62                                 handle_get_set = value;
63                         }
64                 }
65                 
66                 //
67                 // Class variables
68                 // 
69                 static Hashtable keywords;
70                 static NumberStyles styles;
71                 static NumberFormatInfo csharp_format_info;
72                 
73                 //
74                 // Values for the associated token returned
75                 //
76                 StringBuilder number;
77                 int putback_char = -1;
78                 Object val;
79                 
80                 //
81                 // Details about the error encountered by the tokenizer
82                 //
83                 string error_details;
84                 
85                 public string error {
86                         get {
87                                 return error_details;
88                         }
89                 }
90
91                 
92                 public string Source {
93                         get {
94                                 return file_name;
95                         }
96
97                         set {
98                                 file_name = value;
99                                 ref_name = value;
100                                 Location.SetCurrentSource(file_name);
101                         }
102                 }
103
104                 public string EffectiveSource {
105                         get {
106                                 return ref_name;
107                         }
108                         set {
109                                 ref_name = value;
110                                 Location.SetCurrentSource(ref_name);
111                         }
112                 }
113
114                 public int Line {
115                         get {
116                                 return line;
117                         }
118                 }
119
120                 public int EffectiveLine {
121                         get {
122                                 return ref_line;
123                         }
124                         set {
125                                 ref_line = value;
126                         }
127                 }
128
129                 public int Col {
130                         get {
131                                 return col;
132                         }
133                 }
134                 
135                 static void initTokens ()
136                 {
137                         keywords = new Hashtable ();
138
139                         keywords.Add ("addhandler", Token.ADDHANDLER);
140                         keywords.Add ("addressof", Token.ADDRESSOF);
141                         keywords.Add ("alias", Token.ALIAS);
142                         keywords.Add ("and", Token.AND);
143                         keywords.Add ("andalso", Token.ANDALSO);
144                         keywords.Add ("ansi", Token.ANSI);
145                         keywords.Add ("as", Token.AS);
146                         keywords.Add ("assembly", Token.ASSEMBLY);
147                         keywords.Add ("auto", Token.AUTO);
148                         keywords.Add ("binary", Token.BINARY); // Not a VB.NET Keyword 
149                         keywords.Add ("boolean", Token.BOOLEAN);
150                         keywords.Add ("byref", Token.BYREF);
151                         keywords.Add ("byte", Token.BYTE);
152                         keywords.Add ("byval", Token.BYVAL);
153                         keywords.Add ("call", Token.CALL);
154                         keywords.Add ("case", Token.CASE);
155                         keywords.Add ("catch", Token.CATCH);
156                         keywords.Add ("cbool", Token.CBOOL);
157                         keywords.Add ("cbyte", Token.CBYTE);
158                         keywords.Add ("cchar", Token.CCHAR);
159                         keywords.Add ("cdate", Token.CDATE);
160                         keywords.Add ("cdec", Token.CDEC);
161                         keywords.Add ("cdbl", Token.CDBL);
162                         keywords.Add ("char", Token.CHAR);
163                         keywords.Add ("cint", Token.CINT);
164                         keywords.Add ("class", Token.CLASS);
165                         keywords.Add ("clng", Token.CLNG);
166                         keywords.Add ("cobj", Token.COBJ);
167                         keywords.Add ("compare", Token.COMPARE); // Not a VB.NET Keyword
168                         keywords.Add ("const", Token.CONST);
169                         keywords.Add ("cshort", Token.CSHORT);
170                         keywords.Add ("csng", Token.CSNG);
171                         keywords.Add ("cstr", Token.CSTR);
172                         keywords.Add ("ctype", Token.CTYPE);
173                         keywords.Add ("date", Token.DATE);
174                         keywords.Add ("decimal", Token.DECIMAL);
175                         keywords.Add ("declare", Token.DECLARE);
176                         keywords.Add ("default", Token.DEFAULT);
177                         keywords.Add ("delegate", Token.DELEGATE);
178                         keywords.Add ("dim", Token.DIM);
179                         keywords.Add ("directcast", Token.DIRECTCAST);                  
180                         keywords.Add ("do", Token.DO);
181                         keywords.Add ("double", Token.DOUBLE);
182                         keywords.Add ("each", Token.EACH);
183                         keywords.Add ("else", Token.ELSE);
184                         keywords.Add ("elseif", Token.ELSEIF);
185                         keywords.Add ("end", Token.END);
186                         keywords.Add ("endif", Token.ENDIF); // An unused VB.NET keyword
187                         keywords.Add ("enum", Token.ENUM);
188                         keywords.Add ("erase", Token.ERASE);
189                         keywords.Add ("error", Token.ERROR);
190                         keywords.Add ("event", Token.EVENT);
191                         keywords.Add ("exit", Token.EXIT);
192                         keywords.Add ("explicit", Token.EXPLICIT); // Not a VB.NET keyword 
193                         keywords.Add ("false", Token.FALSE);
194                         keywords.Add ("finally", Token.FINALLY);
195                         keywords.Add ("for", Token.FOR);
196                         keywords.Add ("friend", Token.FRIEND);
197                         keywords.Add ("function", Token.FUNCTION);
198                         keywords.Add ("get", Token.GET);
199                         keywords.Add ("gettype", Token.GETTYPE);
200                         keywords.Add ("gosub", Token.GOSUB); // An unused VB.NET keyword 
201                         keywords.Add ("goto", Token.GOTO);
202                         keywords.Add ("handles", Token.HANDLES);
203                         keywords.Add ("if", Token.IF);
204                         keywords.Add ("implements", Token.IMPLEMENTS);
205                         keywords.Add ("imports", Token.IMPORTS);
206                         keywords.Add ("in", Token.IN);
207                         keywords.Add ("inherits", Token.INHERITS);
208                         keywords.Add ("integer", Token.INTEGER);
209                         keywords.Add ("interface", Token.INTERFACE);
210                         keywords.Add ("is", Token.IS);
211                         keywords.Add ("let ", Token.LET ); // An unused VB.NET keyword
212                         keywords.Add ("lib ", Token.LIB );
213                         keywords.Add ("like", Token.LIKE );
214                         keywords.Add ("long", Token.LONG);
215                         keywords.Add ("loop", Token.LOOP);
216                         keywords.Add ("me", Token.ME);
217                         keywords.Add ("mod", Token.MOD);
218                         keywords.Add ("module", Token.MODULE);
219                         keywords.Add ("mustinherit", Token.MUSTINHERIT);
220                         keywords.Add ("mustoverride", Token.MUSTOVERRIDE);
221                         keywords.Add ("mybase", Token.MYBASE);
222                         keywords.Add ("myclass", Token.MYCLASS);
223                         keywords.Add ("namespace", Token.NAMESPACE);
224                         keywords.Add ("new", Token.NEW);
225                         keywords.Add ("next", Token.NEXT);
226                         keywords.Add ("not", Token.NOT);
227                         keywords.Add ("nothing", Token.NOTHING);
228                         keywords.Add ("notinheritable", Token.NOTINHERITABLE);
229                         keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
230                         keywords.Add ("object", Token.OBJECT);
231                         keywords.Add ("off", Token.OFF); // Not a VB.NET Keyword 
232                         keywords.Add ("on", Token.ON);
233                         keywords.Add ("option", Token.OPTION);
234                         keywords.Add ("optional", Token.OPTIONAL);
235                         keywords.Add ("or", Token.OR);
236                         keywords.Add ("orelse", Token.ORELSE);
237                         keywords.Add ("overloads", Token.OVERLOADS);
238                         keywords.Add ("overridable", Token.OVERRIDABLE);
239                         keywords.Add ("overrides", Token.OVERRIDES);
240                         keywords.Add ("paramarray", Token.PARAM_ARRAY);
241                         keywords.Add ("preserve", Token.PRESERVE);
242                         keywords.Add ("private", Token.PRIVATE);
243                         keywords.Add ("property", Token.PROPERTY);
244                         keywords.Add ("protected", Token.PROTECTED);
245                         keywords.Add ("public", Token.PUBLIC);
246                         keywords.Add ("raiseevent", Token.RAISEEVENT);
247                         keywords.Add ("readonly", Token.READONLY);
248                         keywords.Add ("redim", Token.REDIM);
249                         keywords.Add ("rem", Token.REM);
250                         keywords.Add ("removehandler", Token.REMOVEHANDLER);
251                         keywords.Add ("resume", Token.RESUME);
252                         keywords.Add ("return", Token.RETURN);
253                         keywords.Add ("select", Token.SELECT);
254                         keywords.Add ("set", Token.SET);
255                         keywords.Add ("shadows", Token.SHADOWS);
256                         keywords.Add ("shared", Token.SHARED);
257                         keywords.Add ("short", Token.SHORT);
258                         keywords.Add ("single", Token.SINGLE);
259                         keywords.Add ("sizeof", Token.SIZEOF); // Not a VB.NET Keyword 
260                         keywords.Add ("static", Token.STATIC);
261                         keywords.Add ("step", Token.STEP);
262                         keywords.Add ("stop", Token.STOP);
263                         keywords.Add ("strict", Token.STRICT); // Not a VB.NET Keyword 
264                         keywords.Add ("string", Token.STRING);
265                         keywords.Add ("structure", Token.STRUCTURE);
266                         keywords.Add ("sub", Token.SUB);
267                         keywords.Add ("synclock", Token.SYNCLOCK);
268                         keywords.Add ("text", Token.TEXT); // Not a VB.NET Keyword
269                         keywords.Add ("then", Token.THEN);
270                         keywords.Add ("throw", Token.THROW);
271                         keywords.Add ("to", Token.TO);
272                         keywords.Add ("true", Token.TRUE);
273                         keywords.Add ("try", Token.TRY);
274                         keywords.Add ("typeof", Token.TYPEOF);
275                         keywords.Add ("unicode", Token.UNICODE);
276                         keywords.Add ("until", Token.UNTIL);
277                         keywords.Add ("variant", Token.VARIANT); // An unused VB.NET keyword
278                         keywords.Add ("wend", Token.WEND); // An unused VB.NET keyword
279                         keywords.Add ("when", Token.WHEN);
280                         keywords.Add ("while", Token.WHILE);
281                         keywords.Add ("with", Token.WITH);
282                         keywords.Add ("withevents", Token.WITHEVENTS);
283                         keywords.Add ("writeonly", Token.WRITEONLY);
284                         keywords.Add ("xor", Token.XOR);
285
286                         if (Parser.UseExtendedSyntax){
287                                 keywords.Add ("yield", Token.YIELD);
288                         }
289
290                 }
291
292                 static Tokenizer ()
293                 {
294                         initTokens ();
295                         csharp_format_info = new NumberFormatInfo ();
296                         csharp_format_info.CurrencyDecimalSeparator = ".";
297                         styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
298                 }
299
300                 public Tokenizer (System.IO.TextReader input, string fname, ArrayList defines)
301                 {
302                         this.Source = fname;
303
304                         reader = input;
305
306                         // putback an EOL at the beginning of a stream. This is a convenience that 
307                         // allows pre-processor directives to be added to the beginning of a vb file.
308                         putback('\n');
309                 }
310
311                 bool is_keyword (string name)
312                 {
313                         bool res;
314                         name = name.ToLower();
315
316                         res = keywords.Contains(name);
317                         if ((name == "GET" || name == "SET") && handle_get_set == false)
318                                 return false;
319                         return res;
320                 }
321
322                 int getKeyword (string name)
323                 {
324                         return (int) (keywords [name.ToLower()]);
325                 }
326                 
327                 public Location Location {
328                         get {
329                                 return new Location (ref_line, col);
330                         }
331                 }
332                 
333                 public bool PropertyParsing {
334                         get {
335                                 return handle_get_set;
336                         }
337
338                         set {
339                                 handle_get_set = value;
340                         }
341                 }
342                                 
343                 bool is_identifier_start_character (char c)
344                 {
345                         return Char.IsLetter (c) || c == '_' ;
346                 }
347
348                 bool is_identifier_part_character (char c)
349                 {
350                         return (Char.IsLetter (c) || Char.IsDigit (c) || c == '_');
351                 }
352
353                 int is_punct (char c, ref bool doread)
354                 {
355                         int d;
356                         int t;
357
358                         doread = false;
359                         
360                         error_details = c.ToString();
361                         
362                         d = peekChar ();
363                         
364                         switch (c){
365                         case '[':
366                                 return Token.OPEN_BRACKET;
367                         case ']':
368                                 return Token.CLOSE_BRACKET;
369                         case '{':
370                                 return Token.OPEN_BRACE;
371                         case '}':
372                                 return Token.CLOSE_BRACE;                               
373                         case '(':
374                                 return Token.OPEN_PARENS;
375                         case ')':
376                                 return Token.CLOSE_PARENS;
377                         case ',':
378                                 return Token.COMMA;
379                         case '?':
380                                 return Token.INTERR;
381                         case '!':
382                                 if (is_identifier_start_character((char)d) || cant_have_a_type_character)
383                                         return Token.EXCLAMATION;
384                                 return Token.SINGLETYPECHAR;
385                         case '$':
386                                 if (cant_have_a_type_character)
387                                         return Token.ERROR;
388                                 return Token.DOLAR_SIGN;
389                         case '@':
390                                 if (cant_have_a_type_character)
391                                         return Token.ERROR;
392                                 return Token.AT_SIGN;
393                         case '%':
394                                 if (cant_have_a_type_character)
395                                         return Token.ERROR;
396                                 return Token.PERCENT;
397                         case '#':
398                                 if(tokens_seen)
399                                 {
400                                         if (cant_have_a_type_character) 
401                                                 return ExtractDateTimeLiteral();
402                                         else
403                                                 return Token.NUMBER_SIGN;
404                                 }
405                                 else 
406                                 {
407                                         tokens_seen = true;
408                                         return Token.HASH;
409                                 } 
410                         case '&':
411                                 if (!cant_have_a_type_character)
412                                         return Token.LONGTYPECHAR;
413                                 t = handle_integer_literal_in_other_bases(d);
414                                 if (t == Token.NONE) {
415                                         t = Token.OP_CONCAT;
416                                 }
417                                 return t;                       
418                         }
419
420                         if (c == '+'){
421                                 if (d == '+')
422                                         t = Token.OP_INC;
423                                 else 
424                                         return Token.PLUS;
425                                 doread = true;
426                                 return t;
427                         }
428                         if (c == '-'){
429                                 return Token.MINUS;
430                         }
431
432                         if (c == '='){
433                                 return Token.ASSIGN;
434                         }
435
436                         if (c == '*'){
437                                 return Token.STAR;
438                         }
439
440                         if (c == '/'){
441                                 return Token.DIV;
442                         }
443
444                         if (c == '\\'){
445                                 return Token.OP_IDIV;
446                         }
447
448                         if (c == '^'){
449                                 return Token.OP_EXP;
450                         }
451
452                         if (c == '<'){
453                                 if (d == '>')
454                                 {
455                                         doread = true;
456                                         return Token.OP_NE;
457                                 }
458                                 if (d == '='){
459                                         doread = true;
460                                         return Token.OP_LE;
461                                 }
462                                 if (d == '<')
463                                 {
464                                         doread = true;
465                                         return Token.OP_SHIFT_LEFT;
466                                 }
467                                 return Token.OP_LT;
468                         }
469
470                         if (c == '>'){
471                                 if (d == '='){
472                                         doread = true;
473                                         return Token.OP_GE;
474                                 }
475                                 if (d == '>')
476                                 {
477                                         doread = true;
478                                         return Token.OP_SHIFT_RIGHT;
479                                 }
480                                 return Token.OP_GT;
481                         }
482                         
483                         if (c == ':'){
484                                 if (d == '='){
485                                         doread = true;
486                                         return Token.ATTR_ASSIGN;
487                                 }
488                                 return Token.COLON;
489                         }                       
490                         
491                         return Token.ERROR;
492                 }
493
494                 bool decimal_digits (int c)
495                 {
496                         int d;
497                         bool seen_digits = false;
498                         
499                         if (c != -1)
500                                 number.Append ((char) c);
501                         while ((d = peekChar ()) != -1){
502                                 if (Char.IsDigit ((char)d)){
503                                         number.Append ((char) d);
504                                         getChar ();
505                                         seen_digits = true;
506                                 } else
507                                         break;
508                         }
509                         return seen_digits;
510                 }
511
512                 
513                 int real_type_suffix (int c)
514                 {
515                         int t;
516                         
517                         switch (c){
518                         case 'F': case 'f':
519                                 t =  Token.LITERAL_SINGLE;
520                                 break;
521                         case 'R': case 'r':
522                                 t = Token.LITERAL_DOUBLE;
523                                 break;
524                         case 'D': case 'd':
525                                  t= Token.LITERAL_DECIMAL;
526                                 break;
527                         default:
528                                 return Token.NONE;
529                         }
530                         getChar ();
531                         return t;
532                 }
533
534                 int integer_type_suffix (int c)
535                 {
536                         int t;
537                         
538                         try {
539                         
540                                 switch (c){
541                                 case 'S': case 's':
542                                         t =  Token.LITERAL_INTEGER; // SHORT ?
543                                         val = ((IConvertible)val).ToInt16(null);
544                                         break;
545                                 case 'I': case 'i':
546                                         t = Token.LITERAL_INTEGER;
547                                         val = ((IConvertible)val).ToInt32(null);
548                                         break;
549                                 case 'L': case 'l':
550                                          t= Token.LITERAL_INTEGER; // LONG ?
551                                          val = ((IConvertible)val).ToInt64(null);
552                                         break;
553                                 default:
554                                         if ((long)val <= System.Int32.MaxValue &&
555                                                 (long)val >= System.Int32.MinValue) {
556                                                 val = ((IConvertible)val).ToInt32(null);
557                                                 return Token.LITERAL_INTEGER;
558                                         } else {
559                                                 val = ((IConvertible)val).ToInt64(null);
560                                                 return Token.LITERAL_INTEGER; // LONG ?
561                                         }
562                                 }
563                                 getChar ();
564                                 return t;
565                         } catch (Exception e) {
566                                 val = e.ToString();
567                                 return Token.ERROR;
568                         }
569                 }
570                 
571                 int adjust_real (int t)
572                 {
573                         string s = number.ToString ();
574
575                         switch (t){
576                         case Token.LITERAL_DECIMAL:
577                                 val = new System.Decimal ();
578                                 val = System.Decimal.Parse (
579                                         s, styles, csharp_format_info);
580                                 break;
581                         case Token.LITERAL_DOUBLE:
582                                 val = new System.Double ();
583                                 val = System.Double.Parse (
584                                         s, styles, csharp_format_info);
585                                 break;
586                         case Token.LITERAL_SINGLE:
587                                 val = new System.Double ();
588                                 val = (float) System.Double.Parse (
589                                         s, styles, csharp_format_info);
590                                 break;
591
592                         case Token.NONE:
593                                 val = new System.Double ();
594                                 val = System.Double.Parse (
595                                         s, styles, csharp_format_info);
596                                 t = Token.LITERAL_DOUBLE;
597                                 break;
598                         }
599                         return t;
600                 }
601
602                 long hex_digits ()
603                 {
604                         StringBuilder hexNumber = new StringBuilder ();
605                         
606                         int d;
607
608                         while ((d = peekChar ()) != -1){
609                                 char e = Char.ToUpper ((char) d);
610                                 
611                                 if (Char.IsDigit (e) || (e >= 'A' && e <= 'F')){
612                                         hexNumber.Append (e);
613                                         getChar ();
614                                 } else
615                                         break;
616                         }
617                         return System.Int64.Parse (hexNumber.ToString(), NumberStyles.HexNumber);
618                 }
619
620                 long octal_digits ()
621                 {
622                         long valueToReturn = 0;
623                         
624                         int d;
625
626                         while ((d = peekChar ()) != -1){
627                                 char e = (char)d;                       
628                                 if (Char.IsDigit (e) && (e < '8')){
629                                         valueToReturn *= 8;
630                                         valueToReturn += (d - (int)'0');
631                                         getChar ();
632                                 } else
633                                         break;
634                         }
635                         
636                         return valueToReturn;
637                 }
638
639                 int handle_integer_literal_in_other_bases(int peek)
640                 {
641                         if (peek == 'h' || peek == 'H'){
642                                 getChar ();
643                                 val = hex_digits ();
644                                 return integer_type_suffix (peekChar ());
645                         }
646                         
647                         if (peek == 'o' || peek == 'O'){
648                                 getChar ();
649                                 val = octal_digits ();
650                                 return integer_type_suffix (peekChar ());
651                         }
652                         
653                         return Token.NONE;
654                 }
655                 
/// <summary>
///   Scans a decimal numeric literal. Invoked if we know we have
///   .digits or digits.
/// </summary>
/// <param name="c">
///   The first character of the literal: a digit, or '.'.
/// </param>
/// <returns>
///   The token produced by integer_type_suffix () for an integer literal
///   (with <c>val</c> set to the parsed Int64), or the token produced by
///   adjust_real () when the literal has a fraction, an exponent or a
///   real type suffix.
/// </returns>
int is_number (int c)
{
        bool is_real = false;
        bool has_leading_digits = false;        // false for literals like .50

        number = new StringBuilder ();

        if (Char.IsDigit ((char) c)){
                decimal_digits (c);
                c = peekChar ();
                has_leading_digits = true;
        }

        //
        // We need to handle the case of
        // "1.1" vs "1.ToString()" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
        //
        if (c == '.'){
                // With no leading digits the '.' is the character we were
                // handed, so hand it back before re-reading it below.
                if (!has_leading_digits)
                        putback ('.');

                if (!decimal_digits (getChar ())){
                        // No fraction follows: return the '.' to the input and
                        // drop the last character accumulated in number
                        // (presumably the '.' appended by decimal_digits --
                        // TODO confirm), then treat what is left as an integer.
                        putback ('.');
                        number.Length -= 1;
                        val = System.Int64.Parse (number.ToString ());
                        return integer_type_suffix ('.');
                }

                is_real = true;
                c = peekChar ();
        }

        if (c == 'e' || c == 'E'){
                is_real = true;
                number.Append ("e");
                getChar ();

                // Optional exponent sign.
                c = peekChar ();
                if (c == '+' || c == '-'){
                        number.Append ((char) c);
                        getChar ();
                        c = peekChar ();
                }

                decimal_digits (-1);
                c = peekChar ();
        }

        int type = real_type_suffix (c);
        if (type != Token.NONE || is_real)
                return adjust_real (type);

        val = System.Int64.Parse (number.ToString ());
        return integer_type_suffix (c);
}
719                         
/// <summary>
///   Returns the next input character and consumes it.
/// </summary>
/// <returns>
///   The pending put-back character when there is one (clearing it),
///   otherwise the result of reader.Read ().
/// </returns>
int getChar ()
{
        if (putback_char == -1)
                return reader.Read ();

        int pending = putback_char;
        putback_char = -1;
        return pending;
}
730
/// <summary>
///   Returns the next input character without consuming it.
/// </summary>
/// <returns>
///   The pending put-back character when there is one, otherwise the
///   result of reader.Peek ().
/// </returns>
int peekChar ()
{
        return (putback_char != -1) ? putback_char : reader.Peek ();
}
737                 
738
/// <summary>
///   Pushes one character back so that the next getChar () or peekChar ()
///   returns it. Only a single character can be pending at a time.
/// </summary>
/// <param name="c">The character to push back.</param>
/// <exception cref="System.Exception">
///   Thrown when a put-back character is already pending.
/// </exception>
void putback (int c)
{
        if (putback_char == -1){
                putback_char = c;
                return;
        }

        throw new Exception ("This should not happen putback on putback");
}
745
/// <summary>
///   Tells whether more tokens may be available.
/// </summary>
/// <returns>
///   false once the current token is Token.EOF, true otherwise.
/// </returns>
public bool advance ()
{
        bool at_end = (current_token == Token.EOF);
        return !at_end;
}
750
/// <summary>
///   The value attached to the most recently scanned token (the content
///   of <c>val</c>); may be null.
/// </summary>
public Object Value {
        get { return val; }
}
756
/// <summary>
///   Method form of the Value property: returns the value attached to the
///   most recently scanned token.
/// </summary>
/// <returns>The current content of <c>val</c>; may be null.</returns>
public Object value ()
{
        return Value;
}
761
/// <summary>
///   Tests whether a character that was just read ends the current line
///   and, when it does, advances the line bookkeeping via nextLine ().
/// </summary>
/// <param name="currentChar">
///   The character just read, or -1 for end of input.
/// </param>
/// <returns>
///   true for CR (0x0D), LF (0x0A), U+2028, U+2029 and -1; false otherwise.
/// </returns>
private bool IsEOL (int currentChar)
{
        bool line_ended;

        switch (currentChar){
        case 0x0D:
                // if it is a CR-LF pair consume LF also
                if (peekChar () == 0x0A)
                        getChar ();
                line_ended = true;
                break;

        case -1:
        case 0x0A:
        case 0x2028:
        case 0x2029:
                line_ended = true;
                break;

        default:
                line_ended = false;
                break;
        }

        if (line_ended)
                nextLine ();

        return line_ended;
}
782
/// <summary>
///   Discards the remainder of the current line (the body of a comment),
///   including its line terminator.
/// </summary>
/// <returns>Always Token.EOL.</returns>
private int DropComments ()
{
        for (;;){
                int ch = getChar ();

                // IsEOL also accepts -1, so this stops at end of input too.
                if (IsEOL (ch))
                        break;

                col++;
        }

        return Token.EOL;
}
791                 
// True while next_token holds a token that was read ahead by token ()
// and must be handed out by the next call to xtoken ().
public bool putbacktoken = false;

// Not referenced in this part of the file.
public bool flag = false;

// The token read ahead after a Token.END; valid while putbacktoken is true.
int next_token;

/// <summary>
///   Returns the next token for the parser. On top of xtoken () this
///   distinguishes END followed by end of line (Token.END_EOL) from a
///   plain Token.END, turns comments into Token.EOL, and collapses an
///   EOL that directly follows a previous EOL.
/// </summary>
/// <returns>The token code.</returns>
public int token ()
{
        int previous = current_token;

        for (;;){
                current_token = xtoken ();

                if (current_token == Token.END){
                        // Look one token ahead; it is replayed by the next
                        // xtoken () call through putbacktoken/next_token.
                        next_token = xtoken ();
                        putbacktoken = true;
                        return (next_token == Token.EOL) ? Token.END_EOL : Token.END;
                }

                if (current_token == 0)
                        return Token.EOF;

                if (current_token == Token.REM)
                        current_token = DropComments ();

                // Keep scanning only while this would be a repeated EOL.
                if (previous != Token.EOL || current_token != Token.EOL)
                        return current_token;
        }
}
818
/// <summary>
///   Reads an identifier starting at the next input character. The first
///   character is consumed even when it cannot start an identifier.
/// </summary>
/// <returns>
///   The identifier text, or null when the next character is not an
///   identifier start character.
/// </returns>
private string GetIdentifier ()
{
        int first = getChar ();

        return is_identifier_start_character ((char) first) ? GetIdentifier (first) : null;
}
827
/// <summary>
///   Reads the rest of an identifier whose first character has already
///   been consumed, advancing <c>col</c> for every further character.
/// </summary>
/// <param name="c">The already-consumed first character.</param>
/// <returns>The complete identifier text (never null).</returns>
private string GetIdentifier (int c)
{
        StringBuilder id = new StringBuilder ();
        id.Append ((char) c);

        for (;;){
                int next = peekChar ();
                if (next == -1 || !is_identifier_part_character ((char) next))
                        break;

                id.Append ((char) getChar ());
                col++;
        }

        cant_have_a_type_character = false;

        return id.ToString ();
}
849
/// <summary>
///   Tests whether a character is accepted as a string delimiter.
/// </summary>
/// <param name="currentChar">The character to test.</param>
/// <returns>
///   true for the ASCII double quote and for the Unicode left (U+201C)
///   and right (U+201D) double-quote characters.
/// </returns>
private bool is_doublequote (int currentChar)
{
        switch (currentChar){
        case '"':
        case 0x201C:    // unicode left double-quote character
        case 0x201D:    // unicode right double-quote character
                return true;

        default:
                return false;
        }
}
856                 
/// <summary>
///   Tests whether a character is skipped as white space by the scanner.
/// </summary>
/// <param name="c">The character to test.</param>
/// <returns>
///   true for space, tab, vertical tab, carriage return and the
///   no-break space (0xA0); false otherwise. Line feed is not included.
/// </returns>
private bool is_whitespace (int c)
{
        switch (c){
        case ' ':
        case '\t':
        case '\v':
        case '\r':
        case 0xa0:
                return true;

        default:
                return false;
        }
}
861                 
// Set to true when a token is produced on the current line; reset at
// every line break.
private bool tokens_seen = false;

/// <summary>
///   Updates the scanner state for the start of a new line: bumps both
///   line counters, resets the column and the per-line flags.
/// </summary>
private void nextLine ()
{
        line++;
        ref_line++;
        col = 0;
        tokens_seen = false;
        cant_have_a_type_character = true;
}
872
/// <summary>
///   Scans and returns the next raw token from the input. When token ()
///   has read one token ahead (putbacktoken), that token is returned
///   first. On return <c>val</c> holds the token's value, if it has one.
/// </summary>
/// <returns>
///   The token code. Token.ERROR is returned for a character that starts
///   no token and for a malformed escaped identifier; in both cases
///   <c>error_details</c> describes the problem. At end of input
///   Token.EOL is returned first when the previous token was not an EOL,
///   then Token.EOF.
/// </returns>
public int xtoken ()
{
        int t;
        bool doread = false;
        int c;

        // Replay the token that token () read ahead.
        if (putbacktoken){
                putbacktoken = false;
                return next_token;
        }

        val = null;
        for (;(c = getChar ()) != -1; col++) {

                // Handle line continuation character: a '_' that is not
                // followed by an identifier character discards the rest of
                // the physical line; scanning resumes with the first
                // character of the next line.
                if (c == '_') {
                        int d = peekChar ();
                        if (!is_identifier_part_character ((char) d)) {
                                while ((c = getChar ()) != -1 && !IsEOL (c)) {}
                                c = getChar ();
                        }
                }

                // white space
                if (is_whitespace (c)) {
                        // expand tabs for location
                        if (c == '\t')
                                col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
                        cant_have_a_type_character = true;
                        continue;
                }

                // Handle line comments.
                if (c == '\'')
                        return Token.REM;

                // Handle EOL.
                if (IsEOL (c)) {
                        if (current_token == Token.EOL) // if last token was also EOL keep skipping
                                continue;
                        return Token.EOL;
                }

                // Handle escaped identifiers
                if (c == '[') {
                        string escaped = GetIdentifier ();
                        if (escaped == null) {
                                // Report a lexical error instead of falling out of
                                // the loop, which would hand the parser a spurious
                                // EOL/EOF while input is still pending.
                                error_details = "Invalid escaped identifier: expected an identifier after '['";
                                return Token.ERROR;
                        }
                        val = escaped;

                        if ((c = getChar ()) != ']') {
                                error_details = "Invalid escaped identifier: expected ']'";
                                return Token.ERROR;
                        }
                        tokens_seen = true;
                        return Token.IDENTIFIER;
                }

                // Handle unescaped identifiers
                if (is_identifier_start_character ((char) c)) {
                        string id = GetIdentifier (c);
                        val = id;
                        tokens_seen = true;
                        // A keyword right after a dot is a member name, not a keyword.
                        if (is_keyword (id) && (current_token != Token.DOT))
                                return getKeyword (id);
                        return Token.IDENTIFIER;
                }

                // Treat string literals
                if (is_doublequote (c)) {
                        cant_have_a_type_character = true;
                        return ExtractStringOrCharLiteral (c);
                }

                // handle numeric literals
                if (Char.IsDigit ((char) c)) {
                        cant_have_a_type_character = true;
                        tokens_seen = true;
                        return is_number (c);
                }

                if (c == '.') {
                        cant_have_a_type_character = true;
                        tokens_seen = true;
                        // ".5" is a number, a lone '.' is the member-access dot.
                        if (Char.IsDigit ((char) peekChar ()))
                                return is_number (c);
                        return Token.DOT;
                }

                if ((t = is_punct ((char) c, ref doread)) != Token.ERROR) {
                        cant_have_a_type_character = true;

                        if (t == Token.NONE)
                                continue;

                        // is_punct asked for one more character to be consumed.
                        if (doread){
                                getChar ();
                                col++;
                        }
                        tokens_seen = true;
                        return t;
                }

                error_details = ((char) c).ToString ();
                return Token.ERROR;
        }

        if (current_token != Token.EOL) // if last token wasn't EOL send it before EOF
                return Token.EOL;

        return Token.EOF;
}
989
/// <summary>
///   Reads the text of a date literal up to its closing '#' and parses
///   it with ParseDateLiteral (). Every '-' in the text is replaced by
///   '/' before parsing.
/// </summary>
/// <returns>
///   Token.LITERAL_DATE with <c>val</c> set to the parsed date, or
///   Token.ERROR when the line or the input ends before the closing '#'.
/// </returns>
private int ExtractDateTimeLiteral ()
{
        StringBuilder text = new StringBuilder ();

        int ch = getChar ();
        while (ch != -1){
                if (ch == '#'){
                        val = ParseDateLiteral (text);
                        return Token.LITERAL_DATE;
                }

                if (IsEOL (ch))
                        break;

                text.Append (ch == '-' ? '/' : (char) ch);

                col++;
                ch = getChar ();
        }

        return Token.ERROR;
}
1010                 
/// <summary>
///   Reads a string literal, or a character literal (a one-character
///   string followed by 'C' or 'c'), whose opening quote has already
///   been consumed. A doubled quote inside the literal stands for a
///   single quote character.
/// </summary>
/// <param name="c">
///   The opening quote; its value is not used, the parameter is reused
///   as the scan variable.
/// </param>
/// <returns>
///   Token.LITERAL_STRING or Token.LITERAL_CHARACTER with <c>val</c> set
///   to the literal value; Token.ERROR for a character literal whose
///   length is not 1 (<c>val</c> then holds a message) and for a literal
///   that is not closed before the end of the line or of the input.
/// </returns>
private int ExtractStringOrCharLiteral (int c)
{
        StringBuilder s = new StringBuilder ();

        tokens_seen = true;

        while ((c = getChar ()) != -1){
                if (is_doublequote (c)){
                        if (!is_doublequote (peekChar ())){
                                // Closing quote: decide between string and char literal.
                                int suffix = peekChar ();
                                if (suffix != 'C' && suffix != 'c'){
                                        val = s.ToString ();
                                        return Token.LITERAL_STRING;
                                }

                                //handle Char Literals
                                getChar ();
                                if (s.Length != 1){
                                        val = "Incorrect length for a character literal";
                                        return Token.ERROR;
                                }
                                val = s [0];
                                return Token.LITERAL_CHARACTER;
                        }

                        // Doubled quote: drop the second one, the first is
                        // appended below like any other character.
                        getChar ();
                }

                if (IsEOL (c))
                        return Token.ERROR;

                s.Append ((char) c);
        }

        return Token.ERROR;
}
1048
// Culture used to parse date literals. useUserOverride is false so that
// the result never depends on regional settings customized by the user
// running the compiler.
static IFormatProvider enUSculture = new CultureInfo ("en-US", false);

/// <summary>
///   Converts the text of a date literal into a DateTime using en-US
///   formatting rules.
/// </summary>
/// <param name="value">The literal text found between the '#' marks.</param>
/// <returns>
///   The parsed date; a default DateTime when the text cannot be parsed,
///   in which case the failure is reported through Report.Error.
/// </returns>
private DateTime ParseDateLiteral (StringBuilder value)
{
        try {
                return DateTime.Parse (value.ToString (),
                                       enUSculture,
                                       DateTimeStyles.NoCurrentDateDefault | DateTimeStyles.AllowWhiteSpaces);
        } catch (FormatException ex) {
                //TODO: What is the correct error number and message?
                Report.Error (1, Location, string.Format ("Invalid date literal '{0}'", value.ToString ())
                        + Environment.NewLine + ex.ToString ());
        } catch (Exception) {
                //TODO: What is the correct error number and message?
                Report.Error (1, Location, "Error parsing date literal");
        }

        return new DateTime ();
}
1071  
/// <summary>
///   Skips tokens up to the next Token.HASH, then pushes a '#' back into
///   the input so that scanning resumes at that '#'.
/// </summary>
/// <exception cref="System.ApplicationException">
///   Thrown when the end of the input is reached before a Token.HASH.
/// </exception>
public void PositionCursorAtNextPreProcessorDirective ()
{
        int t = token ();
        while (t != Token.HASH && t != Token.EOF)
                t = token ();

        if (t == Token.EOF)
                throw new ApplicationException ("Unexpected EOF while looking for a pre-processor directive");

        // The loop only stops on HASH or EOF, so this is the HASH case.
        tokens_seen = false;
        putback ('#');
}
1086
1087         }
1088 }