2006-08-17 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / mbas / mb-tokenizer.cs
index 37a4754754cf121cf407ec034580c5985ec4ffd3..f1f2c267e5595615f0e5d8618cfde863ed3e3dce 100644 (file)
@@ -2,7 +2,7 @@
 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
 //
 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
-//        
+//      : Manjula GHM (mmanjula@novell.com)  
 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
 //
 // Licensed under the terms of the GNU GPL
@@ -18,7 +18,7 @@ namespace Mono.MonoBASIC
        using System.IO;
        using System.Globalization;
        using Mono.Languages;
-       using Mono.CSharp;
+       using Mono.MonoBASIC;
        
        /// <summary>
        ///    Tokenizer for MonoBASIC source code. 
@@ -27,12 +27,15 @@ namespace Mono.MonoBASIC
        public class Tokenizer : yyParser.yyInput
        {
                TextReader reader;
-               public string ref_name;
-               public int ref_line = 1;
-               public int line = 1;
-               public int col = 1;
-               public int current_token;
+               string file_name;
+               string ref_name;
+               int ref_line = 0;
+               int line = 0;
+               int col = 1;
+               public int current_token = Token.ERROR;
+               public int last_token = Token.ERROR;
                bool handle_get_set = false;
+               bool cant_have_a_type_character = false;
 
                public int ExpandedTabsSize = 4; 
 
@@ -71,9 +74,10 @@ namespace Mono.MonoBASIC
                //
                // Values for the associated token returned
                //
-               System.Text.StringBuilder number;
-               int putback_char;
+               StringBuilder number;
+               int putback_char = -1;
                Object val;
+               long lon = 0;
                
                //
                // Details about the error encountered by the tokenizer
@@ -85,13 +89,45 @@ namespace Mono.MonoBASIC
                                return error_details;
                        }
                }
+
                
+               public string Source {
+                       get {
+                               return file_name;
+                       }
+
+                       set {
+                               file_name = value;
+                               ref_name = value;
+                               Location.SetCurrentSource(file_name);
+                       }
+               }
+
+               public string EffectiveSource {
+                       get {
+                               return ref_name;
+                       }
+                       set {
+                               ref_name = value;
+                               Location.SetCurrentSource(ref_name);
+                       }
+               }
+
                public int Line {
                        get {
                                return line;
                        }
                }
 
+               public int EffectiveLine {
+                       get {
+                               return ref_line;
+                       }
+                       set {
+                               ref_line = value;
+                       }
+               }
+
                public int Col {
                        get {
                                return col;
@@ -111,6 +147,7 @@ namespace Mono.MonoBASIC
                        keywords.Add ("as", Token.AS);
                        keywords.Add ("assembly", Token.ASSEMBLY);
                        keywords.Add ("auto", Token.AUTO);
+                       keywords.Add ("binary", Token.BINARY); // Not a VB.NET Keyword 
                        keywords.Add ("boolean", Token.BOOLEAN);
                        keywords.Add ("byref", Token.BYREF);
                        keywords.Add ("byte", Token.BYTE);
@@ -129,7 +166,7 @@ namespace Mono.MonoBASIC
                        keywords.Add ("class", Token.CLASS);
                        keywords.Add ("clng", Token.CLNG);
                        keywords.Add ("cobj", Token.COBJ);
-                       //keywords.Add ("compare", Token.COMPARE);
+                       keywords.Add ("compare", Token.COMPARE); // Not a VB.NET Keyword
                        keywords.Add ("const", Token.CONST);
                        keywords.Add ("cshort", Token.CSHORT);
                        keywords.Add ("csng", Token.CSNG);
@@ -141,25 +178,28 @@ namespace Mono.MonoBASIC
                        keywords.Add ("default", Token.DEFAULT);
                        keywords.Add ("delegate", Token.DELEGATE);
                        keywords.Add ("dim", Token.DIM);
+                       keywords.Add ("directcast", Token.DIRECTCAST);                  
                        keywords.Add ("do", Token.DO);
                        keywords.Add ("double", Token.DOUBLE);
                        keywords.Add ("each", Token.EACH);
                        keywords.Add ("else", Token.ELSE);
                        keywords.Add ("elseif", Token.ELSEIF);
                        keywords.Add ("end", Token.END);
+                       keywords.Add ("endif", Token.ENDIF); // An unused VB.NET keyword
                        keywords.Add ("enum", Token.ENUM);
                        keywords.Add ("erase", Token.ERASE);
                        keywords.Add ("error", Token.ERROR);
                        keywords.Add ("event", Token.EVENT);
                        keywords.Add ("exit", Token.EXIT);
-                       //keywords.Add ("explicit", Token.EXPLICIT);
+                       keywords.Add ("explicit", Token.EXPLICIT); // Not a VB.NET keyword 
                        keywords.Add ("false", Token.FALSE);
                        keywords.Add ("finally", Token.FINALLY);
                        keywords.Add ("for", Token.FOR);
                        keywords.Add ("friend", Token.FRIEND);
                        keywords.Add ("function", Token.FUNCTION);
                        keywords.Add ("get", Token.GET);
-                       //keywords.Add ("gettype", Token.GETTYPE);
+                       keywords.Add ("gettype", Token.GETTYPE);
+                       keywords.Add ("gosub", Token.GOSUB); // An unused VB.NET keyword 
                        keywords.Add ("goto", Token.GOTO);
                        keywords.Add ("handles", Token.HANDLES);
                        keywords.Add ("if", Token.IF);
@@ -170,9 +210,9 @@ namespace Mono.MonoBASIC
                        keywords.Add ("integer", Token.INTEGER);
                        keywords.Add ("interface", Token.INTERFACE);
                        keywords.Add ("is", Token.IS);
-                       keywords.Add ("let ", Token.LET );
+                       keywords.Add ("let ", Token.LET ); // An unused VB.NET keyword
                        keywords.Add ("lib ", Token.LIB );
-                       keywords.Add ("like ", Token.LIKE );
+                       keywords.Add ("like", Token.LIKE );
                        keywords.Add ("long", Token.LONG);
                        keywords.Add ("loop", Token.LOOP);
                        keywords.Add ("me", Token.ME);
@@ -190,6 +230,7 @@ namespace Mono.MonoBASIC
                        keywords.Add ("notinheritable", Token.NOTINHERITABLE);
                        keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
                        keywords.Add ("object", Token.OBJECT);
+                       keywords.Add ("off", Token.OFF); // Not a VB.NET Keyword 
                        keywords.Add ("on", Token.ON);
                        keywords.Add ("option", Token.OPTION);
                        keywords.Add ("optional", Token.OPTIONAL);
@@ -217,14 +258,16 @@ namespace Mono.MonoBASIC
                        keywords.Add ("shared", Token.SHARED);
                        keywords.Add ("short", Token.SHORT);
                        keywords.Add ("single", Token.SINGLE);
-                       keywords.Add ("sizeof", Token.SIZEOF);
+                       keywords.Add ("sizeof", Token.SIZEOF); // Not a VB.NET Keyword 
                        keywords.Add ("static", Token.STATIC);
                        keywords.Add ("step", Token.STEP);
                        keywords.Add ("stop", Token.STOP);
+                       keywords.Add ("strict", Token.STRICT); // Not a VB.NET Keyword 
                        keywords.Add ("string", Token.STRING);
                        keywords.Add ("structure", Token.STRUCTURE);
                        keywords.Add ("sub", Token.SUB);
                        keywords.Add ("synclock", Token.SYNCLOCK);
+                       keywords.Add ("text", Token.TEXT); // Not a VB.NET Keyword
                        keywords.Add ("then", Token.THEN);
                        keywords.Add ("throw", Token.THROW);
                        keywords.Add ("to", Token.TO);
@@ -233,18 +276,21 @@ namespace Mono.MonoBASIC
                        keywords.Add ("typeof", Token.TYPEOF);
                        keywords.Add ("unicode", Token.UNICODE);
                        keywords.Add ("until", Token.UNTIL);
-                       keywords.Add ("variant", Token.VARIANT);
+                       keywords.Add ("variant", Token.VARIANT); // An unused VB.NET keyword
+                       keywords.Add ("wend", Token.WEND); // An unused VB.NET keyword
                        keywords.Add ("when", Token.WHEN);
                        keywords.Add ("while", Token.WHILE);
                        keywords.Add ("with", Token.WITH);
                        keywords.Add ("withevents", Token.WITHEVENTS);
                        keywords.Add ("writeonly", Token.WRITEONLY);
                        keywords.Add ("xor", Token.XOR);
+
+                       if (Parser.UseExtendedSyntax){
+                               keywords.Add ("yield", Token.YIELD);
+                       }
+
                }
 
-               //
-               // Class initializer
-               // 
                static Tokenizer ()
                {
                        initTokens ();
@@ -253,12 +299,24 @@ namespace Mono.MonoBASIC
                        styles = NumberStyles.AllowExponent | NumberStyles.AllowDecimalPoint;
                }
 
+               public Tokenizer (System.IO.TextReader input, string fname, ArrayList defines)
+               {
+                       this.Source = fname;
+
+                       reader = input;
+
+                       // putback an EOL at the beginning of a stream. This is a convenience that 
+                       // allows pre-processor directives to be added to the beginning of a vb file.
+                       putback('\n');
+               }
+
+               //
+               // Returns true when `name', compared case-insensitively, is present
+               // in the keyword table.  "get" and "set" are only reported as
+               // keywords while handle_get_set is true.
+               //
                bool is_keyword (string name)
                {
                        bool res;
+                       name = name.ToLower();
 
-                       res = keywords.Contains(name.ToLower());
-                       if ((name == "get" || name == "set") && handle_get_set == false)
+                       res = keywords.Contains(name);
+                       // name was lower-cased above, so it has to be compared against
+                       // lower-case literals; upper-case ones could never match.
+                       if ((name == "get" || name == "set") && handle_get_set == false)
                                return false;
                        return res;
                }
@@ -270,7 +328,7 @@ namespace Mono.MonoBASIC
                
                public Location Location {
                        get {
-                               return new Location (ref_line);
+                               return new Location (ref_line, col);
                        }
                }
                
@@ -296,12 +354,15 @@ namespace Mono.MonoBASIC
 
                int is_punct (char c, ref bool doread)
                {
-                       int idx = "{}[](),:;~+-*/%&|^!=<>?".IndexOf (c);
                        int d;
                        int t;
 
                        doread = false;
-
+                       
+                       error_details = c.ToString();
+                       
+                       d = peekChar ();
+                       
                        switch (c){
                        case '[':
                                return Token.OPEN_BRACKET;
@@ -317,72 +378,76 @@ namespace Mono.MonoBASIC
                                return Token.CLOSE_PARENS;
                        case ',':
                                return Token.COMMA;
-                       //case ':':
-                       //      return Token.COLON;
                        case '?':
                                return Token.INTERR;
+                       case '!':
+                               if (is_identifier_start_character((char)d) || cant_have_a_type_character)
+                                       return Token.EXCLAMATION;
+                               return Token.SINGLETYPECHAR;
+                       case '$':
+                               if (cant_have_a_type_character)
+                                       return Token.ERROR;
+                               return Token.DOLAR_SIGN;
+                       case '@':
+                               if (cant_have_a_type_character)
+                                       return Token.ERROR;
+                               return Token.AT_SIGN;
+                       case '%':
+                               if (cant_have_a_type_character)
+                                       return Token.ERROR;
+                               return Token.PERCENT;
+                       case '#':
+                               if(tokens_seen)
+                               {
+                                       if (cant_have_a_type_character) 
+                                               return ExtractDateTimeLiteral();
+                                       else
+                                               return Token.NUMBER_SIGN;
+                               }
+                               else 
+                               {
+                                       tokens_seen = true;
+                                       return Token.HASH;
+                               } 
                        case '&':
-                               return Token.OP_CONCAT;                         
+                               if (!cant_have_a_type_character)
+                                       return Token.LONGTYPECHAR;
+                               t = handle_integer_literal_in_other_bases(d);
+                               if (t == Token.NONE) {
+                                       t = Token.OP_CONCAT;
+                               }
+                               return t;                       
                        }
 
-                       d = peekChar ();
                        if (c == '+'){
-                               
                                if (d == '+')
                                        t = Token.OP_INC;
-                               else if (d == '=')
-                                       t = Token.OP_ADD_ASSIGN;
-                               else
+                               else 
                                        return Token.PLUS;
                                doread = true;
                                return t;
                        }
                        if (c == '-'){
-                               if (d == '=')
-                                       t = Token.OP_SUB_ASSIGN;
-                               else
-                                       return Token.MINUS;
-                               doread = true;
-                               return t;
+                               return Token.MINUS;
                        }
 
                        if (c == '='){
-                               /*if (d == '='){
-                                       doread = true;
-                                       return Token.OP_EQ;
-                               }*/
                                return Token.ASSIGN;
                        }
 
                        if (c == '*'){
-                               if (d == '='){
-                                       doread = true;
-                                       return Token.OP_MULT_ASSIGN;
-                               }
                                return Token.STAR;
                        }
 
                        if (c == '/'){
-                               if (d == '='){
-                                       doread = true;
-                                       return Token.OP_DIV_ASSIGN;
-                               }
                                return Token.DIV;
                        }
 
                        if (c == '\\'){
-                               if (d == '='){
-                                       doread = true;
-                                       return Token.OP_IDIV_ASSIGN;
-                               }
                                return Token.OP_IDIV;
                        }
 
                        if (c == '^'){
-                               if (d == '='){
-                                       doread = true;
-                                       return Token.OP_EXP_ASSIGN;
-                               }
                                return Token.OP_EXP;
                        }
 
@@ -396,6 +461,11 @@ namespace Mono.MonoBASIC
                                        doread = true;
                                        return Token.OP_LE;
                                }
+                               if (d == '<')
+                               {
+                                       doread = true;
+                                       return Token.OP_SHIFT_LEFT;
+                               }
                                return Token.OP_LT;
                        }
 
@@ -404,8 +474,14 @@ namespace Mono.MonoBASIC
                                        doread = true;
                                        return Token.OP_GE;
                                }
+                               if (d == '>')
+                               {
+                                       doread = true;
+                                       return Token.OP_SHIFT_RIGHT;
+                               }
                                return Token.OP_GT;
                        }
+                       
                        if (c == ':'){
                                if (d == '='){
                                        doread = true;
@@ -413,6 +489,7 @@ namespace Mono.MonoBASIC
                                }
                                return Token.COLON;
                        }                       
+                       
                        return Token.ERROR;
                }
 
@@ -423,7 +500,6 @@ namespace Mono.MonoBASIC
                        
                        if (c != -1)
                                number.Append ((char) c);
-                       
                        while ((d = peekChar ()) != -1){
                                if (Char.IsDigit ((char)d)){
                                        number.Append ((char) d);
@@ -435,23 +511,6 @@ namespace Mono.MonoBASIC
                        return seen_digits;
                }
 
-               void hex_digits (int c)
-               {
-                       int d;
-
-                       if (c != -1)
-                               number.Append ((char) c);
-                       while ((d = peekChar ()) != -1){
-                               char e = Char.ToUpper ((char) d);
-                               
-                               if (Char.IsDigit (e) ||
-                                   (e >= 'A' && e <= 'F')){
-                                       number.Append ((char) e);
-                                       getChar ();
-                               } else
-                                       break;
-                       }
-               }
                
                int real_type_suffix (int c)
                {
@@ -461,10 +520,10 @@ namespace Mono.MonoBASIC
                        case 'F': case 'f':
                                t =  Token.LITERAL_SINGLE;
                                break;
-                       case 'D': case 'd':
+                       case 'R': case 'r':
                                t = Token.LITERAL_DOUBLE;
                                break;
-                       case 'M': case 'm':
+                       case 'D': case 'd':
                                 t= Token.LITERAL_DECIMAL;
                                break;
                        default:
@@ -476,23 +535,54 @@ namespace Mono.MonoBASIC
 
+               //
+               // Converts the integer literal currently stored in `val' according
+               // to the optional type suffix character `c':
+               //
+               //   S/s  -> Int16, I/i -> Int32, L/l -> Int64 (the suffix is consumed)
+               //   else -> Int32 when the value fits, Int64 otherwise (nothing is
+               //           consumed)
+               //
+               // The callers visible in this file store a boxed Int64 in `val'
+               // before calling; the default branch unboxes it as such.
+               //
+               // Returns Token.LITERAL_INTEGER, or Token.ERROR with the exception
+               // text stored in `val' when a conversion fails.
+               //
                int integer_type_suffix (int c)
                {
-                       // FIXME: Handle U and L suffixes.
-                       // We also need to see in which kind of
-                       // Int the thing fits better according to the spec.
-                       return Token.LITERAL_INTEGER;
+                       int t;
+
+                       try {
+                               switch (c){
+                               case 'S': case 's':
+                                       t = Token.LITERAL_INTEGER; // SHORT ?
+
+                                       // Hexadecimal literals: &H8000S is "-32768" and
+                                       // not an overflow exception, so wrap explicitly.
+                                       // Check for other literals ???
+                                       if (lon == 32768)
+                                               val = unchecked ((short) lon);
+                                       else
+                                               val = ((IConvertible)val).ToInt16(null);
+                                       break;
+                               case 'I': case 'i':
+                                       t = Token.LITERAL_INTEGER;
+                                       val = ((IConvertible)val).ToInt32(null);
+                                       break;
+                               case 'L': case 'l':
+                                       t = Token.LITERAL_INTEGER; // LONG ?
+                                       val = ((IConvertible)val).ToInt64(null);
+                                       break;
+                               default:
+                                       if ((long)val <= System.Int32.MaxValue &&
+                                               (long)val >= System.Int32.MinValue) {
+                                               val = ((IConvertible)val).ToInt32(null);
+                                               return Token.LITERAL_INTEGER;
+                                       } else {
+                                               val = ((IConvertible)val).ToInt64(null);
+                                               return Token.LITERAL_INTEGER; // LONG ?
+                                       }
+                               }
+                               // Only a recognised suffix character is consumed.
+                               getChar ();
+                               return t;
+                       } catch (Exception e) {
+                               val = e.ToString();
+                               return Token.ERROR;
+                       } finally {
+                               // lon is written by hex_digits () only.  Clear it once the
+                               // literal has been classified; otherwise a stale 32768 left
+                               // by an earlier &H8000 would turn every later S-suffixed
+                               // literal into -32768.
+                               // NOTE(review): assumes nothing outside this method reads
+                               // lon after a literal has been classified -- confirm.
+                               lon = 0;
+                       }
                }
                
-               void adjust_int (int t)
-               {
-                       val = new System.Int32();
-                       val = System.Int32.Parse (number.ToString (), 0);
-               }
-
                int adjust_real (int t)
                {
                        string s = number.ToString ();
 
-                       Console.WriteLine (s);
                        switch (t){
                        case Token.LITERAL_DECIMAL:
                                val = new System.Decimal ();
@@ -520,42 +610,94 @@ namespace Mono.MonoBASIC
                        return t;
                }
 
+               long hex_digits ()
+               {
+                       StringBuilder hexNumber = new StringBuilder ();
+                       
+                       int d;
+
+                       while ((d = peekChar ()) != -1){
+                               char e = Char.ToUpper ((char) d);
+                               
+                               if (Char.IsDigit (e) || (e >= 'A' && e <= 'F')){
+                                       hexNumber.Append (e);
+                                       getChar ();
+                               } else
+                                       break;
+                       }
+                       lon = System.Int64.Parse (hexNumber.ToString(), NumberStyles.HexNumber);
+                       return lon;
+               }
+
+               long octal_digits ()
+               {
+                       long valueToReturn = 0;
+                       
+                       int d;
+
+                       while ((d = peekChar ()) != -1){
+                               char e = (char)d;                       
+                               if (Char.IsDigit (e) && (e < '8')){
+                                       valueToReturn *= 8;
+                                       valueToReturn += (d - (int)'0');
+                                       getChar ();
+                               } else
+                                       break;
+                       }
+                       
+                       return valueToReturn;
+               }
+
+               //
+               // Handles the &H (hexadecimal) and &O (octal) integer literal
+               // prefixes.  `peek' is the not yet consumed character that follows
+               // the '&'.
+               //
+               // Returns Token.NONE when `peek' is neither h/H nor o/O (nothing is
+               // consumed in that case).  Otherwise the prefix letter and the
+               // digits are consumed, the value is stored in `val' and the result
+               // of integer_type_suffix () is returned.  A hexadecimal literal
+               // that has no digits, or too many to fit an Int64, yields
+               // Token.ERROR with the exception text stored in `val'.
+               //
+               int handle_integer_literal_in_other_bases(int peek)
+               {
+                       if (peek == 'h' || peek == 'H'){
+                               getChar ();
+                               try {
+                                       val = hex_digits ();
+                               } catch (FormatException e) {
+                                       // "&H" was not followed by any hexadecimal digit
+                                       val = e.ToString();
+                                       return Token.ERROR;
+                               } catch (OverflowException e) {
+                                       // more hexadecimal digits than an Int64 can hold
+                                       val = e.ToString();
+                                       return Token.ERROR;
+                               }
+                               return integer_type_suffix (peekChar ());
+                       }
+
+                       if (peek == 'o' || peek == 'O'){
+                               getChar ();
+                               val = octal_digits ();
+                               return integer_type_suffix (peekChar ());
+                       }
+
+                       return Token.NONE;
+               }
+               
                //
                // Invoked if we know we have .digits or digits
                //
                int is_number (int c)
                {
                        bool is_real = false;
-                       number = new System.Text.StringBuilder ();
+                       number = new StringBuilder ();
                        int type;
+                       bool non_prefixdecimal = false; //To capture decimals like .50
 
                        number.Length = 0;
 
                        if (Char.IsDigit ((char)c)){
-                               if (c == '0' && peekChar () == 'x' || peekChar () == 'X'){
-                                       getChar ();
-                                       hex_digits (-1);
-                                       val = new System.Int32 ();
-                                       val = System.Int32.Parse (number.ToString (), NumberStyles.HexNumber);
-                                       return integer_type_suffix (peekChar ());
-                               }
                                decimal_digits (c);
-                               c = getChar ();
+                               c = peekChar ();        
+                               non_prefixdecimal = true;
                        }
 
                        //
                        // We need to handle the case of
-                       // "1.1" vs "1.string" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
+                       // "1.1" vs "1.ToString()" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
                        //
                        if (c == '.'){
-                               if (decimal_digits ('.')){
+                               if (non_prefixdecimal == false)
+                                        putback ('.');
+                               if (decimal_digits (getChar())){
                                        is_real = true;
                                        c = peekChar ();
                                } else {
                                        putback ('.');
                                        number.Length -= 1;
-                                       adjust_int (Token.LITERAL_INTEGER);
-                                       return Token.LITERAL_INTEGER;
+                                       val = System.Int64.Parse(number.ToString());
+                                       return integer_type_suffix('.');
                                }
                        }
                        
@@ -580,60 +722,13 @@ namespace Mono.MonoBASIC
 
                        type = real_type_suffix (c);
                        if (type == Token.NONE && !is_real){
-                               type = integer_type_suffix (c);
-                               adjust_int (type);
-                               putback (c);
-                               return type;
-                       } else
-                               is_real = true;
-
-                       if (is_real)
-                               return adjust_real (type);
-
-                       Console.WriteLine ("This should not be reached");
-                       throw new Exception ("Is Number should never reach this point");
-               }
-                       
-               int escape (int c)
-               {
-                       int d;
-                       int v;
-
-                       d = peekChar ();
-                       if (c != '\\')
-                               return c;
-                       
-                       switch (d){
-                       case 'a':
-                               v = '\a'; break;
-                       case 'b':
-                               v = '\b'; break;
-                       case 'n':
-                               v = '\n'; break;
-                       case 't':
-                               v = '\t'; break;
-                       case 'v':
-                               v = '\v'; break;
-                       case 'r':
-                               v = 'c'; break;
-                       case '\\':
-                               v = '\\'; break;
-                       case 'f':
-                               v = '\f'; break;
-                       case '0':
-                               v = 0; break;
-                       case '"':
-                               v = '"'; break;
-                       case '\'':
-                               v = '\''; break;
-                       default:
-                               error_details = "cs1009: Unrecognized escape sequence " + (char)d;
-                               return -1;
+                               val = System.Int64.Parse(number.ToString());
+                               return integer_type_suffix(c);
                        }
-                       getChar ();
-                       return v;
+                       
+                       return adjust_real (type);
                }
-
+                       
                int getChar ()
                {
                        if (putback_char != -1){
@@ -651,6 +746,7 @@ namespace Mono.MonoBASIC
                                return putback_char;
                        return reader.Peek ();
                }
+               
 
                void putback (int c)
                {
@@ -677,39 +773,72 @@ namespace Mono.MonoBASIC
 
                private bool IsEOL(int currentChar)
                {
-                       if (currentChar ==  0x0D)
-                       {
+                       bool retVal; // true when currentChar ends a line or is the end-of-input marker (-1)
+                       // NOTE: not a pure test - it may consume a following LF and it calls nextLine() on a match.
+                       if (currentChar ==  0x0D) { // CR, alone or as the first half of a CR-LF pair
                                if (peekChar() ==  0x0A) // if it is a CR-LF pair consume LF also
                                        getChar();
 
-                               return true;
+                               retVal = true;
                        }
-                       return (currentChar ==  -1 || currentChar ==  0x0A || currentChar ==  0x2028 || currentChar ==  0x2029);
+                       else {
+                               retVal = (currentChar ==  -1 || currentChar ==  0x0A || currentChar ==  0x2028 || currentChar ==  0x2029); // end of input, LF, U+2028 (line separator), U+2029 (paragraph separator)
+                       }
+
+                       if(retVal) { // do the line bookkeeping here so that callers do not have to
+                               nextLine();
+                       }
+
+                       return retVal;
                }
 
                private int DropComments()              
                {
-                       int d;
-                       while (!IsEOL(d = getChar ()))
+                       // Discard everything up to the line break; IsEOL() is also true at end of input (-1).
+                       while (!IsEOL(getChar ())) // IsEOL() calls nextLine(), so no line bookkeeping is needed here
                                col++;
-                       line++;
-                       ref_line++;
-                       col = 0;
 
                        return Token.EOL;
                }       
+               
+               public bool putbacktoken = false;
+               public bool flag = false;               
+               int next_token;
                        
                public int token ()
                {
-                       int lastToken = current_token;
+                       int before_last_token = last_token; // what last_token held during the previous call
+                       last_token = current_token; // the token the previous call left in current_token
                        do
                        {
                                current_token = xtoken ();
+                               if(current_token == Token.END) { // "End" needs one token of look-ahead
+                                       next_token = xtoken(); // read the token that follows ...
+                                       putbacktoken = true; // ... and make the next xtoken() call hand it back
+                                       if (next_token == Token.EOL) // End directly followed by a line break
+                                               return Token.END_EOL; // NOTE: current_token itself stays Token.END
+                                        else 
+                                               return Token.END;
+                               }       
+                               if (current_token == Token.COLON) { // ':' also needs one token of look-ahead
+                                       next_token = xtoken();
+                                       putbacktoken = true;
+                                       if (next_token == Token.EOL) { // the ':' is the last token on its line
+                                               if (last_token != Token.LABELNAME && last_token != Token.LITERAL_INTEGER) { // not preceded by a label name or a number
+                                                       current_token = Token.EOL; // the ':' and the line break collapse into a single EOL
+                                                       putbacktoken = false; // drop the looked-ahead EOL
+                                               }
+                                               else if (before_last_token == Token.GOTO) { // presumably "GoTo <label> :" - a jump target, not a label declaration (TODO confirm against the grammar)
+                                                       current_token = Token.EOL;
+                                                       putbacktoken = false; // drop the looked-ahead EOL
+                                               }
+                                       }
+                               }
                                if (current_token == 0) 
                                        return Token.EOF;
                                if (current_token == Token.REM)
                                        current_token = DropComments();
-                       } while (lastToken == Token.EOL && current_token == Token.EOL);
+                       } while (last_token == Token.EOL && current_token == Token.EOL); // skip an EOL that directly follows another EOL
 
                        return current_token;
                }
@@ -723,9 +852,16 @@ namespace Mono.MonoBASIC
                                return null;
                }
 
+               /// <summary>
+               ///    Tells whether the next unread character is a colon, which is how
+               ///    the tokenizer recognizes that the token just read may be a label.
+               ///    The character is only peeked at.
+               /// </summary>
+               private bool IsLabel ()
+               {
+                       int upcoming = peekChar();
+                       return ((char) upcoming) == ':';
+               }
+
                private string GetIdentifier(int c)
                {
-                       System.Text.StringBuilder id = new System.Text.StringBuilder ();
+                       StringBuilder id = new StringBuilder ();
 
                        id.Append ((char) c);
                                
@@ -739,8 +875,33 @@ namespace Mono.MonoBASIC
                                else 
                                        break;
                        }
+                       
+                       cant_have_a_type_character = false;
+                       
+                       return id.ToString();
+               }
 
-                       return id.ToString ();
+               /// <summary>
+               ///    True for every character accepted as a string delimiter: the ASCII
+               ///    double quote and the two Unicode curly double quotes.
+               /// </summary>
+               private bool is_doublequote(int currentChar)
+               {
+                       switch (currentChar) {
+                       case '"':       // plain ASCII double quote
+                       case 0x201C:    // unicode left double-quote character
+                       case 0x201D:    // unicode right double-quote character
+                               return true;
+                       default:
+                               return false;
+                       }
+               }
+               
+               /// <summary>
+               ///    True for the characters skipped as blanks: space, tab, vertical tab,
+               ///    carriage return and the no-break space (0xa0).
+               /// </summary>
+               private bool is_whitespace(int c)
+               {
+                       switch (c) {
+                       case ' ':
+                       case '\t':
+                       case '\v':
+                       case '\r':
+                       case 0xa0:
+                               return true;
+                       default:
+                               return false;
+                       }
+               }
+               
+               private bool tokens_seen = false;
+               
+               /// <summary>
+               ///    Line-break bookkeeping: advances both line counters, resets the
+               ///    column and the per-line flags.
+               /// </summary>
+               private void nextLine()
+               {
+                       // position
+                       line += 1;
+                       ref_line += 1;
+                       col = 0;
+
+                       // per-line state
+                       tokens_seen = false;
+                       cant_have_a_type_character = true;
                }
 
                public int xtoken ()
@@ -749,24 +910,42 @@ namespace Mono.MonoBASIC
                        bool doread = false;
                        int c;
 
+                       if (putbacktoken == true) {
+                               putbacktoken = false;
+                               return next_token;
+                       }
+       
                        val = null;
                        for (;(c = getChar ()) != -1; col++) {
                        
-                               // Handle line comments.
-                               if (c == '\'')
-                                       return Token.REM;
-                                       
                                // Handle line continuation character
-                               if (c == '_') {
-                                       while ((c = getChar ()) != -1 && (c != '\n')){}
-                                       c = getChar ();                                 
+                               if (c == '_') 
+                               {
+                                       int d = peekChar();
+                                       if (!is_identifier_part_character((char)d)) {
+                                               while ((c = getChar ()) != -1 && !IsEOL(c)) {}
+                                               c = getChar ();
+                                               tokens_seen = true;
+                                       }
                                }
+                                       
+                               
+                               // white space
+                               if (is_whitespace(c)) {
+                                       // expand tabs for location
+                                       if (c == '\t')
+                                               col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
+                                       cant_have_a_type_character = true;
+                                       continue;
+                               }
+                               
+                               // Handle line comments.
+                               if (c == '\'')
+                                       return Token.REM;                                       
+                               
                                // Handle EOL.
                                if (IsEOL(c))
                                {
-                                       line++;
-                                       ref_line++;
-                                       col = 0;
                                        if (current_token == Token.EOL) // if last token was also EOL keep skipping
                                                continue;
                                        return Token.EOL;
@@ -775,10 +954,17 @@ namespace Mono.MonoBASIC
                                // Handle escaped identifiers
                                if (c == '[')
                                {
+                                       bool is_first_token_in_line = !tokens_seen;
                                        if ((val = GetIdentifier()) == null)
                                                break;
                                        if ((c = getChar()) != ']')
                                                break;
+                                       tokens_seen = true;
+                                       if (IsLabel() && is_first_token_in_line)
+                                               return Token.LABELNAME;
+
+                                       if (last_token == Token.GOTO)
+                                               return Token.LABELNAME;
                                        return Token.IDENTIFIER;
                                }
 
@@ -786,120 +972,164 @@ namespace Mono.MonoBASIC
                                if (is_identifier_start_character ((char) c))
                                {
                                        string id;
+                                       bool is_first_token_in_line = !tokens_seen;
                                        if ((id = GetIdentifier(c)) == null)
                                                break;
-                                       if (is_keyword(id))
-                                               return getKeyword(id);
                                        val = id;
+                                       tokens_seen = true;
+                                       if (is_keyword(id) && (current_token != Token.DOT))
+                                               return getKeyword(id);
+
+                                       if (IsLabel() && is_first_token_in_line)
+                                               return Token.LABELNAME;
+
+                                       if (last_token == Token.GOTO)
+                                               return Token.LABELNAME;
                                        return Token.IDENTIFIER;
                                }
 
+                               // Treat string literals
+                               if (is_doublequote(c)) {
+                                       cant_have_a_type_character = true;
+                                       return ExtractStringOrCharLiteral(c);
+                               }
+                       
                                // handle numeric literals
-                               if (c == '.'){
+
+                               if (Char.IsDigit ((char) c))
+                                {
+                                        cant_have_a_type_character = false;
+                                        tokens_seen = true;
+                                        return is_number (c);
+                                }
+
+                               if (c == '.')
+                               {
+                                       cant_have_a_type_character = true;
+                                       tokens_seen = true;
                                        if (Char.IsDigit ((char) peekChar ()))
                                                return is_number (c);
                                        return Token.DOT;
                                }
-                               
-                               if (Char.IsDigit ((char) c))
-                                       return is_number (c);
-
-                               /* For now, limited support for pre-processor commands */
-                               if (col == 1 && c == '#'){
-                                       System.Text.StringBuilder s = new System.Text.StringBuilder ();
-                                       
-                                       while ((c = getChar ()) != -1 && (c != '\n')){
-                                               s.Append ((char) c);
-                                       }
-                                       if (String.Compare (s.ToString (), 0, "line", 0, 4) == 0){
-                                               string arg = s.ToString ().Substring (5);
-                                               int pos;
+                               if ((t = is_punct ((char)c, ref doread)) != Token.ERROR) {
+                                       cant_have_a_type_character = true;
 
-                                               if ((pos = arg.IndexOf (' ')) != -1 && pos != 0){
-                                                       ref_line = System.Int32.Parse (arg.Substring (0, pos));
-                                                       pos++;
-
-                                                       char [] quotes = { '\"' };
-
-                                                       ref_name = arg.Substring (pos);
-                                                       ref_name.TrimStart (quotes);
-                                                       ref_name.TrimEnd (quotes);
-                                               } else
-                                                       ref_line = System.Int32.Parse (arg);
-                                       }
-                                       line++;
-                                       ref_line++;
-                                       continue;
-                               }
-                               
-                               if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){
+                                       if (t == Token.NONE)
+                                               continue;
+                                               
                                        if (doread){
                                                getChar ();
                                                col++;
                                        }
+                                       tokens_seen = true;
                                        return t;
                                }
                                
-                               // Treat string literals
-                               if (c == '"'){
-                                       System.Text.StringBuilder s = new System.Text.StringBuilder ();
-
-                                       while ((c = getChar ()) != -1){
-                                               if (c == '"'){ // TODO: treat double-doublequotes
-                                                       val = s.ToString ();
-                                                       return Token.LITERAL_STRING;
-                                               }
-
-                                               c = escape (c);
-                                               if (c == -1)
-                                                       return Token.ERROR;
-                                               s.Append ((char) c);
-                                       }
-                               }
-                       
-                               // expand tabs for location and ignore it as whitespace
-                               if (c == '\t')
-                               {
-                                       col = (((col + ExpandedTabsSize) / ExpandedTabsSize) * ExpandedTabsSize) - 1;
-                                       continue;
-                               }
-
-                               // white space
-                               if (c == ' ' || c == '\f' || c == '\v')
-                                       continue;
-
                                error_details = ((char)c).ToString ();
-                               
                                return Token.ERROR;
                        }
 
-                       if (current_token != Token.EOL) // if last token wasnยดt EOL send it before EOF
+                       if (current_token != Token.EOL) // if last token wasn't EOL send it before EOF
                                return Token.EOL;
                        
                        return Token.EOF;
                }
 
-               public void cleanup ()
+               private int ExtractDateTimeLiteral()
                {
-/* borrowed from mcs - have to work it to have preprocessing in mbas
+                       // Collects the characters up to the closing '#', turning every '-' into '/'
+                       // on the way, and stores the parsed date in val. Running into a line break
+                       // or the end of input before the closing '#' yields Token.ERROR.
+                       StringBuilder text = new StringBuilder();
+                       int ch = getChar ();
+
+                       while (ch != -1) {
+                               if (ch == '#') {
+                                       val = ParseDateLiteral(text);
+                                       return Token.LITERAL_DATE;
+                               }
+
+                               if (IsEOL(ch))
+                                       return Token.ERROR;
+
+                               text.Append(ch == '-' ? '/' : (char)ch);
+
+                               col++;
+                               ch = getChar ();
+                       }
+
+                       return Token.ERROR;
+               }
+               
+               private int ExtractStringOrCharLiteral(int c)
+               {
+                       StringBuilder s = new StringBuilder (); // text of the literal, without the delimiting quotes
+                       // The incoming value of c is never read: the loop below overwrites it before its first use.
+                       tokens_seen = true;
+
+                       while ((c = getChar ()) != -1){
+                               if (is_doublequote(c)){
+                                       if (is_doublequote(peekChar())) // a doubled quote stands for one quote inside the literal
+                                               getChar(); // skip the second quote; c (the first one) is appended further down
+                                       else { // this quote closes the literal
+                                               // a 'C' or 'c' right after the closing quote marks a character literal
+                                               if (peekChar() == 'C' || peekChar() == 'c') {
+                                                       getChar();
+                                                       if (s.Length == 1) { // a character literal must hold exactly one character
+                                                               val = s[0];
+                                                               return Token.LITERAL_CHARACTER;
+                                                       } else {
+                                                               val = "Incorrect length for a character literal"; // the message is handed back through val
+                                                               return Token.ERROR;
+                                                       }                                                       
+                                               } else {
+                                                       val = s.ToString ();
+                                                       return Token.LITERAL_STRING;
+                                               }
+                                       }
+                               }
 
-                       if (ifstack != null && ifstack.Count >= 1) {
-                               int state = (int) ifstack.Pop ();
-                               if ((state & REGION) != 0)
-                                       Report.Error (1038, "#endregion directive expected");
-                               else 
-                                       Report.Error (1027, "#endif directive expected");
+                               if (IsEOL(c)) { // a line break before the closing quote is an error (IsEOL has already called nextLine())
+                                       return Token.ERROR;
+                               }
+                       
+                               s.Append ((char) c);
                        }
-*/                             
+                                       
+                       return Token.ERROR; // end of input before the closing quote
                }
 
-               public Tokenizer (System.IO.TextReader input, string fname, ArrayList defines)
+               static IFormatProvider enUSculture = new CultureInfo("en-US", true);
+
+               private DateTime ParseDateLiteral(StringBuilder value)
                {
-                       this.ref_name = fname;
-                       reader = input;
-                       putback_char = -1;
+                       // Parses the collected text with the en-US culture. Any failure is reported
+                       // through Report.Error and a default-constructed DateTime is returned instead.
+                       DateTimeStyles styles = DateTimeStyles.NoCurrentDateDefault | DateTimeStyles.AllowWhiteSpaces;
+                       DateTime parsed = new DateTime();
+
+                       try {
+                               parsed = DateTime.Parse(value.ToString(), enUSculture, styles);
+                       } catch (FormatException ex) {
+                               //TODO: What is the correct error number and message?
+                               Report.Error (1, Location,
+                                       string.Format("Invalid date literal '{0}'", value.ToString()) + Environment.NewLine + ex.ToString());
+                       } catch (Exception) {
+                               //TODO: What is the correct error number and message?
+                               Report.Error (1, Location, "Error parsing date literal");
+                       }
+
+                       return parsed;
+               }
+               /// <summary>
+               ///    Reads and discards tokens up to the next '#' token, then pushes the
+               ///    '#' back so that it is the next thing read. Throws an
+               ///    ApplicationException when the input ends first.
+               /// </summary>
+               public void PositionCursorAtNextPreProcessorDirective()
+               {
+                       int found;
+
+                       do {
+                               found = token();
+                       } while (found != Token.HASH && found != Token.EOF);
+
+                       if (found == Token.EOF)
+                               throw new ApplicationException("Unexpected EOF while looking for a pre-processor directive");
                        
-                       Location.Push (fname);
+                       // The loop only stops on HASH or EOF and EOF has just been ruled out,
+                       // so the token in hand is the '#'.
+                       tokens_seen = false;
+                       putback('#');
                }
 
        }