X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mcs%2Fmcs%2Fcs-tokenizer.cs;h=5193fa277ef15ba7b5c6a858d9ae33cfb3b838b6;hb=27ac138a66975973b4517b43b9ceda6abe976184;hp=b75dc9a27ba27fa860573838f2253e09ed9ed954;hpb=7e8c804d86bb3a06a915bfbd68a160369a3b06cb;p=mono.git diff --git a/mcs/mcs/cs-tokenizer.cs b/mcs/mcs/cs-tokenizer.cs index b75dc9a27ba..5193fa277ef 100755 --- a/mcs/mcs/cs-tokenizer.cs +++ b/mcs/mcs/cs-tokenizer.cs @@ -6,7 +6,7 @@ // // Licensed under the terms of the GNU GPL // -// (C) 2001 Ximian, Inc (http://www.ximian.com) +// (C) 2001, 2002 Ximian, Inc (http://www.ximian.com) // /* @@ -160,9 +160,14 @@ namespace Mono.CSharp // Stack ifstack; - static System.Text.StringBuilder id_builder; static System.Text.StringBuilder string_builder; - static System.Text.StringBuilder number_builder; + + const int max_id_size = 512; + static char [] id_builder = new char [max_id_size]; + + const int max_number_size = 128; + static char [] number_builder = new char [max_number_size]; + static int number_pos; // // Details about the error encoutered by the tokenizer @@ -186,7 +191,7 @@ namespace Mono.CSharp return col; } } - + static void InitTokens () { keywords = new Hashtable (); @@ -284,28 +289,26 @@ namespace Mono.CSharp csharp_format_info = NumberFormatInfo.InvariantInfo; styles = NumberStyles.Float; - id_builder = new System.Text.StringBuilder (); string_builder = new System.Text.StringBuilder (); - number_builder = new System.Text.StringBuilder (); } - bool is_keyword (string name) + int GetKeyword (string name) { - bool res; + object o = keywords [name]; + + if (o == null) + return -1; - res = keywords.Contains (name); - if (handle_get_set == false && (name == "get" || name == "set")) - return false; - if (handle_remove_add == false && (name == "remove" || name == "add")) - return false; - if (handle_assembly == false && (name == "assembly")) - return false; - return res; - } + int res = (int) o; - int GetKeyword (string name) - { - return (int) (keywords [name]); + if (handle_get_set == false && (res == Token.GET || res == Token.SET)) + return -1; + if (handle_remove_add == false && (res == Token.REMOVE || res == Token.ADD)) + return -1; + if (handle_assembly == false && res == Token.ASSEMBLY) + return -1; + return res; + } public Location Location { @@ -324,10 +327,11 @@ namespace Mono.CSharp defines [def] = true; } - public Tokenizer (System.IO.Stream input, string fname, ArrayList defs) + public Tokenizer (StreamReader input, string fname, ArrayList defs) { this.ref_name = fname; - reader = new System.IO.StreamReader (input); + reader = input; + putback_char = -1; if (defs != null){ @@ -345,12 +349,12 @@ namespace Mono.CSharp bool is_identifier_start_character (char c) { - return Char.IsLetter (c) || c == '_' ; + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || Char.IsLetter (c); } bool is_identifier_part_character (char c) { - return (Char.IsLetter (c) || Char.IsDigit (c) || c == '_'); + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || Char.IsLetter (c); } int is_punct (char c, ref bool doread) @@ -516,17 +520,31 @@ namespace Mono.CSharp return Token.ERROR; } + void Error_NumericConstantTooLong () + { + Report.Error (1021, Location, "Numeric constant too long"); + } + bool decimal_digits (int c) { int d; bool seen_digits = false; - if (c != -1) - number_builder.Append ((char) c); + if (c != -1){ + if (number_pos == max_number_size) + Error_NumericConstantTooLong (); + number_builder [number_pos++] = (char) c; + } - while ((d = peekChar ()) != -1){ - if (Char.IsDigit ((char)d)){ - number_builder.Append ((char) d); + // + // We use peekChar2, because decimal_digits needs to do a + // 2-character look-ahead (5.ToString for example). + // + while ((d = peekChar2 ()) != -1){ + if (d >= '0' && d <= '9'){ + if (number_pos == max_number_size) + Error_NumericConstantTooLong (); + number_builder [number_pos++] = (char) d; getChar (); seen_digits = true; } else @@ -536,9 +554,9 @@ namespace Mono.CSharp return seen_digits; } - bool is_hex (char e) + bool is_hex (int e) { - return Char.IsDigit (e) || (e >= 'A' && e <= 'F') || (e >= 'a' && e <= 'f'); + return (e >= '0' && e <= '9') || (e >= 'A' && e <= 'F') || (e >= 'a' && e <= 'f'); } void hex_digits (int c) @@ -546,16 +564,8 @@ namespace Mono.CSharp int d; if (c != -1) - number_builder.Append ((char) c); - while ((d = peekChar ()) != -1){ - char e = Char.ToUpper ((char) d); - - if (is_hex (e)){ - number_builder.Append ((char) e); - getChar (); - } else - break; - } + number_builder [number_pos++] = (char) c; + } int real_type_suffix (int c) @@ -665,8 +675,21 @@ namespace Mono.CSharp int adjust_int (int c) { try { - ulong ul = System.UInt64.Parse (number_builder.ToString ()); - return integer_type_suffix (ul, c); + if (number_pos > 9){ + ulong ul = (uint) (number_builder [0] - '0'); + + for (int i = 1; i < number_pos; i++){ + ul = checked ((ul * 10) + ((uint)(number_builder [i] - '0'))); + } + return integer_type_suffix (ul, c); + } else { + uint ui = (uint) (number_builder [0] - '0'); + + for (int i = 1; i < number_pos; i++){ + ui = checked ((ui * 10) + ((uint)(number_builder [i] - '0'))); + } + return integer_type_suffix (ui, c); + } } catch (OverflowException) { error_details = "Integral constant is too large"; Report.Error (1021, Location, error_details); @@ -677,7 +700,7 @@ namespace Mono.CSharp int adjust_real (int t) { - string s = number_builder.ToString (); + string s = new String (number_builder, 0, number_pos); switch (t){ case Token.LITERAL_DECIMAL: @@ -686,7 +709,7 @@ namespace Mono.CSharp } catch (OverflowException) { val = 0m; error_details = "Floating-point constant is outside the range of the type 'decimal'"; - Report.Error(594, Location, error_details); + Report.Error (594, Location, error_details); } break; case Token.LITERAL_FLOAT: @@ -695,7 +718,7 @@ namespace Mono.CSharp } catch (OverflowException) { val = 0.0f; error_details = "Floating-point constant is outside the range of the type 'float'"; - Report.Error(594, Location, error_details); + Report.Error (594, Location, error_details); } break; @@ -707,13 +730,39 @@ namespace Mono.CSharp } catch (OverflowException) { val = 0.0; error_details = "Floating-point constant is outside the range of the type 'double'"; - Report.Error(594, Location, error_details); + Report.Error (594, Location, error_details); } break; } return t; } + int handle_hex () + { + int d; + ulong ul; + + getChar (); + while ((d = peekChar ()) != -1){ + if (is_hex (d)){ + if (number_pos == 16){ + Report.Error (1021, Location, "Integral constant too large"); + return Token.ERROR; + } + number_builder [number_pos++] = (char) d; + getChar (); + } else + break; + } + + string s = new String (number_builder, 0, number_pos); + if (number_pos <= 8) + ul = System.UInt32.Parse (s, NumberStyles.HexNumber); + else + ul = System.UInt64.Parse (s, NumberStyles.HexNumber); + return integer_type_suffix (ul, peekChar ()); + } + // // Invoked if we know we have .digits or digits // @@ -722,18 +771,14 @@ namespace Mono.CSharp bool is_real = false; int type; - number_builder.Length = 0; + number_pos = 0; - if (Char.IsDigit ((char)c)){ - if (c == '0' && peekChar () == 'x' || peekChar () == 'X'){ - ulong ul; - getChar (); - hex_digits (-1); + if (c >= '0' && c <= '9'){ + if (c == '0'){ + int peek = peekChar (); - string s = number_builder.ToString (); - - ul = System.UInt64.Parse (s, NumberStyles.HexNumber); - return integer_type_suffix (ul, peekChar ()); + if (peek == 'x' || peek == 'X') + return handle_hex (); } decimal_digits (c); c = getChar (); @@ -749,21 +794,35 @@ namespace Mono.CSharp c = getChar (); } else { putback ('.'); - number_builder.Length -= 1; + number_pos--; return adjust_int (-1); } } if (c == 'e' || c == 'E'){ is_real = true; - number_builder.Append ("e"); + if (number_pos == max_number_size) + Error_NumericConstantTooLong (); + number_builder [number_pos++] = 'e'; c = getChar (); - if (c == '+') - number_builder.Append ((char) c); - else if (c == '-') - number_builder.Append ((char) c); - decimal_digits (-1); + if (c == '+'){ + if (number_pos == max_number_size) + Error_NumericConstantTooLong (); + number_builder [number_pos++] = '+'; + c = -1; + } else if (c == '-') { + if (number_pos == max_number_size) + Error_NumericConstantTooLong (); + number_builder [number_pos++] = '-'; + c = -1; + } else { + if (number_pos == max_number_size) + Error_NumericConstantTooLong (); + number_builder [number_pos++] = '+'; + } + + decimal_digits (c); c = getChar (); } @@ -794,23 +853,24 @@ namespace Mono.CSharp int i; int total = 0; int c; - char e; int top = count != -1 ? count : 4; getChar (); error = false; for (i = 0; i < top; i++){ c = getChar (); - e = Char.ToUpper ((char) c); - if (!is_hex (e)){ + if (c >= '0' && c <= '9') + c = (int) c - (int) '0'; + else if (c >= 'A' && c <= 'F') + c = (int) c - (int) 'A' + 10; + else if (c >= 'a' && c <= 'f') + c = (int) c - (int) 'a' + 10; + else { error = true; return 0; } - if (Char.IsDigit (e)) - c = (int) e - (int) '0'; - else - c = (int) e - (int) 'A' + 10; + total = (total * 16) + c; if (count == -1){ int p = peekChar (); @@ -873,7 +933,7 @@ namespace Mono.CSharp return v; default: Report.Error (1009, Location, "Unrecognized escape sequence in " + (char)d); - return -1; + return d; } getChar (); return v; @@ -894,13 +954,26 @@ namespace Mono.CSharp { if (putback_char != -1) return putback_char; - return reader.Peek (); + putback_char = reader.Read (); + return putback_char; } - void putback (int c) + int peekChar2 () { if (putback_char != -1) + return putback_char; + return reader.Peek (); + } + + void putback (int c) + { + if (putback_char != -1){ + Console.WriteLine ("Col: " + col); + Console.WriteLine ("Row: " + line); + Console.WriteLine ("Name: " + ref_name); + Console.WriteLine ("Current [{0}] putting back [{1}] ", putback_char, c); throw new Exception ("This should not happen putback on putback"); + } putback_char = c; } @@ -936,9 +1009,7 @@ namespace Mono.CSharp arg = ""; static_cmd_arg.Length = 0; - while ((c = getChar ()) != -1 && (c != '\n') && (c != ' ') && (c != '\t')){ - if (c == '\r') - continue; + while ((c = getChar ()) != -1 && (c != '\n') && (c != ' ') && (c != '\t') && (c != '\r')){ static_cmd_arg.Append ((char) c); } @@ -948,31 +1019,34 @@ namespace Mono.CSharp line++; ref_line++; return; - } + } else if (c == '\r') + col = 0; // skip over white space - while ((c = getChar ()) != -1 && (c != '\n') && ((c == ' ') || (c == '\t'))) + while ((c = getChar ()) != -1 && (c != '\n') && ((c == '\r') || (c == ' ') || (c == '\t'))) ; if (c == '\n'){ line++; ref_line++; return; + } else if (c == '\r'){ + col = 0; + return; } static_cmd_arg.Length = 0; static_cmd_arg.Append ((char) c); - while ((c = getChar ()) != -1 && (c != '\n')){ - if (c == '\r') - continue; + while ((c = getChar ()) != -1 && (c != '\n') && (c != '\r')){ static_cmd_arg.Append ((char) c); } if (c == '\n'){ line++; ref_line++; - } + } else if (c == '\r') + col = 0; arg = static_cmd_arg.ToString ().Trim (); } @@ -998,7 +1072,7 @@ namespace Mono.CSharp char [] quotes = { '\"' }; - ref_name = arg.Substring (pos). Trim(quotes); + ref_name = arg.Substring (pos). Trim (quotes); } else { ref_line = System.Int32.Parse (arg); } @@ -1015,19 +1089,19 @@ namespace Mono.CSharp void PreProcessDefinition (bool is_define, string arg) { if (arg == "" || arg == "true" || arg == "false"){ - Report.Error(1001, Location, "Missing identifer to pre-processor directive"); + Report.Error (1001, Location, "Missing identifer to pre-processor directive"); return; } char[] whitespace = { ' ', '\t' }; if (arg.IndexOfAny (whitespace) != -1){ - Report.Error(1025, Location, "Single-line comment or end-of-line expected"); + Report.Error (1025, Location, "Single-line comment or end-of-line expected"); return; } foreach (char c in arg){ if (!Char.IsLetter (c) && (c != '_')){ - Report.Error(1001, Location, "Identifier expected"); + Report.Error (1001, Location, "Identifier expected"); return; } } @@ -1078,13 +1152,13 @@ namespace Mono.CSharp return false; } - if (is_identifier_start_character(c)){ + if (is_identifier_start_character (c)){ int j = 1; while (j < len){ c = s [j]; - if (is_identifier_part_character(c)){ + if (is_identifier_part_character (c)){ j++; continue; } @@ -1142,7 +1216,7 @@ namespace Mono.CSharp return va != pp_unary (ref s); - } + } } return va; @@ -1159,7 +1233,7 @@ namespace Mono.CSharp if (s [0] == '&'){ if (len > 2 && s [1] == '&'){ s = s.Substring (2); - return va && pp_eq (ref s); + return (va & pp_eq (ref s)); } else { Error_InvalidDirective (); return false; @@ -1175,7 +1249,6 @@ namespace Mono.CSharp bool pp_expr (ref string s) { bool va = pp_and (ref s); - s = s.Trim (); int len = s.Length; if (len > 0){ @@ -1184,14 +1257,14 @@ namespace Mono.CSharp if (c == '|'){ if (len > 2 && s [1] == '|'){ s = s.Substring (2); - return va || pp_and (ref s); + return va | pp_expr (ref s); } else { Error_InvalidDirective (); return false; } } } - + return va; } @@ -1342,15 +1415,21 @@ namespace Mono.CSharp } ifstack.Pop (); - ifstack.Push (state | ELSE_SEEN); + bool ret; if ((state & TAKEN_BEFORE) == 0){ - if ((state & PARENT_TAKING) != 0) - return true; - else - return false; - } - return false; + ret = ((state & PARENT_TAKING) != 0); + } else + ret = false; + + if (ret) + state |= TAKING; + else + state &= ~TAKING; + + ifstack.Push (state | ELSE_SEEN); + + return ret; } } @@ -1391,7 +1470,7 @@ namespace Mono.CSharp } - private int consume_string(bool quoted) + private int consume_string (bool quoted) { int c; string_builder.Length = 0; @@ -1408,9 +1487,14 @@ namespace Mono.CSharp } } - if (c == '\n' && !quoted) { - Report.Error(1010, Location, "Newline in constant"); - } + if (c == '\n'){ + if (!quoted) + Report.Error (1010, Location, "Newline in constant"); + line++; + ref_line++; + col = 0; + } else + col++; if (!quoted){ c = escape (c); @@ -1424,34 +1508,41 @@ namespace Mono.CSharp return Token.EOF; } - private int consume_identifier(int c, bool quoted) + private int consume_identifier (int s, bool quoted) { - id_builder.Length = 0; - - id_builder.Append ((char) c); + int pos = 1; + int c; + + id_builder [0] = (char) s; - while ((c = peekChar ()) != -1) { + while ((c = reader.Read ()) != -1) { if (is_identifier_part_character ((char) c)){ - id_builder.Append ((char)getChar ()); + if (pos == max_id_size){ + Report.Error (645, Location, "Identifier too long (limit is 512 chars)"); + return Token.ERROR; + } + + id_builder [pos++] = (char) c; + putback_char = -1; col++; - } else + } else { + putback_char = c; break; + } } - - string ids = id_builder.ToString (); - if (!is_keyword (ids) || quoted) { - val = ids; - if (ids.Length > 512){ - Report.Error ( - 645, Location, - "Identifier too long (limit is 512 chars)"); + string ids = new String (id_builder, 0, pos); + + if (s >= 'a'){ + int keyword = GetKeyword (ids); + if (keyword == -1 || quoted){ + val = ids; + return Token.IDENTIFIER; } - return Token.IDENTIFIER; + return keyword; } - - // true, false and null are in the hash anyway. - return GetKeyword (ids); + val = ids; + return Token.IDENTIFIER; } public int xtoken () @@ -1463,21 +1554,11 @@ namespace Mono.CSharp val = null; // optimization: eliminate col and implement #directive semantic correctly. for (;(c = getChar ()) != -1; col++) { - if (is_identifier_start_character((char)c)){ - tokens_seen = true; - return consume_identifier(c, false); - } - - if (c == '.'){ - tokens_seen = true; - if (Char.IsDigit ((char) peekChar ())) - return is_number (c); - return Token.DOT; - } - - if (Char.IsDigit ((char) c)){ - tokens_seen = true; - return is_number (c); + if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r' || c == 0xa0){ + + if (c == '\t') + col = (((col + 8) / 8) * 8) - 1; + continue; } // Handle double-slash comments. @@ -1486,11 +1567,13 @@ namespace Mono.CSharp if (d == '/'){ getChar (); - while ((d = getChar ()) != -1 && (d != '\n')) + while ((d = getChar ()) != -1 && (d != '\n') && d != '\r') col++; - line++; - ref_line++; - col = 0; + if (d == '\n'){ + line++; + ref_line++; + col = 0; + } any_token_seen |= tokens_seen; tokens_seen = false; continue; @@ -1513,8 +1596,48 @@ namespace Mono.CSharp } continue; } + goto is_punct_label; } + + if (is_identifier_start_character ((char)c)){ + tokens_seen = true; + return consume_identifier (c, false); + } + + is_punct_label: + if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){ + tokens_seen = true; + if (doread){ + getChar (); + col++; + } + return t; + } + + // white space + if (c == '\n'){ + line++; + ref_line++; + col = 0; + any_token_seen |= tokens_seen; + tokens_seen = false; + continue; + } + + if (c >= '0' && c <= '9'){ + tokens_seen = true; + return is_number (c); + } + + if (c == '.'){ + tokens_seen = true; + int peek = peekChar (); + if (peek >= '0' && peek <= '9') + return is_number (c); + return Token.DOT; + } + /* For now, ignore pre-processor commands */ // FIXME: In C# the '#' is not limited to appear // on the first column. @@ -1538,7 +1661,7 @@ namespace Mono.CSharp line++; ref_line++; skipping = false; - } else if (c == ' ' || c == '\t' || c == '\v' || c == '\r') + } else if (c == ' ' || c == '\t' || c == '\v' || c == '\r' || c == 0xa0) continue; else if (c != '#') skipping = true; @@ -1552,18 +1675,8 @@ namespace Mono.CSharp continue; } - if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){ - tokens_seen = true; - if (doread){ - getChar (); - col++; - } - return t; - } - - if (c == '"') { - return consume_string(false); - } + if (c == '"') + return consume_string (false); if (c == '\''){ c = getChar (); @@ -1586,8 +1699,13 @@ namespace Mono.CSharp // Try to recover, read until newline or next "'" while ((c = getChar ()) != -1){ - if (c == '\n' || c == '\'') + if (c == '\n' || c == '\''){ + line++; + ref_line++; + col = 0; break; + } else + col++; } return Token.ERROR; @@ -1595,31 +1713,15 @@ namespace Mono.CSharp return Token.LITERAL_CHARACTER; } - // white space - if (c == '\n'){ - line++; - ref_line++; - col = 0; - any_token_seen |= tokens_seen; - tokens_seen = false; - continue; - } - - if (c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r'){ - if (c == '\t') - col = (((col + 8) / 8) * 8) - 1; - continue; - } - if (c == '@') { - c = getChar(); + c = getChar (); if (c == '"') { tokens_seen = true; - return consume_string(true); - } else if (is_identifier_start_character((char) c)){ - return consume_identifier(c, true); + return consume_string (true); + } else if (is_identifier_start_character ((char) c)){ + return consume_identifier (c, true); } else { - Report.Error(1033, Location, "'@' must be followed by string constant or identifier"); + Report.Error (1033, Location, "'@' must be followed by string constant or identifier"); } }