X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mcs%2Fmcs%2Fcs-tokenizer.cs;h=5d1c92c54159e13f22995161cdc23ca7d69ac297;hb=f448841be113e8e9e19c9dca1fb9348fb7db3ee1;hp=adddd3ba5b1f4025d0ca1b257e61edb1b5512dd3;hpb=5980d617f27d174552860b89f56612474ffa4b6b;p=mono.git diff --git a/mcs/mcs/cs-tokenizer.cs b/mcs/mcs/cs-tokenizer.cs index adddd3ba5b1..5d1c92c5415 100644 --- a/mcs/mcs/cs-tokenizer.cs +++ b/mcs/mcs/cs-tokenizer.cs @@ -14,7 +14,7 @@ using System; using System.Text; -using System.Collections; +using System.Collections.Generic; using System.IO; using System.Globalization; using System.Reflection; @@ -27,22 +27,141 @@ namespace Mono.CSharp public class Tokenizer : yyParser.yyInput { + class KeywordEntry + { + public readonly int Token; + public KeywordEntry Next; + public readonly char[] Value; + + public KeywordEntry (string value, int token) + { + this.Value = value.ToCharArray (); + this.Token = token; + } + } + + sealed class IdentifiersComparer : IEqualityComparer + { + readonly int length; + + public IdentifiersComparer (int length) + { + this.length = length; + } + + public bool Equals (char[] x, char[] y) + { + for (int i = 0; i < length; ++i) + if (x [i] != y [i]) + return false; + + return true; + } + + public int GetHashCode (char[] obj) + { + int h = 0; + for (int i = 0; i < length; ++i) + h = (h << 5) - h + obj [i]; + + return h; + } + } + + // + // This class has to be used in the parser only, it reuses token + // details after each parse + // + public class LocatedToken + { + int row, column; + string value; + + static LocatedToken[] buffer; + static int pos; + + private LocatedToken () + { + } + + public static LocatedToken Create (int row, int column) + { + return Create (null, row, column); + } + + public static LocatedToken Create (string value, int row, int column) + { + // + // TODO: I am not very happy about the logic but it's the best + // what I could come up with for now. + // Ideally we should be using just tiny buffer (256 elements) which + // is enough to hold all details for currect stack and recycle elements + // poped from the stack but there is a trick needed to recycle + // them properly. + // + LocatedToken entry; + if (pos >= buffer.Length) { + entry = new LocatedToken (); + } else { + entry = buffer [pos]; + if (entry == null) { + entry = new LocatedToken (); + buffer [pos] = entry; + } + + ++pos; + } + entry.value = value; + entry.row = row; + entry.column = column; + return entry; + } + + // + // Used for token not required by expression evaluator + // + public static LocatedToken CreateOptional (int row, int col) + { +#if false + return Create (row, col); +#endif + return null; + } + + public static void Initialize () + { + if (buffer == null) + buffer = new LocatedToken [10000]; + pos = 0; + } + + public Location Location { + get { return new Location (row, column); } + } + + public string Value { + get { return value; } + } + } + SeekableStreamReader reader; SourceFile ref_name; CompilationUnit file_name; + CompilerContext context; bool hidden = false; int ref_line = 1; int line = 1; int col = 0; int previous_col; int current_token; + int tab_size; bool handle_get_set = false; bool handle_remove_add = false; bool handle_where = false; bool handle_typeof = false; bool lambda_arguments_parsing; Location current_comment_location = Location.Null; - ArrayList escaped_identifiers; + List escaped_identifiers; int parsing_generic_less_than; // @@ -51,7 +170,12 @@ namespace Mono.CSharp // scope only // public int parsing_block; - internal int query_parsing; + internal bool query_parsing; + + // + // When parsing type only, useful for ambiguous nullable types + // + public int parsing_type; // // Set when parsing generic declaration (type or method header) @@ -94,6 +218,14 @@ namespace Mono.CSharp // bool tokens_seen = false; + // + // Set to true once the GENERATE_COMPLETION token has bee + // returned. This helps produce one GENERATE_COMPLETION, + // as many COMPLETE_COMPLETION as necessary to complete the + // AST tree and one final EOF. + // + bool generated; + // // Whether a token has been seen on the file // This is needed because `define' is not allowed to be used @@ -122,6 +254,11 @@ namespace Mono.CSharp get { return handle_typeof; } set { handle_typeof = value; } } + + public int TabSize { + get { return tab_size; } + set { tab_size = value; } + } public XmlCommentState doc_state { get { return xml_doc_state; } @@ -134,19 +271,23 @@ namespace Mono.CSharp } } - void AddEscapedIdentifier (LocatedToken lt) + // + // This is used to trigger completion generation on the parser + public bool CompleteOnEOF; + + void AddEscapedIdentifier (Location loc) { if (escaped_identifiers == null) - escaped_identifiers = new ArrayList (); + escaped_identifiers = new List (); - escaped_identifiers.Add (lt); + escaped_identifiers.Add (loc); } public bool IsEscapedIdentifier (Location loc) { if (escaped_identifiers != null) { - foreach (LocatedToken lt in escaped_identifiers) - if (lt.Location.Equals (loc)) + foreach (Location lt in escaped_identifiers) + if (lt.Equals (loc)) return true; } @@ -156,8 +297,8 @@ namespace Mono.CSharp // // Class variables // - static CharArrayHashtable[] keywords; - static Hashtable keyword_strings; + static KeywordEntry[][] keywords; + static Dictionary keyword_strings; // TODO: HashSet static NumberStyles styles; static NumberFormatInfo csharp_format_info; @@ -165,7 +306,7 @@ namespace Mono.CSharp // Values for the associated token returned // internal int putback_char; // Used by repl only - Object val; + object val; // // Pre-processor @@ -178,18 +319,20 @@ namespace Mono.CSharp // // pre-processor if stack state: // - Stack ifstack; + Stack ifstack; static System.Text.StringBuilder string_builder; const int max_id_size = 512; static char [] id_builder = new char [max_id_size]; - static CharArrayHashtable [] identifiers = new CharArrayHashtable [max_id_size + 1]; + public static Dictionary[] identifiers = new Dictionary[max_id_size + 1]; const int max_number_size = 512; static char [] number_builder = new char [max_number_size]; static int number_pos; + + static StringBuilder static_cmd_arg = new System.Text.StringBuilder (); // // Details about the error encoutered by the tokenizer @@ -214,7 +357,8 @@ namespace Mono.CSharp // on its own to deamiguate a token in behalf of the // parser. // - Stack position_stack = new Stack (2); + Stack position_stack = new Stack (2); + class Position { public int position; public int line; @@ -223,9 +367,10 @@ namespace Mono.CSharp public bool hidden; public int putback_char; public int previous_col; - public Stack ifstack; + public Stack ifstack; public int parsing_generic_less_than; public int current_token; + public object val; public Position (Tokenizer t) { @@ -236,10 +381,16 @@ namespace Mono.CSharp hidden = t.hidden; putback_char = t.putback_char; previous_col = t.previous_col; - if (t.ifstack != null && t.ifstack.Count != 0) - ifstack = (Stack)t.ifstack.Clone (); + if (t.ifstack != null && t.ifstack.Count != 0) { + // There is no simple way to clone Stack all + // methods reverse the order + var clone = t.ifstack.ToArray (); + Array.Reverse (clone); + ifstack = new Stack (clone); + } parsing_generic_less_than = t.parsing_generic_less_than; current_token = t.current_token; + val = t.val; } } @@ -250,7 +401,7 @@ namespace Mono.CSharp public void PopPosition () { - Position p = (Position) position_stack.Pop (); + Position p = position_stack.Pop (); reader.Position = p.position; ref_line = p.ref_line; @@ -262,6 +413,7 @@ namespace Mono.CSharp ifstack = p.ifstack; parsing_generic_less_than = p.parsing_generic_less_than; current_token = p.current_token; + val = p.val; } // Do not reset the position, ignore it. @@ -272,17 +424,33 @@ namespace Mono.CSharp static void AddKeyword (string kw, int token) { - keyword_strings.Add (kw, kw); - if (keywords [kw.Length] == null) { - keywords [kw.Length] = new CharArrayHashtable (kw.Length); + keyword_strings.Add (kw, null); + + int length = kw.Length; + if (keywords [length] == null) { + keywords [length] = new KeywordEntry ['z' - '_' + 1]; } - keywords [kw.Length] [kw.ToCharArray ()] = token; + + int char_index = kw [0] - '_'; + KeywordEntry kwe = keywords [length] [char_index]; + if (kwe == null) { + keywords [length] [char_index] = new KeywordEntry (kw, token); + return; + } + + while (kwe.Next != null) { + kwe = kwe.Next; + } + + kwe.Next = new KeywordEntry (kw, token); } static void InitTokens () { - keyword_strings = new Hashtable (); - keywords = new CharArrayHashtable [64]; + keyword_strings = new Dictionary (); + + // 11 is the length of the longest keyword for now + keywords = new KeywordEntry [11] []; AddKeyword ("__arglist", Token.ARGLIST); AddKeyword ("abstract", Token.ABSTRACT); @@ -389,12 +557,7 @@ namespace Mono.CSharp // static Tokenizer () { - Reset (); - } - - public static void Reset () - { - InitTokens (); + InitTokens (); csharp_format_info = NumberFormatInfo.InvariantInfo; styles = NumberStyles.Float; @@ -403,20 +566,37 @@ namespace Mono.CSharp int GetKeyword (char[] id, int id_len) { - /* - * Keywords are stored in an array of hashtables grouped by their - * length. - */ + // + // Keywords are stored in an array of arrays grouped by their + // length and then by the first character + // + if (id_len >= keywords.Length || keywords [id_len] == null) + return -1; - if ((id_len >= keywords.Length) || (keywords [id_len] == null)) + int first_index = id [0] - '_'; + if (first_index > 'z') return -1; - object o = keywords [id_len] [id]; - if (o == null) + KeywordEntry kwe = keywords [id_len] [first_index]; + if (kwe == null) + return -1; + + int res; + do { + res = kwe.Token; + for (int i = 1; i < id_len; ++i) { + if (id [i] != kwe.Value [i]) { + res = 0; + kwe = kwe.Next; + break; + } + } + } while (res == 0 && kwe != null); + + if (res == 0) return -1; int next_token; - int res = (int) o; switch (res) { case Token.GET: case Token.SET: @@ -439,7 +619,7 @@ namespace Mono.CSharp } break; case Token.WHERE: - if (!handle_where && query_parsing == 0) + if (!handle_where && !query_parsing) res = -1; break; case Token.FROM: @@ -447,7 +627,7 @@ namespace Mono.CSharp // A query expression is any expression that starts with `from identifier' // followed by any token except ; , = // - if (query_parsing == 0) { + if (!query_parsing) { if (lambda_arguments_parsing) { res = -1; break; @@ -474,12 +654,13 @@ namespace Mono.CSharp if (next_token == Token.SEMICOLON || next_token == Token.COMMA || next_token == Token.EQUALS) goto default; - ++query_parsing; + res = Token.FROM_FIRST; + query_parsing = true; if (RootContext.Version <= LanguageVersion.ISO_2) Report.FeatureIsNotAvailable (Location, "query expressions"); break; case Token.VOID: - Expression.Error_VoidInvalidInTheContext (Location); + Expression.Error_VoidInvalidInTheContext (Location, Report); break; default: PopPosition (); @@ -502,7 +683,7 @@ namespace Mono.CSharp case Token.ASCENDING: case Token.DESCENDING: case Token.INTO: - if (query_parsing == 0) + if (!query_parsing) res = -1; break; @@ -559,16 +740,22 @@ namespace Mono.CSharp } } - public Tokenizer (SeekableStreamReader input, CompilationUnit file) + public Tokenizer (SeekableStreamReader input, CompilationUnit file, CompilerContext ctx) { this.ref_name = file; this.file_name = file; + this.context = ctx; reader = input; putback_char = -1; xml_comment_buffer = new StringBuilder (); + if (Environment.OSVersion.Platform == PlatformID.Win32NT) + tab_size = 4; + else + tab_size = 8; + // // FIXME: This could be `Location.Push' but we have to // find out why the MS compiler allows this @@ -583,19 +770,29 @@ namespace Mono.CSharp static bool is_identifier_part_character (char c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || - Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation; + if (c >= 'a' && c <= 'z') + return true; + + if (c >= 'A' && c <= 'Z') + return true; + + if (c == '_' || (c >= '0' && c <= '9')) + return true; + + if (c < 0x80) + return false; + + return Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation; } public static bool IsKeyword (string s) { - return keyword_strings [s] != null; + return keyword_strings.ContainsKey (s); } // // Open parens micro parser. Detects both lambda and cast ambiguity. - // - + // int TokenizeOpenParens () { int ptoken; @@ -616,12 +813,8 @@ namespace Mono.CSharp // // Expression inside parens is lambda, (int i) => // - if (current_token == Token.ARROW) { - if (RootContext.Version <= LanguageVersion.ISO_2) - Report.FeatureIsNotAvailable (Location, "lambda expressions"); - + if (current_token == Token.ARROW) return Token.OPEN_PARENS_LAMBDA; - } // // Expression inside parens is single type, (int[]) @@ -638,12 +831,7 @@ namespace Mono.CSharp case Token.BANG: case Token.TILDE: case Token.IDENTIFIER: - case Token.LITERAL_INTEGER: - case Token.LITERAL_FLOAT: - case Token.LITERAL_DOUBLE: - case Token.LITERAL_DECIMAL: - case Token.LITERAL_CHARACTER: - case Token.LITERAL_STRING: + case Token.LITERAL: case Token.BASE: case Token.CHECKED: case Token.DELEGATE: @@ -781,6 +969,8 @@ namespace Mono.CSharp the_token = token (); } while (the_token != Token.CLOSE_BRACKET); the_token = token (); + } else if (the_token == Token.IN || the_token == Token.OUT) { + the_token = token (); } switch (the_token) { case Token.IDENTIFIER: @@ -801,7 +991,6 @@ namespace Mono.CSharp case Token.CHAR: case Token.VOID: break; - case Token.OP_GENERICS_GT: return true; @@ -871,7 +1060,7 @@ namespace Mono.CSharp // int TokenizePossibleNullableType () { - if (parsing_block == 0) + if (parsing_block == 0 || parsing_type > 0) return Token.INTERR_NULLABLE; int d = peek_char (); @@ -885,8 +1074,7 @@ namespace Mono.CSharp case Token.TRUE: case Token.FALSE: case Token.NULL: - case Token.LITERAL_INTEGER: - case Token.LITERAL_STRING: + case Token.LITERAL: return Token.INTERR; } @@ -898,18 +1086,15 @@ namespace Mono.CSharp } PushPosition (); + current_token = Token.NONE; int next_token; switch (xtoken ()) { - case Token.LITERAL_INTEGER: - case Token.LITERAL_STRING: - case Token.LITERAL_CHARACTER: - case Token.LITERAL_DECIMAL: - case Token.LITERAL_DOUBLE: - case Token.LITERAL_FLOAT: + case Token.LITERAL: case Token.TRUE: case Token.FALSE: case Token.NULL: case Token.THIS: + case Token.NEW: next_token = Token.INTERR; break; @@ -1006,24 +1191,18 @@ namespace Mono.CSharp return (e >= '0' && e <= '9') || (e >= 'A' && e <= 'F') || (e >= 'a' && e <= 'f'); } - static int real_type_suffix (int c) + static TypeCode real_type_suffix (int c) { - int t; - switch (c){ case 'F': case 'f': - t = Token.LITERAL_FLOAT; - break; + return TypeCode.Single; case 'D': case 'd': - t = Token.LITERAL_DOUBLE; - break; + return TypeCode.Double; case 'M': case 'm': - t= Token.LITERAL_DECIMAL; - break; + return TypeCode.Decimal; default: - return Token.NONE; + return TypeCode.Empty; } - return t; } int integer_type_suffix (ulong ul, int c) @@ -1050,16 +1229,8 @@ namespace Mono.CSharp // Report.Warning (78, 4, Location, "The 'l' suffix is easily confused with the digit '1' (use 'L' for clarity)"); } - // - // This goto statement causes the MS CLR 2.0 beta 1 csc to report an error, so - // work around that. - // - //goto case 'L'; - if (is_long) - scanning = false; - is_long = true; - get_char (); - break; + + goto case 'L'; case 'L': if (is_long) @@ -1077,38 +1248,40 @@ namespace Mono.CSharp } if (is_long && is_unsigned){ - val = ul; - return Token.LITERAL_INTEGER; - } else if (is_unsigned){ + val = new ULongLiteral (ul, Location); + return Token.LITERAL; + } + + if (is_unsigned){ // uint if possible, or ulong else. if ((ul & 0xffffffff00000000) == 0) - val = (uint) ul; + val = new UIntLiteral ((uint) ul, Location); else - val = ul; + val = new ULongLiteral (ul, Location); } else if (is_long){ // long if possible, ulong otherwise if ((ul & 0x8000000000000000) != 0) - val = ul; + val = new ULongLiteral (ul, Location); else - val = (long) ul; + val = new LongLiteral ((long) ul, Location); } else { // int, uint, long or ulong in that order if ((ul & 0xffffffff00000000) == 0){ uint ui = (uint) ul; if ((ui & 0x80000000) != 0) - val = ui; + val = new UIntLiteral (ui, Location); else - val = (int) ui; + val = new IntLiteral ((int) ui, Location); } else { if ((ul & 0x8000000000000000) != 0) - val = ul; + val = new ULongLiteral (ul, Location); else - val = (long) ul; + val = new LongLiteral ((long) ul, Location); } } - return Token.LITERAL_INTEGER; + return Token.LITERAL; } // @@ -1137,51 +1310,49 @@ namespace Mono.CSharp } catch (OverflowException) { error_details = "Integral constant is too large"; Report.Error (1021, Location, error_details); - val = 0ul; - return Token.LITERAL_INTEGER; + val = new IntLiteral (0, Location); + return Token.LITERAL; } catch (FormatException) { Report.Error (1013, Location, "Invalid number"); - val = 0ul; - return Token.LITERAL_INTEGER; + val = new IntLiteral (0, Location); + return Token.LITERAL; } } - int adjust_real (int t) + int adjust_real (TypeCode t) { string s = new String (number_builder, 0, number_pos); const string error_details = "Floating-point constant is outside the range of type `{0}'"; switch (t){ - case Token.LITERAL_DECIMAL: + case TypeCode.Decimal: try { - val = System.Decimal.Parse (s, styles, csharp_format_info); + val = new DecimalLiteral (decimal.Parse (s, styles, csharp_format_info), Location); } catch (OverflowException) { - val = 0m; + val = new DecimalLiteral (0, Location); Report.Error (594, Location, error_details, "decimal"); } break; - case Token.LITERAL_FLOAT: + case TypeCode.Single: try { - val = float.Parse (s, styles, csharp_format_info); + val = new FloatLiteral (float.Parse (s, styles, csharp_format_info), Location); } catch (OverflowException) { - val = 0.0f; + val = new FloatLiteral (0, Location); Report.Error (594, Location, error_details, "float"); } break; - - case Token.LITERAL_DOUBLE: - case Token.NONE: - t = Token.LITERAL_DOUBLE; + default: try { - val = System.Double.Parse (s, styles, csharp_format_info); + val = new DoubleLiteral (double.Parse (s, styles, csharp_format_info), Location); } catch (OverflowException) { - val = 0.0; + val = new DoubleLiteral (0, Location); Report.Error (594, Location, error_details, "double"); } break; } - return t; + + return Token.LITERAL; } int handle_hex () @@ -1207,13 +1378,13 @@ namespace Mono.CSharp } catch (OverflowException){ error_details = "Integral constant is too large"; Report.Error (1021, Location, error_details); - val = 0ul; - return Token.LITERAL_INTEGER; + val = new IntLiteral (0, Location); + return Token.LITERAL; } catch (FormatException) { Report.Error (1013, Location, "Invalid number"); - val = 0ul; - return Token.LITERAL_INTEGER; + val = new IntLiteral (0, Location); + return Token.LITERAL; } return integer_type_suffix (ul, peek_char ()); @@ -1225,7 +1396,6 @@ namespace Mono.CSharp int is_number (int c) { bool is_real = false; - int type; number_pos = 0; @@ -1282,21 +1452,21 @@ namespace Mono.CSharp c = get_char (); } - type = real_type_suffix (c); - if (type == Token.NONE && !is_real){ + var type = real_type_suffix (c); + if (type == TypeCode.Empty && !is_real){ putback (c); return adjust_int (c); - } else - is_real = true; + } + + is_real = true; - if (type == Token.NONE){ + if (type == TypeCode.Empty){ putback (c); } if (is_real) return adjust_real (type); - Console.WriteLine ("This should not be reached"); throw new Exception ("Is Number should never reach this point"); } @@ -1481,7 +1651,7 @@ namespace Mono.CSharp public bool advance () { - return peek_char () != -1; + return peek_char () != -1 || CompleteOnEOF; } public Object Value { @@ -1501,8 +1671,6 @@ namespace Mono.CSharp return current_token; } - static StringBuilder static_cmd_arg = new System.Text.StringBuilder (); - void get_cmd_arg (out string cmd, out string arg) { int c; @@ -1540,17 +1708,25 @@ namespace Mono.CSharp c = get_char (); static_cmd_arg.Length = 0; + int has_identifier_argument = 0; + while (c != -1 && c != '\n' && c != '\r') { - if (c == '\\') { - int peek = peek_char (); - if (peek == 'U' || peek == 'u') { - int surrogate; - c = EscapeUnicode (c, out surrogate); - if (surrogate != 0) { - if (is_identifier_part_character ((char) c)) - static_cmd_arg.Append ((char) c); - c = surrogate; + if (c == '\\' && has_identifier_argument >= 0) { + if (has_identifier_argument != 0 || (cmd == "define" || cmd == "if" || cmd == "elif" || cmd == "undef")) { + has_identifier_argument = 1; + + int peek = peek_char (); + if (peek == 'U' || peek == 'u') { + int surrogate; + c = EscapeUnicode (c, out surrogate); + if (surrogate != 0) { + if (is_identifier_part_character ((char) c)) + static_cmd_arg.Append ((char) c); + c = surrogate; + } } + } else { + has_identifier_argument = -1; } } static_cmd_arg.Append ((char) c); @@ -1793,18 +1969,18 @@ namespace Mono.CSharp int[] codes = ParseNumbers (arg.Substring (w_disable.Length)); foreach (int code in codes) { if (code != 0) - Report.RegisterWarningRegion (Location).WarningDisable (Location, code); + Report.RegisterWarningRegion (Location).WarningDisable (Location, code, Report); } return; } if (arg.StartsWith (w_restore)) { int[] codes = ParseNumbers (arg.Substring (w_restore.Length)); - Hashtable w_table = Report.warning_ignore_table; + var w_table = Report.warning_ignore_table; foreach (int code in codes) { - if (w_table != null && w_table.Contains (code)) - Report.Warning (1635, 1, Location, String.Format ("Cannot restore warning `CS{0:0000}' because it was disabled globally", code)); - Report.RegisterWarningRegion (Location).WarningEnable (Location, code); + if (w_table != null && w_table.ContainsKey (code)) + Report.Warning (1635, 1, Location, "Cannot restore warning `CS{0:0000}' because it was disabled globally", code); + Report.RegisterWarningRegion (Location).WarningEnable (Location, code, Report); } return; } @@ -2072,7 +2248,7 @@ namespace Mono.CSharp Error_UnexpectedDirective ("no #region for this #endregion"); return true; } - int pop = (int) ifstack.Pop (); + int pop = ifstack.Pop (); if ((pop & REGION) == 0) Report.Error (1027, Location, "Expected `#endif' directive"); @@ -2081,13 +2257,13 @@ namespace Mono.CSharp case "if": if (ifstack == null) - ifstack = new Stack (2); + ifstack = new Stack (2); int flags = region_directive ? REGION : 0; if (ifstack.Count == 0){ flags |= PARENT_TAKING; } else { - int state = (int) ifstack.Peek (); + int state = ifstack.Peek (); if ((state & TAKING) != 0) { flags |= PARENT_TAKING; } @@ -2105,7 +2281,7 @@ namespace Mono.CSharp Error_UnexpectedDirective ("no #if for this #endif"); return true; } else { - pop = (int) ifstack.Pop (); + pop = ifstack.Pop (); if ((pop & REGION) != 0) Report.Error (1038, Location, "#endregion directive expected"); @@ -2117,7 +2293,7 @@ namespace Mono.CSharp if (ifstack.Count == 0) return true; - int state = (int) ifstack.Peek (); + int state = ifstack.Peek (); return (state & TAKING) != 0; } @@ -2126,7 +2302,7 @@ namespace Mono.CSharp Error_UnexpectedDirective ("no #if for this #elif"); return true; } else { - int state = (int) ifstack.Pop (); + int state = ifstack.Pop (); if ((state & REGION) != 0) { Report.Error (1038, Location, "#endregion directive expected"); @@ -2157,7 +2333,7 @@ namespace Mono.CSharp Error_UnexpectedDirective ("no #if for this #else"); return true; } else { - int state = (int) ifstack.Peek (); + int state = ifstack.Peek (); if ((state & REGION) != 0) { Report.Error (1038, Location, "#endregion directive expected"); @@ -2249,12 +2425,6 @@ namespace Mono.CSharp int c; string_builder.Length = 0; - // - // No need to parse full string when parsing lambda arguments - // - if (lambda_arguments_parsing) - return Token.LITERAL_STRING; - while ((c = get_char ()) != -1){ if (c == '"'){ if (quoted && peek_char () == '"'){ @@ -2262,8 +2432,8 @@ namespace Mono.CSharp get_char (); continue; } else { - val = string_builder.ToString (); - return Token.LITERAL_STRING; + val = new StringLiteral (string_builder.ToString (), Location); + return Token.LITERAL; } } @@ -2299,9 +2469,15 @@ namespace Mono.CSharp return res; } - private int consume_identifier (int c, bool quoted) + int consume_identifier (int c, bool quoted) { + // + // This method is very performance sensitive. It accounts + // for approximately 25% of all parser time + // + int pos = 0; + int column = col; if (c == '\\') { int surrogate; @@ -2313,32 +2489,44 @@ namespace Mono.CSharp } id_builder [pos++] = (char) c; - Location loc = Location; - while ((c = get_char ()) != -1) { - loop: - if (is_identifier_part_character ((char) c)){ - if (pos == max_id_size){ - Report.Error (645, loc, "Identifier too long (limit is 512 chars)"); - return Token.ERROR; + try { + while (true) { + c = reader.Read (); + + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9')) { + id_builder [pos++] = (char) c; + continue; } - - id_builder [pos++] = (char) c; - } else if (c == '\\') { - int surrogate; - c = escape (c, out surrogate); - if (surrogate != 0) { - if (is_identifier_part_character ((char) c)) - id_builder [pos++] = (char) c; - c = surrogate; + + if (c < 0x80) { + if (c == '\\') { + int surrogate; + c = escape (c, out surrogate); + if (surrogate != 0) { + if (is_identifier_part_character ((char) c)) + id_builder[pos++] = (char) c; + c = surrogate; + } + + continue; + } + } else if (Char.IsLetter ((char) c) || Char.GetUnicodeCategory ((char) c) == UnicodeCategory.ConnectorPunctuation) { + id_builder [pos++] = (char) c; + continue; } - goto loop; - } else { - putback (c); + + putback_char = c; break; } + } catch (IndexOutOfRangeException) { + Report.Error (645, Location, "Identifier too long (limit is 512 chars)"); + col += pos - 1; + return Token.ERROR; } + col += pos - 1; + // // Optimization: avoids doing the keyword lookup // on uppercase letters @@ -2346,8 +2534,7 @@ namespace Mono.CSharp if (id_builder [0] >= '_' && !quoted) { int keyword = GetKeyword (id_builder, pos); if (keyword != -1) { - // TODO: No need to store location for keyword, required location cleanup - val = loc; + val = LocatedToken.Create (null, ref_line, column); return keyword; } } @@ -2356,38 +2543,33 @@ namespace Mono.CSharp // Keep identifiers in an array of hashtables to avoid needless // allocations // - CharArrayHashtable identifiers_group = identifiers [pos]; + var identifiers_group = identifiers [pos]; + string s; if (identifiers_group != null) { - val = identifiers_group [id_builder]; - if (val != null) { - val = new LocatedToken (loc, (string) val); + if (identifiers_group.TryGetValue (id_builder, out s)) { + val = LocatedToken.Create (s, ref_line, column); if (quoted) - AddEscapedIdentifier ((LocatedToken) val); + AddEscapedIdentifier (((LocatedToken) val).Location); return Token.IDENTIFIER; } } else { - identifiers_group = new CharArrayHashtable (pos); + // TODO: this should be number of files dependant + // corlib compilation peaks at 1000 and System.Core at 150 + int capacity = pos > 20 ? 10 : 100; + identifiers_group = new Dictionary (capacity, new IdentifiersComparer (pos)); identifiers [pos] = identifiers_group; } char [] chars = new char [pos]; Array.Copy (id_builder, chars, pos); - val = new String (id_builder, 0, pos); - identifiers_group.Add (chars, val); - - if (RootContext.Version == LanguageVersion.ISO_1) { - for (int i = 1; i < chars.Length; i += 3) { - if (chars [i] == '_' && (chars [i - 1] == '_' || chars [i + 1] == '_')) { - Report.Error (1638, loc, - "`{0}': Any identifier with double underscores cannot be used when ISO language version mode is specified", val.ToString ()); - } - } - } + s = new string (id_builder, 0, pos); + identifiers_group.Add (chars, s); - val = new LocatedToken (loc, (string) val); + val = LocatedToken.Create (s, ref_line, column); if (quoted) - AddEscapedIdentifier ((LocatedToken) val); + AddEscapedIdentifier (((LocatedToken) val).Location); + return Token.IDENTIFIER; } @@ -2400,7 +2582,7 @@ namespace Mono.CSharp while ((c = get_char ()) != -1) { switch (c) { case '\t': - col = ((col + 8) / 8) * 8; + col = ((col + tab_size) / tab_size) * tab_size; continue; case ' ': @@ -2408,8 +2590,20 @@ namespace Mono.CSharp case '\v': case 0xa0: case 0: + case 0xFEFF: // Ignore BOM anywhere in the file continue; +/* This is required for compatibility with .NET + case 0xEF: + if (peek_char () == 0xBB) { + PushPosition (); + get_char (); + if (get_char () == 0xBF) + continue; + PopPosition (); + } + break; +*/ case '\r': if (peek_char () != '\n') advance_line (); @@ -2426,20 +2620,22 @@ namespace Mono.CSharp return consume_identifier (c); case '{': - val = Location; + val = LocatedToken.Create (ref_line, col); return Token.OPEN_BRACE; case '}': - val = Location; + val = LocatedToken.Create (ref_line, col); return Token.CLOSE_BRACE; case '[': // To block doccomment inside attribute declaration. if (doc_state == XmlCommentState.Allowed) doc_state = XmlCommentState.NotAllowed; + val = LocatedToken.CreateOptional (ref_line, col); return Token.OPEN_BRACKET; case ']': + val = LocatedToken.CreateOptional (ref_line, col); return Token.CLOSE_BRACKET; case '(': - val = Location; + val = LocatedToken.Create (ref_line, col); // // An expression versions of parens can appear in block context only // @@ -2463,6 +2659,17 @@ namespace Mono.CSharp return Token.OPEN_PARENS; } + // Optimize using peek + int xx = peek_char (); + switch (xx) { + case '(': + case '\'': + case '"': + case '0': + case '1': + return Token.OPEN_PARENS; + } + lambda_arguments_parsing = true; PushPosition (); d = TokenizeOpenParens (); @@ -2473,22 +2680,29 @@ namespace Mono.CSharp return Token.OPEN_PARENS; case ')': + val = LocatedToken.CreateOptional (ref_line, col); return Token.CLOSE_PARENS; case ',': + val = LocatedToken.CreateOptional (ref_line, col); return Token.COMMA; case ';': + val = LocatedToken.CreateOptional (ref_line, col); return Token.SEMICOLON; case '~': + val = LocatedToken.Create (ref_line, col); return Token.TILDE; case '?': + val = LocatedToken.Create (ref_line, col); return TokenizePossibleNullableType (); case '<': + val = LocatedToken.Create (ref_line, col); if (parsing_generic_less_than++ > 0) return Token.OP_GENERICS_LT; return TokenizeLessThan (); case '>': + val = LocatedToken.Create (ref_line, col); d = peek_char (); if (d == '='){ @@ -2513,8 +2727,9 @@ namespace Mono.CSharp } return Token.OP_GT; - + case '+': + val = LocatedToken.Create (ref_line, col); d = peek_char (); if (d == '+') { d = Token.OP_INC; @@ -2527,6 +2742,7 @@ namespace Mono.CSharp return d; case '-': + val = LocatedToken.Create (ref_line, col); d = peek_char (); if (d == '-') { d = Token.OP_DEC; @@ -2541,6 +2757,7 @@ namespace Mono.CSharp return d; case '!': + val = LocatedToken.Create (ref_line, col); if (peek_char () == '='){ get_char (); return Token.OP_NE; @@ -2548,6 +2765,7 @@ namespace Mono.CSharp return Token.BANG; case '=': + val = LocatedToken.Create (ref_line, col); d = peek_char (); if (d == '='){ get_char (); @@ -2561,6 +2779,7 @@ namespace Mono.CSharp return Token.ASSIGN; case '&': + val = LocatedToken.Create (ref_line, col); d = peek_char (); if (d == '&'){ get_char (); @@ -2573,6 +2792,7 @@ namespace Mono.CSharp return Token.BITWISE_AND; case '|': + val = LocatedToken.Create (ref_line, col); d = peek_char (); if (d == '|'){ get_char (); @@ -2585,16 +2805,17 @@ namespace Mono.CSharp return Token.BITWISE_OR; case '*': + val = LocatedToken.Create (ref_line, col); if (peek_char () == '='){ get_char (); return Token.OP_MULT_ASSIGN; } - val = Location; return Token.STAR; case '/': d = peek_char (); if (d == '='){ + val = LocatedToken.Create (ref_line, col); get_char (); return Token.OP_DIV_ASSIGN; } @@ -2671,6 +2892,7 @@ namespace Mono.CSharp return Token.DIV; case '%': + val = LocatedToken.Create (ref_line, col); if (peek_char () == '='){ get_char (); return Token.OP_MOD_ASSIGN; @@ -2678,6 +2900,7 @@ namespace Mono.CSharp return Token.PERCENT; case '^': + val = LocatedToken.Create (ref_line, col); if (peek_char () == '='){ get_char (); return Token.OP_XOR_ASSIGN; @@ -2685,6 +2908,7 @@ namespace Mono.CSharp return Token.CARRET; case ':': + val = LocatedToken.Create (ref_line, col); if (peek_char () == ':') { get_char (); return Token.DOUBLE_COLON; @@ -2785,6 +3009,15 @@ namespace Mono.CSharp return Token.ERROR; } + if (CompleteOnEOF){ + if (generated) + return Token.COMPLETE_COMPLETION; + + generated = true; + return Token.GENERATE_COMPLETION; + } + + return Token.EOF; } @@ -2809,7 +3042,7 @@ namespace Mono.CSharp if (d != 0) throw new NotImplementedException (); - val = (char) c; + val = new CharLiteral ((char) c, Location); c = get_char (); if (c != '\'') { @@ -2823,7 +3056,7 @@ namespace Mono.CSharp return Token.ERROR; } - return Token.LITERAL_CHARACTER; + return Token.LITERAL; } int TokenizeLessThan () @@ -2970,6 +3203,10 @@ namespace Mono.CSharp return null; } + Report Report { + get { return context.Report; } + } + void reset_doc_comment () { xml_comment_buffer.Length = 0; @@ -2979,7 +3216,7 @@ namespace Mono.CSharp public void cleanup () { if (ifstack != null && ifstack.Count >= 1) { - int state = (int) ifstack.Pop (); + int state = ifstack.Pop (); if ((state & REGION) != 0) Report.Error (1038, Location, "#endregion directive expected"); else