2 // cs-tokenizer.cs: The Tokenizer for the C# compiler
3 // This also implements the preprocessor
5 // Author: Miguel de Icaza (miguel@gnu.org)
6 // Marek Safar (marek.safar@seznam.cz)
8 // Dual licensed under the terms of the MIT X11 or GNU GPL
10 // Copyright 2001, 2002 Ximian, Inc (http://www.ximian.com)
11 // Copyright 2004-2008 Novell, Inc
17 using System.Collections.Generic;
19 using System.Globalization;
20 using System.Reflection;
25 /// Tokenizer for C# source code.
28 public class Tokenizer : yyParser.yyInput
// One entry of the keyword table: the parser token for a keyword, the
// keyword's characters, and a link to the next entry in the same bucket.
32 public readonly int Token;
// Next entry sharing this bucket; AddKeyword appends new entries at the
// tail of this chain.
33 public KeywordEntry Next;
// The keyword text as a character array, so it can be compared against the
// tokenizer's char[] identifier buffer.
34 public readonly char[] Value;
// Creates an entry for keyword `value' associated with parser token `token'.
36 public KeywordEntry (string value, int token)
38 this.Value = value.ToCharArray ();
// Equality comparer for char[] keys that looks only at the first `length'
// elements of each array (both loops below are bounded by `length', not by
// the array's own Length).
43 sealed class IdentifiersComparer : IEqualityComparer<char[]>
// `length' is the number of leading characters that take part in
// comparison and hashing.
47 public IdentifiersComparer (int length)
// NOTE(review): the loop body is not visible here; presumably it compares
// x [i] with y [i] element by element - confirm.
52 public bool Equals (char[] x, char[] y)
54 for (int i = 0; i < length; ++i)
61 public int GetHashCode (char[] obj)
64 for (int i = 0; i < length; ++i)
// (h << 5) - h is h * 31, i.e. a multiply-by-31 rolling hash.
65 h = (h << 5) - h + obj [i];
72 // This class has to be used in the parser only, it reuses token
73 // details after each parse
// Instances are handed out through the static Create methods, which recycle
// objects kept in the static `buffer'; the constructor is private.
75 public class LocatedToken
// Pool of reusable instances; allocated by Initialize ().
80 static LocatedToken[] buffer;
83 private LocatedToken ()
// Convenience overload: a token with a position but no text value.
87 public static LocatedToken Create (int row, int column)
89 return Create (null, row, column);
// Returns a token carrying `value' at (row, column). Once `pos' runs past
// the end of the pool a fresh object is allocated; otherwise the pooled
// slot is used, and a new object is stored into it when needed.
92 public static LocatedToken Create (string value, int row, int column)
95 // TODO: I am not very happy about the logic but it's the best
96 // I could come up with for now.
97 // Ideally we should be using just a tiny buffer (256 elements) which
98 // is enough to hold all details for the current stack and recycle elements
99 // popped from the stack but there is a trick needed to recycle
103 if (pos >= buffer.Length) {
104 entry = new LocatedToken ();
106 entry = buffer [pos];
108 entry = new LocatedToken ();
109 buffer [pos] = entry;
116 entry.column = column;
// (Re)creates the pool with 10000 empty slots.
120 public static void Initialize ()
123 buffer = new LocatedToken [10000];
// Source position of the token, built on demand from row and column.
127 public Location Location {
128 get { return new Location (row, column); }
// Text of the token; null when created through the (row, column) overload.
131 public string Value {
132 get { return value; }
136 SeekableStreamReader reader;
138 CompilationUnit file_name;
139 CompilerContext context;
146 bool handle_get_set = false;
147 bool handle_remove_add = false;
148 bool handle_where = false;
149 bool handle_typeof = false;
150 bool lambda_arguments_parsing;
151 Location current_comment_location = Location.Null;
152 List<Location> escaped_identifiers;
153 int parsing_generic_less_than;
156 // Used mainly for parser optimizations. Some expressions for instance
157 // can appear only in block (including initializer, base initializer)
160 public int parsing_block;
161 internal bool query_parsing;
164 // When parsing type only, useful for ambiguous nullable types
166 public int parsing_type;
169 // Set when parsing generic declaration (type or method header)
171 public bool parsing_generic_declaration;
174 // The value indicates that we have not reached any declaration or
177 public int parsing_declaration;
180 // The special characters to inject on streams to trigger the EXPRESSION_PARSE
181 // token to be returned. They just happen to be Unicode characters that
182 // would never be part of a program (can not be an identifier).
184 // These characters are only tested just before the tokenizer is about to report
185 // an error; so in the regular operation mode, this addition will have no
186 // impact on the tokenizer's performance.
189 public const int EvalStatementParserCharacter = 0x2190; // U+2190 LEFTWARDS ARROW
190 public const int EvalCompilationUnitParserCharacter = 0x2191; // U+2191 UPWARDS ARROW
191 public const int EvalUsingDeclarationsParserCharacter = 0x2192; // U+2192 RIGHTWARDS ARROW
194 // XML documentation buffer. The save point is used to divide
195 // comments on types and comments on members.
197 StringBuilder xml_comment_buffer;
200 // See comment on XmlCommentState enumeration.
202 XmlCommentState xml_doc_state = XmlCommentState.Allowed;
205 // Whether tokens have been seen on this line
207 bool tokens_seen = false;
210 // Set to true once the GENERATE_COMPLETION token has been
211 // returned. This helps produce one GENERATE_COMPLETION,
212 // as many COMPLETE_COMPLETION as necessary to complete the
213 // AST tree and one final EOF.
218 // Whether a token has been seen on the file
219 // This is needed because `define' is not allowed to be used
220 // after a token has been seen.
222 bool any_token_seen = false;
224 static readonly char[] simple_whitespaces = new char[] { ' ', '\t' };
226 public bool PropertyParsing {
227 get { return handle_get_set; }
228 set { handle_get_set = value; }
231 public bool EventParsing {
232 get { return handle_remove_add; }
233 set { handle_remove_add = value; }
236 public bool ConstraintsParsing {
237 get { return handle_where; }
238 set { handle_where = value; }
241 public bool TypeOfParsing {
242 get { return handle_typeof; }
243 set { handle_typeof = value; }
246 public XmlCommentState doc_state {
247 get { return xml_doc_state; }
249 if (value == XmlCommentState.Allowed) {
250 check_incorrect_doc_comment ();
251 reset_doc_comment ();
253 xml_doc_state = value;
258 // This is used to trigger completion generation on the parser
259 public bool CompleteOnEOF;
261 void AddEscapedIdentifier (Location loc)
263 if (escaped_identifiers == null)
264 escaped_identifiers = new List<Location> ();
266 escaped_identifiers.Add (loc);
269 public bool IsEscapedIdentifier (Location loc)
271 if (escaped_identifiers != null) {
272 foreach (Location lt in escaped_identifiers)
283 static KeywordEntry[][] keywords;
284 static Dictionary<string, object> keyword_strings; // TODO: HashSet
285 static NumberStyles styles;
286 static NumberFormatInfo csharp_format_info;
289 // Values for the associated token returned
291 internal int putback_char; // Used by repl only
297 const int TAKING = 1;
298 const int ELSE_SEEN = 4;
299 const int PARENT_TAKING = 8;
300 const int REGION = 16;
303 // pre-processor if stack state:
307 static System.Text.StringBuilder string_builder;
309 const int max_id_size = 512;
310 static char [] id_builder = new char [max_id_size];
312 public static Dictionary<char[], string>[] identifiers = new Dictionary<char[], string>[max_id_size + 1];
314 const int max_number_size = 512;
315 static char [] number_builder = new char [max_number_size];
316 static int number_pos;
318 static StringBuilder static_cmd_arg = new System.Text.StringBuilder ();
321 // Details about the error encountered by the tokenizer
323 string error_details;
325 public string error {
327 return error_details;
338 // This is used when the tokenizer needs to save
339 // the current position as it needs to do some parsing
340 // on its own to disambiguate a token on behalf of the
343 Stack<Position> position_stack = new Stack<Position> (2);
// Snapshot of the tokenizer state taken by PushPosition; the fields below
// mirror the tokenizer fields of the same name.
351 public int putback_char;
352 public int previous_col;
// Copy of the tokenizer's ifstack; left unassigned when the tokenizer's
// stack is null or empty.
353 public Stack<int> ifstack;
354 public int parsing_generic_less_than;
355 public int current_token;
// Captures the state of tokenizer `t' at the moment of the call.
358 public Position (Tokenizer t)
360 position = t.reader.Position;
362 ref_line = t.ref_line;
365 putback_char = t.putback_char;
366 previous_col = t.previous_col;
367 if (t.ifstack != null && t.ifstack.Count != 0) {
368 // There is no simple way to clone Stack<T>: all
369 // methods reverse the order. ToArray () yields the elements top-first
// and the Stack<T> (IEnumerable<T>) constructor pushes them in that
// order, so the array is reversed first to keep the original order.
370 var clone = t.ifstack.ToArray ();
371 Array.Reverse (clone);
372 ifstack = new Stack<int> (clone);
374 parsing_generic_less_than = t.parsing_generic_less_than;
375 current_token = t.current_token;
// Saves a snapshot of the current tokenizer state on position_stack so a
// later PopPosition can rewind to it (or DiscardPosition can drop it).
380 public void PushPosition ()
382 position_stack.Push (new Position (this));
// Rewinds the tokenizer to the most recently pushed snapshot: the reader
// position and the saved tokenizer fields are copied back from it.
// Stack<T>.Pop throws InvalidOperationException when nothing was pushed.
385 public void PopPosition ()
387 Position p = position_stack.Pop ();
389 reader.Position = p.position;
390 ref_line = p.ref_line;
394 putback_char = p.putback_char;
395 previous_col = p.previous_col;
397 parsing_generic_less_than = p.parsing_generic_less_than;
398 current_token = p.current_token;
402 // Do not reset the position, ignore it: the most recent snapshot is
// dropped from position_stack and the tokenizer keeps its current state.
403 public void DiscardPosition ()
405 position_stack.Pop ();
// Registers keyword `kw' with parser token `token' in both lookup
// structures: keyword_strings (string membership) and the `keywords' table
// (indexed by keyword length, then by first character).
408 static void AddKeyword (string kw, int token)
// Dictionary.Add throws ArgumentException if the same keyword is added twice.
410 keyword_strings.Add (kw, null);
412 int length = kw.Length;
413 if (keywords [length] == null) {
// One slot per possible first character in the range '_' .. 'z'.
414 keywords [length] = new KeywordEntry ['z' - '_' + 1];
// Assumes the keyword starts with a character in '_' .. 'z'; anything else
// would index outside the bucket array.
417 int char_index = kw [0] - '_';
418 KeywordEntry kwe = keywords [length] [char_index];
420 keywords [length] [char_index] = new KeywordEntry (kw, token);
// Slot already taken: walk to the end of the chain and append there.
424 while (kwe.Next != null) {
428 kwe.Next = new KeywordEntry (kw, token);
431 static void InitTokens ()
433 keyword_strings = new Dictionary<string, object> ();
435 // The table is indexed by keyword length: 11 slots cover lengths 0..10, and the longest keywords registered below have 10 characters
436 keywords = new KeywordEntry [11] [];
438 AddKeyword ("__arglist", Token.ARGLIST);
439 AddKeyword ("abstract", Token.ABSTRACT);
440 AddKeyword ("as", Token.AS);
441 AddKeyword ("add", Token.ADD);
442 AddKeyword ("base", Token.BASE);
443 AddKeyword ("bool", Token.BOOL);
444 AddKeyword ("break", Token.BREAK);
445 AddKeyword ("byte", Token.BYTE);
446 AddKeyword ("case", Token.CASE);
447 AddKeyword ("catch", Token.CATCH);
448 AddKeyword ("char", Token.CHAR);
449 AddKeyword ("checked", Token.CHECKED);
450 AddKeyword ("class", Token.CLASS);
451 AddKeyword ("const", Token.CONST);
452 AddKeyword ("continue", Token.CONTINUE);
453 AddKeyword ("decimal", Token.DECIMAL);
454 AddKeyword ("default", Token.DEFAULT);
455 AddKeyword ("delegate", Token.DELEGATE);
456 AddKeyword ("do", Token.DO);
457 AddKeyword ("double", Token.DOUBLE);
458 AddKeyword ("else", Token.ELSE);
459 AddKeyword ("enum", Token.ENUM);
460 AddKeyword ("event", Token.EVENT);
461 AddKeyword ("explicit", Token.EXPLICIT);
462 AddKeyword ("extern", Token.EXTERN);
463 AddKeyword ("false", Token.FALSE);
464 AddKeyword ("finally", Token.FINALLY);
465 AddKeyword ("fixed", Token.FIXED);
466 AddKeyword ("float", Token.FLOAT);
467 AddKeyword ("for", Token.FOR);
468 AddKeyword ("foreach", Token.FOREACH);
469 AddKeyword ("goto", Token.GOTO);
470 AddKeyword ("get", Token.GET);
471 AddKeyword ("if", Token.IF);
472 AddKeyword ("implicit", Token.IMPLICIT);
473 AddKeyword ("in", Token.IN);
474 AddKeyword ("int", Token.INT);
475 AddKeyword ("interface", Token.INTERFACE);
476 AddKeyword ("internal", Token.INTERNAL);
477 AddKeyword ("is", Token.IS);
478 AddKeyword ("lock", Token.LOCK);
479 AddKeyword ("long", Token.LONG);
480 AddKeyword ("namespace", Token.NAMESPACE);
481 AddKeyword ("new", Token.NEW);
482 AddKeyword ("null", Token.NULL);
483 AddKeyword ("object", Token.OBJECT);
484 AddKeyword ("operator", Token.OPERATOR);
485 AddKeyword ("out", Token.OUT);
486 AddKeyword ("override", Token.OVERRIDE);
487 AddKeyword ("params", Token.PARAMS);
488 AddKeyword ("private", Token.PRIVATE);
489 AddKeyword ("protected", Token.PROTECTED);
490 AddKeyword ("public", Token.PUBLIC);
491 AddKeyword ("readonly", Token.READONLY);
492 AddKeyword ("ref", Token.REF);
493 AddKeyword ("remove", Token.REMOVE);
494 AddKeyword ("return", Token.RETURN);
495 AddKeyword ("sbyte", Token.SBYTE);
496 AddKeyword ("sealed", Token.SEALED);
497 AddKeyword ("set", Token.SET);
498 AddKeyword ("short", Token.SHORT);
499 AddKeyword ("sizeof", Token.SIZEOF);
500 AddKeyword ("stackalloc", Token.STACKALLOC);
501 AddKeyword ("static", Token.STATIC);
502 AddKeyword ("string", Token.STRING);
503 AddKeyword ("struct", Token.STRUCT);
504 AddKeyword ("switch", Token.SWITCH);
505 AddKeyword ("this", Token.THIS);
506 AddKeyword ("throw", Token.THROW);
507 AddKeyword ("true", Token.TRUE);
508 AddKeyword ("try", Token.TRY);
509 AddKeyword ("typeof", Token.TYPEOF);
510 AddKeyword ("uint", Token.UINT);
511 AddKeyword ("ulong", Token.ULONG);
512 AddKeyword ("unchecked", Token.UNCHECKED);
513 AddKeyword ("unsafe", Token.UNSAFE);
514 AddKeyword ("ushort", Token.USHORT);
515 AddKeyword ("using", Token.USING);
516 AddKeyword ("virtual", Token.VIRTUAL);
517 AddKeyword ("void", Token.VOID);
518 AddKeyword ("volatile", Token.VOLATILE);
519 AddKeyword ("while", Token.WHILE);
520 AddKeyword ("partial", Token.PARTIAL);
521 AddKeyword ("where", Token.WHERE);
524 AddKeyword ("from", Token.FROM);
525 AddKeyword ("join", Token.JOIN);
526 AddKeyword ("on", Token.ON);
527 AddKeyword ("equals", Token.EQUALS);
528 AddKeyword ("select", Token.SELECT);
529 AddKeyword ("group", Token.GROUP);
530 AddKeyword ("by", Token.BY);
531 AddKeyword ("let", Token.LET);
532 AddKeyword ("orderby", Token.ORDERBY);
533 AddKeyword ("ascending", Token.ASCENDING);
534 AddKeyword ("descending", Token.DESCENDING);
535 AddKeyword ("into", Token.INTO);
544 csharp_format_info = NumberFormatInfo.InvariantInfo;
545 styles = NumberStyles.Float;
547 string_builder = new System.Text.StringBuilder ();
// Resolves the first `id_len' characters of `id' against the keyword table
// and then adjusts the match for context-sensitive keywords, using the
// parsing-state flags (handle_remove_add, handle_where, query_parsing,
// parsing_block, ...) consulted below.
550 int GetKeyword (char[] id, int id_len)
553 // Keywords are stored in an array of arrays grouped by their
554 // length and then by the first character
556 if (id_len >= keywords.Length || keywords [id_len] == null)
559 int first_index = id [0] - '_';
// Each bucket array is allocated with 'z' - '_' + 1 slots (see AddKeyword),
// so only first characters in '_' .. 'z' can index it. Comparing against
// 'z' alone let indexes 28..122 (and negative ones) reach the array access
// below and throw IndexOutOfRangeException.
560 if (first_index < 0 || first_index > 'z' - '_')
563 KeywordEntry kwe = keywords [id_len] [first_index];
// Compare the remaining characters against each entry chained in the slot.
570 for (int i = 1; i < id_len; ++i) {
571 if (id [i] != kwe.Value [i]) {
577 } while (res == 0 && kwe != null);
591 if (!handle_remove_add)
595 if (parsing_declaration == 0)
596 res = Token.EXTERN_ALIAS;
599 if (peek_token () == Token.COLON) {
601 res = Token.DEFAULT_COLON;
605 if (!handle_where && !query_parsing)
610 // A query expression is any expression that starts with `from identifier'
611 // followed by any token except ; , =
613 if (!query_parsing) {
614 if (lambda_arguments_parsing) {
620 // HACK: to disable generics micro-parser, because PushPosition does not
621 // store identifiers array
622 parsing_generic_less_than = 1;
624 case Token.IDENTIFIER:
636 next_token = xtoken ();
// NOTE(review): the comment above excludes `=', but Token.EQUALS is the
// token registered for the `equals' query keyword in InitTokens - confirm
// whether the assignment token should be tested here as well.
637 if (next_token == Token.SEMICOLON || next_token == Token.COMMA || next_token == Token.EQUALS)
640 res = Token.FROM_FIRST;
641 query_parsing = true;
642 if (RootContext.Version <= LanguageVersion.ISO_2)
643 Report.FeatureIsNotAvailable (Location, "query expressions");
646 Expression.Error_VoidInvalidInTheContext (Location, Report);
650 // HACK: A token is not a keyword so we need to restore identifiers buffer
651 // which has been overwritten before we grabbed the identifier
652 id_builder [0] = 'f'; id_builder [1] = 'r'; id_builder [2] = 'o'; id_builder [3] = 'm';
666 case Token.ASCENDING:
667 case Token.DESCENDING:
674 case Token.NAMESPACE:
675 // TODO: some explanation needed
676 check_incorrect_doc_comment ();
680 if (parsing_block > 0) {
685 // Save current position and parse next token.
688 next_token = token ();
// `partial' is only accepted directly before these four keywords.
689 bool ok = (next_token == Token.CLASS) ||
690 (next_token == Token.STRUCT) ||
691 (next_token == Token.INTERFACE) ||
692 (next_token == Token.VOID);
697 if (next_token == Token.VOID) {
698 if (RootContext.Version == LanguageVersion.ISO_1 ||
699 RootContext.Version == LanguageVersion.ISO_2)
700 Report.FeatureIsNotAvailable (Location, "partial methods");
701 } else if (RootContext.Version == LanguageVersion.ISO_1)
702 Report.FeatureIsNotAvailable (Location, "partial types");
707 if (next_token < Token.LAST_KEYWORD) {
708 Report.Error (267, Location,
709 "The `partial' modifier can be used only immediately before `class', `struct', `interface', or `void' keyword");
720 public Location Location {
722 return new Location (ref_line, hidden ? -1 : col);
726 public Tokenizer (SeekableStreamReader input, CompilationUnit file, CompilerContext ctx)
728 this.ref_name = file;
729 this.file_name = file;
735 xml_comment_buffer = new StringBuilder ();
738 // FIXME: This could be `Location.Push' but we have to
739 // find out why the MS compiler allows this
741 Mono.CSharp.Location.Push (file, file);
// True when `c' may begin an identifier: an ASCII letter, '_', or any
// character Char.IsLetter accepts. The ASCII tests come first so the
// common case never reaches the Char.IsLetter call.
744 static bool is_identifier_start_character (int c)
746 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || Char.IsLetter ((char)c);
// True when `c' may appear inside an identifier. ASCII letters, digits and
// '_' are tested first; other characters fall through to the Unicode
// check on the last line (letter or connector punctuation).
749 static bool is_identifier_part_character (char c)
751 if (c >= 'a' && c <= 'z')
754 if (c >= 'A' && c <= 'Z')
757 if (c == '_' || (c >= '0' && c <= '9'))
763 return Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;
// True when `s' was registered through AddKeyword. The lookup is an exact,
// case-sensitive match on the keyword_strings dictionary key.
766 public static bool IsKeyword (string s)
768 return keyword_strings.ContainsKey (s);
772 // Open parens micro parser. Detects both lambda and cast ambiguity.
774 int TokenizeOpenParens ()
779 int bracket_level = 0;
780 bool is_type = false;
781 bool can_be_type = false;
784 ptoken = current_token;
787 switch (current_token) {
788 case Token.CLOSE_PARENS:
792 // Expression inside parens is lambda, (int i) =>
794 if (current_token == Token.ARROW)
795 return Token.OPEN_PARENS_LAMBDA;
798 // Expression inside parens is single type, (int[])
801 return Token.OPEN_PARENS_CAST;
804 // Expression is possible cast, look at next token, (T)null
807 switch (current_token) {
808 case Token.OPEN_PARENS:
811 case Token.IDENTIFIER:
825 case Token.UNCHECKED:
830 // These can be part of a member access
844 return Token.OPEN_PARENS_CAST;
847 return Token.OPEN_PARENS;
850 case Token.DOUBLE_COLON:
851 if (ptoken != Token.IDENTIFIER && ptoken != Token.OP_GENERICS_GT)
856 case Token.IDENTIFIER:
859 case Token.OP_GENERICS_LT:
861 case Token.DOUBLE_COLON:
863 if (bracket_level == 0)
867 can_be_type = is_type = false;
887 if (bracket_level == 0)
892 if (bracket_level == 0) {
894 can_be_type = is_type = false;
898 case Token.OP_GENERICS_LT:
899 case Token.OPEN_BRACKET:
900 if (bracket_level++ == 0)
904 case Token.OP_GENERICS_GT:
905 case Token.CLOSE_BRACKET:
909 case Token.INTERR_NULLABLE:
911 if (bracket_level == 0)
917 can_be_type = is_type = false;
921 return Token.OPEN_PARENS;
// Checks whether `s' is shaped like an identifier: non-null, non-empty, a
// valid start character first and only identifier-part characters after.
// This is a character-level test only; it does not consult the keyword table.
926 public static bool IsValidIdentifier (string s)
928 if (s == null || s.Length == 0)
931 if (!is_identifier_start_character (s [0]))
934 for (int i = 1; i < s.Length; i ++)
935 if (! is_identifier_part_character (s [i]))
941 bool parse_less_than ()
944 int the_token = token ();
945 if (the_token == Token.OPEN_BRACKET) {
947 the_token = token ();
948 } while (the_token != Token.CLOSE_BRACKET);
949 the_token = token ();
950 } else if (the_token == Token.IN || the_token == Token.OUT) {
951 the_token = token ();
954 case Token.IDENTIFIER:
972 case Token.OP_GENERICS_GT:
979 the_token = token ();
981 if (the_token == Token.OP_GENERICS_GT)
983 else if (the_token == Token.COMMA || the_token == Token.DOT || the_token == Token.DOUBLE_COLON)
985 else if (the_token == Token.INTERR_NULLABLE || the_token == Token.STAR)
987 else if (the_token == Token.OP_GENERICS_LT) {
988 if (!parse_less_than ())
991 } else if (the_token == Token.OPEN_BRACKET) {
993 the_token = token ();
994 if (the_token == Token.CLOSE_BRACKET)
996 else if (the_token == Token.COMMA)
997 goto rank_specifiers;
1004 bool parse_generic_dimension (out int dimension)
1009 int the_token = token ();
1010 if (the_token == Token.OP_GENERICS_GT)
1012 else if (the_token == Token.COMMA) {
1020 public int peek_token ()
1025 the_token = token ();
1032 // Tokenizes `?' using custom disambiguation rules to return one
1033 // of the following tokens: INTERR_NULLABLE, OP_COALESCING, INTERR
1035 // Tricky expression look like:
1037 // Foo ? a = x ? b : c;
1039 int TokenizePossibleNullableType ()
1041 if (parsing_block == 0 || parsing_type > 0)
1042 return Token.INTERR_NULLABLE;
1044 int d = peek_char ();
1047 return Token.OP_COALESCING;
1050 switch (current_token) {
1051 case Token.CLOSE_PARENS:
1056 return Token.INTERR;
1060 if (d == ',' || d == ';' || d == '>')
1061 return Token.INTERR_NULLABLE;
1062 if (d == '*' || (d >= '0' && d <= '9'))
1063 return Token.INTERR;
1067 current_token = Token.NONE;
1069 switch (xtoken ()) {
1076 next_token = Token.INTERR;
1079 case Token.SEMICOLON:
1081 case Token.CLOSE_PARENS:
1082 case Token.OPEN_BRACKET:
1083 case Token.OP_GENERICS_GT:
1084 next_token = Token.INTERR_NULLABLE;
1092 if (next_token == -1) {
1093 switch (xtoken ()) {
1095 case Token.SEMICOLON:
1096 case Token.OPEN_BRACE:
1097 case Token.CLOSE_PARENS:
1099 next_token = Token.INTERR_NULLABLE;
1103 next_token = Token.INTERR;
1111 // All shortcuts failed, do it the hard way
1113 while ((ntoken = xtoken ()) != Token.EOF) {
1114 if (ntoken == Token.SEMICOLON)
1117 if (ntoken == Token.COLON) {
1118 if (++colons == interrs)
1123 if (ntoken == Token.INTERR) {
1129 next_token = colons != interrs ? Token.INTERR_NULLABLE : Token.INTERR;
1138 bool decimal_digits (int c)
1141 bool seen_digits = false;
1144 if (number_pos == max_number_size)
1145 Error_NumericConstantTooLong ();
1146 number_builder [number_pos++] = (char) c;
1150 // We use peek_char2, because decimal_digits needs to do a
1151 // 2-character look-ahead (5.ToString for example).
1153 while ((d = peek_char2 ()) != -1){
1154 if (d >= '0' && d <= '9'){
1155 if (number_pos == max_number_size)
1156 Error_NumericConstantTooLong ();
1157 number_builder [number_pos++] = (char) d;
// True when `e' is an ASCII hexadecimal digit (0-9, A-F or a-f).
1167 static bool is_hex (int e)
1169 return (e >= '0' && e <= '9') || (e >= 'A' && e <= 'F') || (e >= 'a' && e <= 'f');
// Maps a real-literal suffix character to TypeCode.Single, TypeCode.Double
// or TypeCode.Decimal; TypeCode.Empty means `c' is not such a suffix.
// NOTE(review): the case labels are not visible here; presumably f/F, d/D
// and m/M respectively - confirm.
1172 static TypeCode real_type_suffix (int c)
1176 return TypeCode.Single;
1178 return TypeCode.Double;
1180 return TypeCode.Decimal;
1182 return TypeCode.Empty;
// Stores in `val' the literal node for integer value `ul', choosing its
// type from the unsigned/long suffix flags and from the magnitude of the
// value, and returns Token.LITERAL. `c' is the character following the digits.
1186 int integer_type_suffix (ulong ul, int c)
1188 bool is_unsigned = false;
1189 bool is_long = false;
1192 bool scanning = true;
1205 // if we have not seen anything in between
1206 // report this error
1208 Report.Warning (78, 4, Location, "The 'l' suffix is easily confused with the digit '1' (use 'L' for clarity)");
// Both suffixes present: always ulong.
1228 if (is_long && is_unsigned){
1229 val = new ULongLiteral (ul, Location);
1230 return Token.LITERAL;
1234 // uint if possible, or ulong else.
// (the mask tests whether any of the upper 32 bits is set)
1236 if ((ul & 0xffffffff00000000) == 0)
1237 val = new UIntLiteral ((uint) ul, Location);
1239 val = new ULongLiteral (ul, Location);
1240 } else if (is_long){
1241 // long if possible, ulong otherwise
// (bit 63 set means the value does not fit a signed 64-bit integer)
1242 if ((ul & 0x8000000000000000) != 0)
1243 val = new ULongLiteral (ul, Location);
1245 val = new LongLiteral ((long) ul, Location);
1247 // int, uint, long or ulong in that order
1248 if ((ul & 0xffffffff00000000) == 0){
1249 uint ui = (uint) ul;
// Bit 31 set: fits 32 bits but not a signed int.
1251 if ((ui & 0x80000000) != 0)
1252 val = new UIntLiteral (ui, Location);
1254 val = new IntLiteral ((int) ui, Location);
1256 if ((ul & 0x8000000000000000) != 0)
1257 val = new ULongLiteral (ul, Location);
1259 val = new LongLiteral ((long) ul, Location);
1262 return Token.LITERAL;
1266 // given `c' as the next char in the input decide whether
1267 // we need to convert to a special type, and then choose
1268 // the best representation for the integer
// The digits are read from number_builder [0 .. number_pos). On overflow
// error CS1021 is reported and an IntLiteral of 0 is produced instead.
1270 int adjust_int (int c)
// More than 9 digits may not fit in 32 bits, so accumulate in a ulong.
1273 if (number_pos > 9){
1274 ulong ul = (uint) (number_builder [0] - '0');
1276 for (int i = 1; i < number_pos; i++){
// checked: raise OverflowException rather than silently wrapping.
1277 ul = checked ((ul * 10) + ((uint)(number_builder [i] - '0')));
1279 return integer_type_suffix (ul, c);
// At most 9 digits: a uint accumulator is sufficient.
1281 uint ui = (uint) (number_builder [0] - '0');
1283 for (int i = 1; i < number_pos; i++){
1284 ui = checked ((ui * 10) + ((uint)(number_builder [i] - '0')));
1286 return integer_type_suffix (ui, c);
1288 } catch (OverflowException) {
1289 error_details = "Integral constant is too large";
1290 Report.Error (1021, Location, error_details);
1291 val = new IntLiteral (0, Location);
1292 return Token.LITERAL;
// NOTE(review): nothing visible in this method parses text, so it is
// unclear what raises FormatException here - confirm whether this handler
// is still reachable.
1294 catch (FormatException) {
1295 Report.Error (1013, Location, "Invalid number");
1296 val = new IntLiteral (0, Location);
1297 return Token.LITERAL;
1301 int adjust_real (TypeCode t)
1303 string s = new String (number_builder, 0, number_pos);
1304 const string error_details = "Floating-point constant is outside the range of type `{0}'";
1307 case TypeCode.Decimal:
1309 val = new DecimalLiteral (decimal.Parse (s, styles, csharp_format_info), Location);
1310 } catch (OverflowException) {
1311 val = new DecimalLiteral (0, Location);
1312 Report.Error (594, Location, error_details, "decimal");
1315 case TypeCode.Single:
1317 val = new FloatLiteral (float.Parse (s, styles, csharp_format_info), Location);
1318 } catch (OverflowException) {
1319 val = new FloatLiteral (0, Location);
1320 Report.Error (594, Location, error_details, "float");
1325 val = new DoubleLiteral (double.Parse (s, styles, csharp_format_info), Location);
1326 } catch (OverflowException) {
1327 val = new DoubleLiteral (0, Location);
1328 Report.Error (594, Location, error_details, "double");
1333 return Token.LITERAL;
1342 while ((d = peek_char ()) != -1){
1344 number_builder [number_pos++] = (char) d;
1350 string s = new String (number_builder, 0, number_pos);
1352 if (number_pos <= 8)
1353 ul = System.UInt32.Parse (s, NumberStyles.HexNumber);
1355 ul = System.UInt64.Parse (s, NumberStyles.HexNumber);
1356 } catch (OverflowException){
1357 error_details = "Integral constant is too large";
1358 Report.Error (1021, Location, error_details);
1359 val = new IntLiteral (0, Location);
1360 return Token.LITERAL;
1362 catch (FormatException) {
1363 Report.Error (1013, Location, "Invalid number");
1364 val = new IntLiteral (0, Location);
1365 return Token.LITERAL;
1368 return integer_type_suffix (ul, peek_char ());
1372 // Invoked if we know we have .digits or digits
1374 int is_number (int c)
1376 bool is_real = false;
1380 if (c >= '0' && c <= '9'){
1382 int peek = peek_char ();
1384 if (peek == 'x' || peek == 'X')
1385 return handle_hex ();
1392 // We need to handle the case of
1393 // "1.1" vs "1.string" (LITERAL_FLOAT vs NUMBER DOT IDENTIFIER)
1396 if (decimal_digits ('.')){
1402 return adjust_int (-1);
1406 if (c == 'e' || c == 'E'){
1408 if (number_pos == max_number_size)
1409 Error_NumericConstantTooLong ();
1410 number_builder [number_pos++] = 'e';
1414 if (number_pos == max_number_size)
1415 Error_NumericConstantTooLong ();
1416 number_builder [number_pos++] = '+';
1418 } else if (c == '-') {
1419 if (number_pos == max_number_size)
1420 Error_NumericConstantTooLong ();
1421 number_builder [number_pos++] = '-';
1424 if (number_pos == max_number_size)
1425 Error_NumericConstantTooLong ();
1426 number_builder [number_pos++] = '+';
1433 var type = real_type_suffix (c);
1434 if (type == TypeCode.Empty && !is_real){
1436 return adjust_int (c);
1441 if (type == TypeCode.Empty){
1446 return adjust_real (type);
1448 throw new Exception ("Is Number should never reach this point");
1452 // Reads `count' (4 or 8) hex digits and accumulates their value; a count
// of -1 uses a limit of 4 digits instead.
// NOTE(review): with count == -1 the peek_char / is_hex test below
// presumably stops early at the first non-hex character - confirm.
// Values of 0x10000 and above are split into a UTF-16 surrogate pair: the
// high surrogate ends up in `total' and the low one in `surrogate'.
1454 int getHex (int count, out int surrogate, out bool error)
1459 int top = count != -1 ? count : 4;
1464 for (i = 0; i < top; i++){
// Convert the hex digit character to its numeric value 0..15.
1467 if (c >= '0' && c <= '9')
1468 c = (int) c - (int) '0';
1469 else if (c >= 'A' && c <= 'F')
1470 c = (int) c - (int) 'A' + 10;
1471 else if (c >= 'a' && c <= 'f')
1472 c = (int) c - (int) 'a' + 10;
1478 total = (total * 16) + c;
1480 int p = peek_char ();
1483 if (!is_hex ((char)p))
// 0x10FFFF is the highest Unicode code point.
1489 if (total > 0x0010FFFF) {
1494 if (total >= 0x00010000) {
// Low surrogate: 0xDC00 plus the low 10 bits of the offset from 0x10000.
1495 surrogate = ((total - 0x00010000) % 0x0400 + 0xDC00);
// High surrogate: 0xD800 plus the remaining upper bits of that offset.
1496 total = ((total - 0x00010000) / 0x0400 + 0xD800);
1503 int escape (int c, out int surrogate)
1539 v = getHex (-1, out surrogate, out error);
1545 return EscapeUnicode (d, out surrogate);
1548 Report.Error (1009, Location, "Unrecognized escape sequence `\\{0}'", ((char)d).ToString ());
1557 int EscapeUnicode (int ch, out int surrogate)
1561 ch = getHex (8, out surrogate, out error);
1563 ch = getHex (4, out surrogate, out error);
1567 Report.Error (1009, Location, "Unrecognized escape sequence");
1575 if (putback_char != -1) {
1588 void advance_line ()
1598 if (putback_char == -1)
1599 putback_char = reader.Read ();
1600 return putback_char;
1605 if (putback_char != -1)
1606 return putback_char;
1607 return reader.Peek ();
// Pushes character `c' back so that it is returned by the next read.
// Only one character can be pending: a second putback before the first is
// consumed is treated as an internal error, dumping diagnostics to the
// console and throwing.
1610 void putback (int c)
1612 if (putback_char != -1){
1613 Console.WriteLine ("Col: " + col);
1614 Console.WriteLine ("Row: " + line);
1615 Console.WriteLine ("Name: " + ref_name.Name);
1616 Console.WriteLine ("Current [{0}] putting back [{1}] ", putback_char, c);
1617 throw new Exception ("This should not happen putback on putback");
1619 if (c == '\n' || col == 0) {
1620 // It won't happen though.
1630 public bool advance ()
1632 return peek_char () != -1 || CompleteOnEOF;
1635 public Object Value {
1641 public Object value ()
1648 current_token = xtoken ();
1649 return current_token;
1652 void get_cmd_arg (out string cmd, out string arg)
1656 tokens_seen = false;
1659 // skip over white space
1662 } while (c == '\r' || c == ' ' || c == '\t');
1664 static_cmd_arg.Length = 0;
1665 while (c != -1 && is_identifier_part_character ((char)c)) {
1666 static_cmd_arg.Append ((char)c);
1669 int peek = peek_char ();
1670 if (peek == 'U' || peek == 'u') {
1672 c = EscapeUnicode (c, out surrogate);
1673 if (surrogate != 0) {
1674 if (is_identifier_part_character ((char) c))
1675 static_cmd_arg.Append ((char) c);
1682 cmd = static_cmd_arg.ToString ();
1684 // skip over white space
1685 while (c == '\r' || c == ' ' || c == '\t')
1688 static_cmd_arg.Length = 0;
1689 int has_identifier_argument = 0;
1691 while (c != -1 && c != '\n' && c != '\r') {
1692 if (c == '\\' && has_identifier_argument >= 0) {
1693 if (has_identifier_argument != 0 || (cmd == "define" || cmd == "if" || cmd == "elif" || cmd == "undef")) {
1694 has_identifier_argument = 1;
1696 int peek = peek_char ();
1697 if (peek == 'U' || peek == 'u') {
1699 c = EscapeUnicode (c, out surrogate);
1700 if (surrogate != 0) {
1701 if (is_identifier_part_character ((char) c))
1702 static_cmd_arg.Append ((char) c);
1707 has_identifier_argument = -1;
1710 static_cmd_arg.Append ((char) c);
1714 if (static_cmd_arg.Length != 0)
1715 arg = static_cmd_arg.ToString ();
1719 // Handles the #line directive
1721 bool PreProcessLine (string arg)
1723 if (arg.Length == 0)
1726 if (arg == "default"){
1728 ref_name = file_name;
1730 Location.Push (file_name, ref_name);
1732 } else if (arg == "hidden"){
1740 if ((pos = arg.IndexOf (' ')) != -1 && pos != 0){
1741 ref_line = System.Int32.Parse (arg.Substring (0, pos));
1744 char [] quotes = { '\"' };
1746 string name = arg.Substring (pos). Trim (quotes);
1747 ref_name = Location.LookupFile (file_name, name);
1748 file_name.AddFile (ref_name);
1750 Location.Push (file_name, ref_name);
1752 ref_line = System.Int32.Parse (arg);
1763 // Handles #define and #undef
1765 void PreProcessDefinition (bool is_define, string ident, bool caller_is_taking)
1767 if (ident.Length == 0 || ident == "true" || ident == "false"){
1768 Report.Error (1001, Location, "Missing identifier to pre-processor directive");
1772 if (ident.IndexOfAny (simple_whitespaces) != -1){
1773 Error_EndLineExpected ();
1777 if (!is_identifier_start_character (ident [0]))
1778 Report.Error (1001, Location, "Identifier expected: {0}", ident);
1780 foreach (char c in ident.Substring (1)){
1781 if (!is_identifier_part_character (c)){
1782 Report.Error (1001, Location, "Identifier expected: {0}", ident);
1787 if (!caller_is_taking)
1794 if (RootContext.IsConditionalDefined (ident))
1797 file_name.AddDefine (ident);
1802 file_name.AddUndefine (ident);
// Reads a two-digit hexadecimal byte from `arg' starting at `pos'; `error'
// reports a character that is not a hex digit.
// NOTE(review): the lines that fetch each character and scale the first
// digit before the second is added are not visible here - confirm.
1806 static byte read_hex (string arg, int pos, out bool error)
// First digit.
1813 if ((c >= '0') && (c <= '9'))
1814 total = (int) c - (int) '0';
1815 else if ((c >= 'A') && (c <= 'F'))
1816 total = (int) c - (int) 'A' + 10;
1817 else if ((c >= 'a') && (c <= 'f'))
1818 total = (int) c - (int) 'a' + 10;
// Second digit, added onto the running total.
1827 if ((c >= '0') && (c <= '9'))
1828 total += (int) c - (int) '0';
1829 else if ((c >= 'A') && (c <= 'F'))
1830 total += (int) c - (int) 'A' + 10;
1831 else if ((c >= 'a') && (c <= 'f'))
1832 total += (int) c - (int) 'a' + 10;
1838 return (byte) total;
1842 /// Handles #pragma checksum
// Shape accepted by the checks below: leading whitespace, a quoted file
// name, whitespace, a quoted GUID in braces, then a quoted run of 32 hex
// digits. The bool result lets the caller decide whether to report the
// invalid-syntax warning.
1844 bool PreProcessPragmaChecksum (string arg)
// NOTE(review): arg [0] is read with no visible length check. PreProcessPragma
// passes arg.Substring (checksum.Length), which is empty for a bare
// `#pragma checksum' -- confirm this cannot raise IndexOutOfRangeException.
1846 if ((arg [0] != ' ') && (arg [0] != '\t'))
1849 arg = arg.Trim (simple_whitespaces);
1850 if ((arg.Length < 2) || (arg [0] != '"'))
// Accumulates the characters of the quoted file name.
1853 StringBuilder file_sb = new StringBuilder ();
1857 while ((ch = arg [pos++]) != '"') {
1858 if (pos >= arg.Length)
1862 if (pos+1 >= arg.Length)
1867 file_sb.Append (ch);
// The closing quote must be followed by a space or a tab and more text.
1870 if ((pos+2 >= arg.Length) || ((arg [pos] != ' ') && (arg [pos] != '\t')))
1873 arg = arg.Substring (pos).Trim (simple_whitespaces);
// Quoted GUID layout: quote at 0, braces at 1 and 38, dashes at 10, 15, 20
// and 25, closing quote at 39.
1874 if ((arg.Length < 42) || (arg [0] != '"') || (arg [1] != '{') ||
1875 (arg [10] != '-') || (arg [15] != '-') || (arg [20] != '-') ||
1876 (arg [25] != '-') || (arg [38] != '}') || (arg [39] != '"'))
1880 byte[] guid_bytes = new byte [16];
// The GUID hex groups are decoded in textual order: 4 bytes, three groups
// of 2 bytes, then 6 bytes.
1882 for (int i = 0; i < 4; i++) {
1883 guid_bytes [i] = read_hex (arg, 2+2*i, out error);
1887 for (int i = 0; i < 2; i++) {
1888 guid_bytes [i+4] = read_hex (arg, 11+2*i, out error);
1891 guid_bytes [i+6] = read_hex (arg, 16+2*i, out error);
1894 guid_bytes [i+8] = read_hex (arg, 21+2*i, out error);
1899 for (int i = 0; i < 6; i++) {
1900 guid_bytes [i+10] = read_hex (arg, 26+2*i, out error);
// Skip the 40 characters of the quoted GUID, then validate the checksum:
// quotes at index 0 and 33 enclose 32 hex digits, i.e. 16 bytes.
1905 arg = arg.Substring (40).Trim (simple_whitespaces);
1906 if ((arg.Length < 34) || (arg [0] != '"') || (arg [33] != '"'))
1909 byte[] checksum_bytes = new byte [16];
1910 for (int i = 0; i < 16; i++) {
1911 checksum_bytes [i] = read_hex (arg, 1+2*i, out error);
1916 arg = arg.Substring (34).Trim (simple_whitespaces);
// Attach the parsed GUID and checksum to the named source file and flag
// ref_name as auto-generated.
1920 SourceFile file = Location.LookupFile (file_name, file_sb.ToString ());
1921 file.SetChecksum (guid_bytes, checksum_bytes);
1922 ref_name.AutoGenerated = true;
1927 /// Handles #pragma directive
// Forms are tested in this order: exactly `warning disable' or
// `warning restore', the same two prefixes followed by a comma separated
// list of warning codes, any other text starting with `warning'
// (warning 1634), and text starting with `checksum'. Anything else ends in
// warning 1633.
// NOTE(review): StartsWith is called without a StringComparison, so the
// match depends on the current culture -- confirm whether an ordinal
// comparison is intended.
1929 void PreProcessPragma (string arg)
1931 const string warning = "warning";
1932 const string w_disable = "warning disable";
1933 const string w_restore = "warning restore";
1934 const string checksum = "checksum";
// No code list: the disable applies from the current row.
1936 if (arg == w_disable) {
1937 Report.RegisterWarningRegion (Location).WarningDisable (Location.Row);
// No code list: the restore applies from the current row.
1941 if (arg == w_restore) {
1942 Report.RegisterWarningRegion (Location).WarningEnable (Location.Row);
// Disable each listed warning code individually.
1946 if (arg.StartsWith (w_disable)) {
1947 int[] codes = ParseNumbers (arg.Substring (w_disable.Length));
1948 foreach (int code in codes) {
1950 Report.RegisterWarningRegion (Location).WarningDisable (Location, code, Report);
// Restore each listed warning code; codes present in
// Report.warning_ignore_table also trigger warning 1635.
1955 if (arg.StartsWith (w_restore)) {
1956 int[] codes = ParseNumbers (arg.Substring (w_restore.Length));
1957 var w_table = Report.warning_ignore_table;
1958 foreach (int code in codes) {
1959 if (w_table != null && w_table.ContainsKey (code))
1960 Report.Warning (1635, 1, Location, "Cannot restore warning `CS{0:0000}' because it was disabled globally", code);
1961 Report.RegisterWarningRegion (Location).WarningEnable (Location, code, Report);
// `warning' followed by something other than disable or restore.
1966 if (arg.StartsWith (warning)) {
1967 Report.Warning (1634, 1, Location, "Expected disable or restore");
// The text after the `checksum' keyword is validated separately.
1971 if (arg.StartsWith (checksum)) {
1972 if (!PreProcessPragmaChecksum (arg.Substring (checksum.Length)))
1973 Warning_InvalidPragmaChecksum ();
1977 Report.Warning (1633, 1, Location, "Unrecognized #pragma directive");
// Splits `text' on commas and parses every piece as an integer using the
// invariant culture. The result array has one slot per piece; a piece that
// is not a valid number is reported with warning 1692.
// NOTE(review): only FormatException is caught in the visible code, while
// int.Parse can also throw OverflowException for out-of-range values --
// confirm whether that case is handled.
1980 int[] ParseNumbers (string text)
1982 string[] string_array = text.Split (',');
1983 int[] values = new int [string_array.Length];
1985 foreach (string string_code in string_array) {
1987 values[index++] = int.Parse (string_code, System.Globalization.CultureInfo.InvariantCulture);
1989 catch (FormatException) {
1990 Report.Warning (1692, 1, Location, "Invalid number");
// Evaluates a single operand of a preprocessor expression. The visible
// path asks file_name whether the symbol `s' is conditionally defined.
1996 bool eval_val (string s)
2003 return file_name.IsConditionalDefined (s);
// Parses a primary term of a preprocessor expression and removes the
// consumed text from the front of `s'. Two shapes are visible: a nested
// expression that must be closed by `)' and an identifier that is passed to
// eval_val. Malformed input is reported through Error_InvalidDirective.
2006 bool pp_primary (ref string s)
// Nested expression: drop the opening character, evaluate the inner
// expression, then require and drop the closing parenthesis.
2015 s = s.Substring (1);
2016 bool val = pp_expr (ref s, false);
2017 if (s.Length > 0 && s [0] == ')'){
2018 s = s.Substring (1);
2021 Error_InvalidDirective ();
// Identifier: the first j characters are evaluated and the rest is left
// in `s'; when nothing is left over, the whole string is evaluated.
2025 if (is_identifier_start_character (c)){
2031 if (is_identifier_part_character (c)){
2035 bool v = eval_val (s.Substring (0, j));
2036 s = s.Substring (j);
2039 bool vv = eval_val (s);
2044 Error_InvalidDirective ();
// Parses an optionally negated primary term. In the negated case one
// character is dropped from `s' and the result of pp_primary is inverted;
// a `=' in second position is rejected as an invalid directive. Otherwise
// the term is handed to pp_primary unchanged.
2048 bool pp_unary (ref string s)
2055 if (len > 1 && s [1] == '='){
2056 Error_InvalidDirective ();
2059 s = s.Substring (1);
2060 return ! pp_primary (ref s);
2062 return pp_primary (ref s);
2064 Error_InvalidDirective ();
// Parses an equality level expression: a unary term optionally followed by
// a two character operator and a second unary term. The visible branches
// compare both sides with == and with != respectively; the operator is
// removed from `s' before the right side is parsed.
2069 bool pp_eq (ref string s)
2071 bool va = pp_unary (ref s);
// A `=' in second position selects the equality comparison.
2077 if (len > 2 && s [1] == '='){
2078 s = s.Substring (2);
2079 return va == pp_unary (ref s);
2081 Error_InvalidDirective ();
// `!' followed by `=' selects the inequality comparison.
2084 } else if (s [0] == '!' && len > 1 && s [1] == '='){
2085 s = s.Substring (2);
2087 return va != pp_unary (ref s);
// Parses a conjunction: an equality level term optionally followed by a
// two character operator whose second character is `&' and a further
// conjunction (right recursion).
2096 bool pp_and (ref string s)
2098 bool va = pp_eq (ref s);
2104 if (len > 2 && s [1] == '&'){
2105 s = s.Substring (2);
// The non-short-circuit `&' always evaluates the recursive call, so the
// right operand is parsed and consumed from `s' even when va is false.
2106 return (va & pp_and (ref s));
2108 Error_InvalidDirective ();
2117 // Evaluates an expression for `#if' or `#elif'
// Top level of the recursive descent parser: a conjunction optionally
// followed by a two character operator whose second character is `|' and
// a further expression. `isTerm' is passed through to the recursive call.
2119 bool pp_expr (ref string s, bool isTerm)
2121 bool va = pp_and (ref s);
2128 if (len > 2 && s [1] == '|'){
2129 s = s.Substring (2);
// The non-short-circuit `|' always evaluates the recursive call, so the
// right operand is parsed and consumed from `s' even when va is true.
2130 return va | pp_expr (ref s, isTerm);
2132 Error_InvalidDirective ();
2137 Error_EndLineExpected ();
// Entry point for evaluating a whole preprocessor expression: parses `s'
// with pp_expr, passing true for isTerm.
2145 bool eval (string s)
2147 bool v = pp_expr (ref s, true);
// Reports error 1021 at the current location.
2156 void Error_NumericConstantTooLong ()
2158 Report.Error (1021, Location, "Numeric constant too long");
// Reports error 1517 at the current location; used by the preprocessor
// expression parser for malformed expressions.
2161 void Error_InvalidDirective ()
2163 Report.Error (1517, Location, "Invalid preprocessor directive");
// Reports a directive that appears where it is not allowed; `extra' is
// inserted into the message to describe the problem.
2166 void Error_UnexpectedDirective (string extra)
2170 "Unexpected processor directive ({0})", extra);
// Reports error 1032: symbols may only be defined or undefined before the
// first token of the file.
2173 void Error_TokensSeen ()
2175 Report.Error (1032, Location,
2176 "Cannot define or undefine preprocessor symbols after first token in file");
// Reports error 1040 for a directive that is not the first non-whitespace
// item on its line.
// NOTE(review): the method name is misspelled (`Eror'); renaming it would
// require updating every caller.
2179 void Eror_WrongPreprocessorLocation ()
2181 Report.Error (1040, Location,
2182 "Preprocessor directives must appear as the first non-whitespace character on a line");
// Reports error 1025 for unexpected text after a directive.
2185 void Error_EndLineExpected ()
2187 Report.Error (1025, Location, "Single-line comment or end-of-line expected");
// Reports warning 1695 (level 1) and shows the expected syntax of the
// checksum pragma in the message.
2190 void Warning_InvalidPragmaChecksum ()
2192 Report.Warning (1695, 1, Location,
2193 "Invalid #pragma checksum syntax; should be " +
2194 "#pragma checksum \"filename\" " +
2195 "\"{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}\" \"XXXX...\"");
// Reads one preprocessor directive (command and argument) and updates the
// conditional state kept in `ifstack'. Each stack entry is a bit set built
// from the flags used below: REGION, TAKING, PARENT_TAKING and ELSE_SEEN.
// Meaning of the return value:
2198 // if true, then the code continues processing the code
2199 // if false, the code stays in a loop until another directive is
2201 // When caller_is_taking is false we ignore all directives except the ones
2202 // which can help us to identify where the #if block ends
2203 bool handle_preprocessing_directive (bool caller_is_taking)
2206 bool region_directive = false;
2208 get_cmd_arg (out cmd, out arg);
2210 // Eat any trailing whitespaces and single-line comments
2211 if (arg.IndexOf ("//") != -1)
2212 arg = arg.Substring (0, arg.IndexOf ("//"));
2213 arg = arg.Trim (simple_whitespaces);
2216 // The first group of pre-processing instructions is always processed
2220 region_directive = true;
// Closing a region (see the message below): the popped state must carry
// the REGION flag, otherwise an #endif was expected instead.
2225 if (ifstack == null || ifstack.Count == 0){
2226 Error_UnexpectedDirective ("no #region for this #endregion");
2229 int pop = ifstack.Pop ();
2231 if ((pop & REGION) == 0)
2232 Report.Error (1027, Location, "Expected `#endif' directive");
2234 return caller_is_taking;
// Opening a block: the stack is created lazily and the flags for the new
// entry are computed. PARENT_TAKING is set when there is no enclosing
// block or when the enclosing block has TAKING set.
2237 if (ifstack == null)
2238 ifstack = new Stack<int> (2);
2240 int flags = region_directive ? REGION : 0;
2241 if (ifstack.Count == 0){
2242 flags |= PARENT_TAKING;
2244 int state = ifstack.Peek ();
2245 if ((state & TAKING) != 0) {
2246 flags |= PARENT_TAKING;
// The new block is taken only when the caller is taking and the
// expression evaluates to true.
2250 if (caller_is_taking && eval (arg)) {
2251 ifstack.Push (flags | TAKING);
2254 ifstack.Push (flags);
// Closing a conditional (see the message below): the popped state must
// not be a region, no text may follow, and the result reflects the TAKING
// flag of the block that becomes current again.
2258 if (ifstack == null || ifstack.Count == 0){
2259 Error_UnexpectedDirective ("no #if for this #endif");
2262 pop = ifstack.Pop ();
2264 if ((pop & REGION) != 0)
2265 Report.Error (1038, Location, "#endregion directive expected");
2267 if (arg.Length != 0) {
2268 Error_EndLineExpected ();
2271 if (ifstack.Count == 0)
2274 int state = ifstack.Peek ();
2275 return (state & TAKING) != 0;
// Alternative branch with a condition (see the messages below): not
// valid inside a region or after an else branch. The state is popped and
// pushed back, with TAKING added when the expression is true and the
// parent block is being taken.
2279 if (ifstack == null || ifstack.Count == 0){
2280 Error_UnexpectedDirective ("no #if for this #elif");
2283 int state = ifstack.Pop ();
2285 if ((state & REGION) != 0) {
2286 Report.Error (1038, Location, "#endregion directive expected");
2290 if ((state & ELSE_SEEN) != 0){
2291 Error_UnexpectedDirective ("#elif not valid after #else");
2295 if ((state & TAKING) != 0) {
2300 if (eval (arg) && ((state & PARENT_TAKING) != 0)){
2301 ifstack.Push (state | TAKING);
2305 ifstack.Push (state);
// Unconditional alternative branch (see the messages below): not valid
// inside a region or after another else branch, and no text may follow.
// When the parent is being taken the branch is taken exactly when the
// preceding branch was not; ELSE_SEEN is recorded in the pushed state.
2310 if (ifstack == null || ifstack.Count == 0){
2311 Error_UnexpectedDirective ("no #if for this #else");
2314 int state = ifstack.Peek ();
2316 if ((state & REGION) != 0) {
2317 Report.Error (1038, Location, "#endregion directive expected");
2321 if ((state & ELSE_SEEN) != 0){
2322 Error_UnexpectedDirective ("#else within #else");
2328 if (arg.Length != 0) {
2329 Error_EndLineExpected ();
2334 if ((state & PARENT_TAKING) != 0) {
2335 ret = (state & TAKING) == 0;
2343 ifstack.Push (state | ELSE_SEEN);
// Symbol definition directives: rejected once any token has been seen in
// the file. The two handlers differ only in the first argument passed to
// PreProcessDefinition.
2348 if (any_token_seen){
2349 Error_TokensSeen ();
2350 return caller_is_taking;
2352 PreProcessDefinition (true, arg, caller_is_taking);
2353 return caller_is_taking;
2356 if (any_token_seen){
2357 Error_TokensSeen ();
2358 return caller_is_taking;
2360 PreProcessDefinition (false, arg, caller_is_taking);
2361 return caller_is_taking;
2365 // These are only processed if we are in a `taking' block
2367 if (!caller_is_taking)
2372 Report.Error (1029, Location, "#error: '{0}'", arg);
2376 Report.Warning (1030, 1, Location, "#warning: `{0}'", arg);
// The pragma directive is reported as unavailable for ISO_1.
2380 if (RootContext.Version == LanguageVersion.ISO_1) {
2381 Report.FeatureIsNotAvailable (Location, "#pragma");
2385 PreProcessPragma (arg);
2389 if (!PreProcessLine (arg))
2392 "The line number specified for #line directive is missing or invalid");
2393 return caller_is_taking;
// No known directive matched.
2396 Report.Error (1024, Location, "Wrong preprocessor directive");
// Scans the body of a string literal into string_builder and, on success,
// stores a StringLiteral in `val' and returns Token.LITERAL. `quoted'
// enables the handling of a quote character that is directly followed by
// another quote. Error 1010 is reported for a newline inside the literal
// and error 1039 when the input ends before the literal is closed.
2401 private int consume_string (bool quoted)
// The shared builder is reset before each literal.
2404 string_builder.Length = 0;
2406 while ((c = get_char ()) != -1){
2408 if (quoted && peek_char () == '"'){
2409 string_builder.Append ((char) c);
2413 val = new StringLiteral (string_builder.ToString (), Location);
2414 return Token.LITERAL;
2420 Report.Error (1010, Location, "Newline in constant");
// escape translates an escape sequence and may also produce a second
// value through `surrogate'.
2425 c = escape (c, out surrogate);
2428 if (surrogate != 0) {
2429 string_builder.Append ((char) c);
2433 string_builder.Append ((char) c);
2436 Report.Error (1039, Location, "Unterminated string literal");
// Consumes an identifier that is not prefixed by the verbatim specifier
// (quoted = false) and, if doc comments were allowed up to this point,
// switches doc_state to NotAllowed.
2440 private int consume_identifier (int s)
2442 int res = consume_identifier (s, false);
2444 if (doc_state == XmlCommentState.Allowed)
2445 doc_state = XmlCommentState.NotAllowed;
// Collects the characters of an identifier into id_builder, then resolves
// it either to a keyword token or to Token.IDENTIFIER with `val' set to a
// LocatedToken. `c' is the first character; `quoted' is true for verbatim
// identifiers, which skip the keyword lookup.
2450 int consume_identifier (int c, bool quoted)
2453 // This method is very performance sensitive. It accounts
2454 // for approximately 25% of all parser time
2462 c = escape (c, out surrogate);
2463 if (surrogate != 0) {
2464 id_builder [pos++] = (char) c;
2469 id_builder [pos++] = (char) c;
// Fast path for plain ASCII letters, digits and underscore.
2475 if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9')) {
2476 id_builder [pos++] = (char) c;
2483 c = escape (c, out surrogate);
2484 if (surrogate != 0) {
2485 if (is_identifier_part_character ((char) c))
2486 id_builder[pos++] = (char) c;
// Slower path for other letters and connector punctuation.
2492 } else if (Char.IsLetter ((char) c) || Char.GetUnicodeCategory ((char) c) == UnicodeCategory.ConnectorPunctuation) {
2493 id_builder [pos++] = (char) c;
// Writing past the end of id_builder is how an over-long identifier is
// detected.
2500 } catch (IndexOutOfRangeException) {
2501 Report.Error (645, Location, "Identifier too long (limit is 512 chars)");
2509 // Optimization: avoids doing the keyword lookup
2510 // on uppercase letters
2512 if (id_builder [0] >= '_' && !quoted) {
2513 int keyword = GetKeyword (id_builder, pos);
2514 if (keyword != -1) {
2515 val = LocatedToken.Create (null, ref_line, column);
2521 // Keep identifiers in an array of hashtables to avoid needless
// `identifiers' is indexed by identifier length; each entry maps the
// characters to the string already created for them.
2524 var identifiers_group = identifiers [pos];
2526 if (identifiers_group != null) {
2527 if (identifiers_group.TryGetValue (id_builder, out s)) {
2528 val = LocatedToken.Create (s, ref_line, column);
2530 AddEscapedIdentifier (((LocatedToken) val).Location);
2531 return Token.IDENTIFIER;
2534 // TODO: this should be number of files dependent
2535 // corlib compilation peaks at 1000 and System.Core at 150
2536 int capacity = pos > 20 ? 10 : 100;
2537 identifiers_group = new Dictionary<char[],string> (capacity, new IdentifiersComparer (pos));
2538 identifiers [pos] = identifiers_group;
// The dictionary key is a private copy of the characters
// (presumably because id_builder is reused for later identifiers).
2541 char [] chars = new char [pos];
2542 Array.Copy (id_builder, chars, pos);
2544 s = new string (id_builder, 0, pos);
2545 identifiers_group.Add (chars, s);
2547 val = LocatedToken.Create (s, ref_line, column);
2549 AddEscapedIdentifier (((LocatedToken) val).Location);
2551 return Token.IDENTIFIER;
// Main scanning routine: reads characters with get_char until the end of
// the input and returns the code of the next token. For many tokens `val'
// is set to a LocatedToken created from ref_line and col. Comments,
// whitespace and preprocessor directives are consumed inside the loop.
2554 public int xtoken ()
2558 // Whether we have seen comments on the current line
2559 bool comments_seen = false;
2560 while ((c = get_char ()) != -1) {
// Advance the column to the next multiple of 8.
2563 col = ((col + 8) / 8) * 8;
2571 case 0xFEFF: // Ignore BOM anywhere in the file
2574 /* This is required for compatibility with .NET
2576 if (peek_char () == 0xBB) {
2579 if (get_char () == 0xBF)
2586 if (peek_char () != '\n')
2591 any_token_seen |= tokens_seen;
2592 tokens_seen = false;
2593 comments_seen = false;
2598 return consume_identifier (c);
2601 val = LocatedToken.Create (ref_line, col);
2602 return Token.OPEN_BRACE;
2604 val = LocatedToken.Create (ref_line, col);
2605 return Token.CLOSE_BRACE;
2607 // To block doccomment inside attribute declaration.
2608 if (doc_state == XmlCommentState.Allowed)
2609 doc_state = XmlCommentState.NotAllowed;
2610 return Token.OPEN_BRACKET;
2612 return Token.CLOSE_BRACKET;
2614 val = LocatedToken.Create (ref_line, col);
2616 // An expression versions of parens can appear in block context only
2618 if (parsing_block != 0 && !lambda_arguments_parsing) {
2621 // Optimize most common case where we know that parens
2624 switch (current_token) {
2625 case Token.IDENTIFIER:
2633 case Token.DELEGATE:
2634 case Token.OP_GENERICS_GT:
2635 return Token.OPEN_PARENS;
2638 // Optimize using peek
2639 int xx = peek_char ();
2646 return Token.OPEN_PARENS;
2649 lambda_arguments_parsing = true;
2651 d = TokenizeOpenParens ();
2653 lambda_arguments_parsing = false;
2657 return Token.OPEN_PARENS;
2659 return Token.CLOSE_PARENS;
2663 return Token.SEMICOLON;
2665 val = LocatedToken.Create (ref_line, col);
2668 val = LocatedToken.Create (ref_line, col);
2669 return TokenizePossibleNullableType ();
2671 if (parsing_generic_less_than++ > 0)
2672 return Token.OP_GENERICS_LT;
2674 return TokenizeLessThan ();
2684 if (parsing_generic_less_than > 1 || (parsing_generic_less_than == 1 && d != '>')) {
2685 parsing_generic_less_than--;
2686 return Token.OP_GENERICS_GT;
2695 return Token.OP_SHIFT_RIGHT_ASSIGN;
2697 return Token.OP_SHIFT_RIGHT;
2703 val = LocatedToken.Create (ref_line, col);
2707 } else if (d == '=') {
2708 d = Token.OP_ADD_ASSIGN;
2716 val = LocatedToken.Create (ref_line, col);
2720 } else if (d == '=')
2721 d = Token.OP_SUB_ASSIGN;
2731 val = LocatedToken.Create (ref_line, col);
2732 if (peek_char () == '='){
2739 val = LocatedToken.Create (ref_line, col);
2750 return Token.ASSIGN;
2753 val = LocatedToken.Create (ref_line, col);
2757 return Token.OP_AND;
2761 return Token.OP_AND_ASSIGN;
2763 return Token.BITWISE_AND;
2766 val = LocatedToken.Create (ref_line, col);
2774 return Token.OP_OR_ASSIGN;
2776 return Token.BITWISE_OR;
2779 val = LocatedToken.Create (ref_line, col);
2780 if (peek_char () == '='){
2782 return Token.OP_MULT_ASSIGN;
2789 val = LocatedToken.Create (ref_line, col);
2791 return Token.OP_DIV_ASSIGN;
2794 // Handle double-slash comments.
2797 if (RootContext.Documentation != null && peek_char () == '/') {
2799 // Don't allow ////.
2800 if ((d = peek_char ()) != '/') {
2801 update_comment_location ();
2802 if (doc_state == XmlCommentState.Allowed)
2803 handle_one_line_xml_comment ();
2804 else if (doc_state == XmlCommentState.NotAllowed)
2805 warn_incorrect_doc_comment ();
2808 while ((d = get_char ()) != -1 && (d != '\n') && d != '\r');
2810 any_token_seen |= tokens_seen;
2811 tokens_seen = false;
2812 comments_seen = false;
2814 } else if (d == '*'){
2816 bool docAppend = false;
2817 if (RootContext.Documentation != null && peek_char () == '*') {
2819 update_comment_location ();
2820 // But when it is /**/, just do nothing.
2821 if (peek_char () == '/') {
2825 if (doc_state == XmlCommentState.Allowed)
2827 else if (doc_state == XmlCommentState.NotAllowed)
2828 warn_incorrect_doc_comment ();
2831 int current_comment_start = 0;
2833 current_comment_start = xml_comment_buffer.Length;
2834 xml_comment_buffer.Append (Environment.NewLine);
2837 while ((d = get_char ()) != -1){
2838 if (d == '*' && peek_char () == '/'){
2840 comments_seen = true;
2844 xml_comment_buffer.Append ((char) d);
2847 any_token_seen |= tokens_seen;
2848 tokens_seen = false;
2850 // Reset 'comments_seen' just to be consistent.
2851 // It doesn't matter either way, here.
2853 comments_seen = false;
2857 Report.Error (1035, Location, "End-of-file found, '*/' expected");
2860 update_formatted_doc_comment (current_comment_start);
2866 val = LocatedToken.Create (ref_line, col);
2867 if (peek_char () == '='){
2869 return Token.OP_MOD_ASSIGN;
2871 return Token.PERCENT;
2874 val = LocatedToken.Create (ref_line, col);
2875 if (peek_char () == '='){
2877 return Token.OP_XOR_ASSIGN;
2879 return Token.CARRET;
2882 val = LocatedToken.Create (ref_line, col);
2883 if (peek_char () == ':') {
2885 return Token.DOUBLE_COLON;
2889 case '0': case '1': case '2': case '3': case '4':
2890 case '5': case '6': case '7': case '8': case '9':
2892 return is_number (c);
2894 case '\n': // white space
2895 any_token_seen |= tokens_seen;
2896 tokens_seen = false;
2897 comments_seen = false;
2903 if (d >= '0' && d <= '9')
2904 return is_number (c);
// A directive is rejected when tokens or comments were already seen on
// the current line.
2908 if (tokens_seen || comments_seen) {
2909 Eror_WrongPreprocessorLocation ();
2913 if (handle_preprocessing_directive (true))
// The directive handler returned false: characters are skipped until a
// later directive makes handle_preprocessing_directive return true.
2916 bool directive_expected = false;
2917 while ((c = get_char ()) != -1) {
2919 directive_expected = true;
2920 } else if (!directive_expected) {
2921 // TODO: Implement comment support for disabled code and uncomment this code
2923 // Eror_WrongPreprocessorLocation ();
2924 // return Token.ERROR;
2929 if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == '\v' )
2933 if (handle_preprocessing_directive (false))
2936 directive_expected = false;
2940 tokens_seen = false;
2947 return consume_string (false);
2950 return TokenizeBackslash ();
2956 return consume_string (true);
2959 if (is_identifier_start_character (c)){
2960 return consume_identifier (c, true);
2963 Report.Error (1646, Location, "Keyword, identifier, or string expected after verbatim specifier: @");
2966 case EvalStatementParserCharacter:
2967 return Token.EVAL_STATEMENT_PARSER;
2968 case EvalCompilationUnitParserCharacter:
2969 return Token.EVAL_COMPILATION_UNIT_PARSER;
2970 case EvalUsingDeclarationsParserCharacter:
2971 return Token.EVAL_USING_DECLARATIONS_UNIT_PARSER;
2974 if (is_identifier_start_character (c)) {
2976 return consume_identifier (c);
2979 error_details = ((char)c).ToString ();
2985 return Token.COMPLETE_COMPLETION;
2988 return Token.GENERATE_COMPLETION;
// Scans a character literal (despite the method name, the messages and
// the CharLiteral result below show that this is what it handles). On
// success `val' holds a CharLiteral and Token.LITERAL is returned. Errors:
// 1011 for an empty literal, 1010 for a newline inside it and 1012 when
// more than one character is present.
2995 int TokenizeBackslash ()
2997 int c = get_char ();
3000 error_details = "Empty character literal";
3001 Report.Error (1011, Location, error_details);
3004 if (c == '\r' || c == '\n') {
3005 Report.Error (1010, Location, "Newline in constant");
// escape translates an escape sequence; its second result is returned
// through `d'.
3010 c = escape (c, out d);
// NOTE(review): the condition guarding this throw is not visible here --
// presumably it is the case where escape produced a second value.
3014 throw new NotImplementedException ();
3016 val = new CharLiteral ((char) c, Location);
3020 Report.Error (1012, Location, "Too many characters in character literal");
3022 // Try to recover, read until newline or next "'"
3023 while ((c = get_char ()) != -1) {
3024 if (c == '\n' || c == '\'')
3030 return Token.LITERAL;
// Decides which token a less-than character stands for. The visible
// outcomes are a generic dimension (only while handle_typeof is set), the
// start of a generic argument list (OP_GENERICS_LT or OP_GENERICS_LT_DECL)
// and the left shift operators.
3033 int TokenizeLessThan ()
3036 if (handle_typeof) {
3038 if (parse_generic_dimension (out d)) {
3041 return Token.GENERIC_DIMENSION;
3046 // Save current position and parse next token.
3048 if (parse_less_than ()) {
// Inside a generic declaration the DECL variant is used unless the token
// that follows is a dot.
3049 if (parsing_generic_declaration && token () != Token.DOT) {
3050 d = Token.OP_GENERICS_LT_DECL;
3052 d = Token.OP_GENERICS_LT;
// Not a generic argument list: reset the nesting counter.
3059 parsing_generic_less_than = 0;
3068 return Token.OP_SHIFT_LEFT_ASSIGN;
3070 return Token.OP_SHIFT_LEFT;
3081 // Handles one line xml comment
// Leading spaces are skipped, the rest of the line is appended to
// xml_comment_buffer, and Environment.NewLine is appended when the line
// ended with CR or LF (as opposed to the end of the input). The line
// terminator itself is only peeked, not consumed.
3083 private void handle_one_line_xml_comment ()
3086 while ((c = peek_char ()) == ' ')
3087 get_char (); // skip heading whitespaces.
3088 while ((c = peek_char ()) != -1 && c != '\n' && c != '\r') {
3089 xml_comment_buffer.Append ((char) get_char ());
3091 if (c == '\r' || c == '\n')
3092 xml_comment_buffer.Append (Environment.NewLine);
3096 // Remove heading "*" in Javadoc-like xml documentation.
// Works on the part of xml_comment_buffer that starts at
// current_comment_start: the text is split into lines (CR removed, split
// on LF), lines after the first may have everything up to and including
// their first asterisk removed, and the region is then replaced by the
// lines joined with Environment.NewLine.
3098 private void update_formatted_doc_comment (int current_comment_start)
3100 int length = xml_comment_buffer.Length - current_comment_start;
3101 string [] lines = xml_comment_buffer.ToString (
3102 current_comment_start,
3103 length).Replace ("\r", "").Split ('\n');
3105 // The first line starts with /**, thus it is not target
3106 // for the format check.
3107 for (int i = 1; i < lines.Length; i++) {
3108 string s = lines [i];
3109 int idx = s.IndexOf ('*');
3112 if (i < lines.Length - 1)
// `head' is the text in front of the asterisk; it is inspected character
// by character before the line is shortened (presumably to make sure it
// is only whitespace -- the check itself is not visible here).
3116 head = s.Substring (0, idx);
3117 foreach (char c in head)
3120 lines [i] = s.Substring (idx + 1);
3122 xml_comment_buffer.Remove (current_comment_start, length);
3123 xml_comment_buffer.Insert (current_comment_start, String.Join (Environment.NewLine, lines));
3127 // Updates current comment location.
// The location is only recorded when none is set yet, so it stays at the
// first comment seen since the last reset. The column is -1 when `hidden'
// is set.
3129 private void update_comment_location ()
3131 if (current_comment_location.IsNull) {
3132 // "-2" is for heading "//" or "/*"
3133 current_comment_location =
3134 new Location (ref_line, hidden ? -1 : col - 2);
3139 // Checks if there was incorrect doc comments and raise
// Any text still pending in xml_comment_buffer counts as an incorrectly
// placed doc comment.
3142 public void check_incorrect_doc_comment ()
3144 if (xml_comment_buffer.Length > 0)
3145 warn_incorrect_doc_comment ();
3149 // Raises a warning when tokenizer found incorrect doccomment
// The warning (1587, level 2) is issued only on the transition into the
// Error state, so it is not repeated while doc_state stays at Error.
3152 private void warn_incorrect_doc_comment ()
3154 if (doc_state != XmlCommentState.Error) {
3155 doc_state = XmlCommentState.Error;
3156 // in csc, it is 'XML comment is not placed on
3157 // a valid language element'. But that does not
3159 Report.Warning (1587, 2, Location, "XML comment is not placed on a valid language element");
3164 // Consumes the saved xml comment lines (if any)
3165 // as for current target member or type.
// When the buffer is not empty its text is captured and the buffer and
// comment location are reset through reset_doc_comment.
3167 public string consume_doc_comment ()
3169 if (xml_comment_buffer.Length > 0) {
3170 string ret = xml_comment_buffer.ToString ();
3171 reset_doc_comment ();
// Getter that exposes the Report object of the current context.
3178 get { return context.Report; }
// Discards any pending doc comment text and clears the remembered comment
// location.
3181 void reset_doc_comment ()
3183 xml_comment_buffer.Length = 0;
3184 current_comment_location = Location.Null;
// Reports a conditional block that is still open: the top entry of
// ifstack is popped and error 1038 is reported when it carries the REGION
// flag; error 1027 covers the missing #endif case.
3187 public void cleanup ()
3189 if (ifstack != null && ifstack.Count >= 1) {
3190 int state = ifstack.Pop ();
3191 if ((state & REGION) != 0)
3192 Report.Error (1038, Location, "#endregion directive expected");
3194 Report.Error (1027, Location, "Expected `#endif' directive");
3200 // Indicates whether it accepts XML documentation or not.
3202 public enum XmlCommentState {
3203 // comment is allowed in this state.
3205 // comment is not allowed in this state.
3207 // once comments appeared when it is NotAllowed, then the
3208 // state is changed to it, until the state is changed to