2 // cs-tokenizer.cs: The Tokenizer for the C# compiler
3 // This also implements the preprocessor
5 // Author: Miguel de Icaza (miguel@gnu.org)
7 // Licensed under the terms of the GNU GPL
9 // (C) 2001, 2002 Ximian, Inc (http://www.ximian.com)
10 // (C) 2004 Novell, Inc
15 * Make sure we accept the proper Unicode ranges, per the spec.
21 using System.Collections;
23 using System.Globalization;
24 using System.Reflection;
29 /// Tokenizer for C# source code.
32 public class Tokenizer : yyParser.yyInput
34 SeekableStreamReader reader;
35 public SourceFile ref_name;
36 public SourceFile file_name;
37 public int ref_line = 1;
40 public int current_token;
41 bool handle_get_set = false;
42 bool handle_remove_add = false;
43 bool handle_assembly = false;
46 // XML documentation buffer. The save point is used to divide
47 // comments on types and comments on members.
49 StringBuilder xml_comment_buffer;
52 // See comment on XmlCommentState enumeration.
54 XmlCommentState xmlDocState = XmlCommentState.Allowed;
57 // Whether tokens have been seen on this line
59 bool tokens_seen = false;
62 // Whether a token has been seen on the file
63 // This is needed because `define' is not allowed to be used
64 // after a token has been seen.
66 bool any_token_seen = false;
68 static Hashtable tokenValues;
// Lookup table from a token's integer value to the name of its constant;
// `location' indexes it with current_token to print a readable token name.
// The table is created by GetTokenValueNameHash the first time this
// property is read and is then kept in the static tokenValues field.
70 private static Hashtable TokenValueName
73 if (tokenValues == null)
74 tokenValues = GetTokenValueNameHash ();
// Builds the value -> name table by reflecting over the fields of Token:
// every literal, static field whose type is int is added, keyed by its
// (boxed) value and holding the field's name.
// NOTE(review): Hashtable.Add throws on a duplicate key, so this relies on
// no two int constants in Token sharing a value -- confirm against Token.
80 private static Hashtable GetTokenValueNameHash ()
82 Type t = typeof (Token);
83 FieldInfo [] fields = t.GetFields ();
84 Hashtable hash = new Hashtable ();
85 foreach (FieldInfo field in fields) {
// Only compile-time int constants are of interest here.
86 if (field.IsLiteral && field.IsStatic && field.FieldType == typeof (int))
87 hash.Add (field.GetValue (null), field.Name);
93 // Returns a verbose representation of the current location
95 public string location {
99 if (current_token == Token.ERROR)
100 det = "detail: " + error_details;
104 // return "Line: "+line+" Col: "+col + "\n" +
105 // "VirtLine: "+ref_line +
106 // " Token: "+current_token + " " + det;
107 string current_token_name = TokenValueName [current_token] as string;
108 if (current_token_name == null)
109 current_token_name = current_token.ToString ();
111 return String.Format ("{0} ({1},{2}), Token: {3} {4}", ref_name.Name,
119 public bool PropertyParsing {
121 return handle_get_set;
125 handle_get_set = value;
129 public bool AssemblyTargetParsing {
131 return handle_assembly;
135 handle_assembly = value;
139 public bool EventParsing {
141 return handle_remove_add;
145 handle_remove_add = value;
149 public XmlCommentState doc_state {
150 get { return xmlDocState; }
152 if (value == XmlCommentState.Allowed) {
153 check_incorrect_doc_comment ();
154 consume_doc_comment ();
164 static CharArrayHashtable[] keywords;
165 static NumberStyles styles;
166 static NumberFormatInfo csharp_format_info;
169 // Values for the associated token returned
179 const int TAKING = 1;
180 const int TAKEN_BEFORE = 2;
181 const int ELSE_SEEN = 4;
182 const int PARENT_TAKING = 8;
183 const int REGION = 16;
186 // pre-processor if stack state:
190 static System.Text.StringBuilder string_builder;
192 const int max_id_size = 512;
193 static char [] id_builder = new char [max_id_size];
195 static CharArrayHashtable [] identifiers = new CharArrayHashtable [max_id_size + 1];
197 const int max_number_size = 128;
198 static char [] number_builder = new char [max_number_size];
199 static int number_pos;
202 // Details about the error encountered by the tokenizer
204 string error_details;
206 public string error {
208 return error_details;
// Registers the keyword `kw' with its token value. Keywords are bucketed
// by length: keywords [n] holds the table for all keywords of n
// characters, and is created here the first time a keyword of that
// length is added. The table is keyed by the keyword's characters.
224 static void AddKeyword (string kw, int token) {
225 if (keywords [kw.Length] == null) {
226 keywords [kw.Length] = new CharArrayHashtable (kw.Length);
228 keywords [kw.Length] [kw.ToCharArray ()] = token;
// Sets up the tokenizer's static state: allocates the per-length keyword
// tables, registers each keyword with its token through AddKeyword, and
// initialises the number-parsing settings and the shared string builder.
231 static void InitTokens ()
// Indexed by keyword length (AddKeyword uses kw.Length as the index), so
// every registered keyword must be shorter than 64 characters.
233 keywords = new CharArrayHashtable [64];
235 AddKeyword ("__arglist", Token.ARGLIST);
236 AddKeyword ("abstract", Token.ABSTRACT);
237 AddKeyword ("as", Token.AS);
238 AddKeyword ("add", Token.ADD);
239 AddKeyword ("assembly", Token.ASSEMBLY);
240 AddKeyword ("base", Token.BASE);
241 AddKeyword ("bool", Token.BOOL);
242 AddKeyword ("break", Token.BREAK);
243 AddKeyword ("byte", Token.BYTE);
244 AddKeyword ("case", Token.CASE);
245 AddKeyword ("catch", Token.CATCH);
246 AddKeyword ("char", Token.CHAR);
247 AddKeyword ("checked", Token.CHECKED);
248 AddKeyword ("class", Token.CLASS);
249 AddKeyword ("const", Token.CONST);
250 AddKeyword ("continue", Token.CONTINUE);
251 AddKeyword ("decimal", Token.DECIMAL);
252 AddKeyword ("default", Token.DEFAULT);
253 AddKeyword ("delegate", Token.DELEGATE);
254 AddKeyword ("do", Token.DO);
255 AddKeyword ("double", Token.DOUBLE);
256 AddKeyword ("else", Token.ELSE);
257 AddKeyword ("enum", Token.ENUM);
258 AddKeyword ("event", Token.EVENT);
259 AddKeyword ("explicit", Token.EXPLICIT);
260 AddKeyword ("extern", Token.EXTERN);
261 AddKeyword ("false", Token.FALSE);
262 AddKeyword ("finally", Token.FINALLY);
263 AddKeyword ("fixed", Token.FIXED);
264 AddKeyword ("float", Token.FLOAT);
265 AddKeyword ("for", Token.FOR);
266 AddKeyword ("foreach", Token.FOREACH);
267 AddKeyword ("goto", Token.GOTO);
268 AddKeyword ("get", Token.GET);
269 AddKeyword ("if", Token.IF);
270 AddKeyword ("implicit", Token.IMPLICIT);
271 AddKeyword ("in", Token.IN);
272 AddKeyword ("int", Token.INT);
273 AddKeyword ("interface", Token.INTERFACE);
274 AddKeyword ("internal", Token.INTERNAL);
275 AddKeyword ("is", Token.IS);
276 AddKeyword ("lock", Token.LOCK);
277 AddKeyword ("long", Token.LONG);
278 AddKeyword ("namespace", Token.NAMESPACE);
279 AddKeyword ("new", Token.NEW);
280 AddKeyword ("null", Token.NULL);
281 AddKeyword ("object", Token.OBJECT);
282 AddKeyword ("operator", Token.OPERATOR);
283 AddKeyword ("out", Token.OUT);
284 AddKeyword ("override", Token.OVERRIDE);
285 AddKeyword ("params", Token.PARAMS);
286 AddKeyword ("private", Token.PRIVATE);
287 AddKeyword ("protected", Token.PROTECTED);
288 AddKeyword ("public", Token.PUBLIC);
289 AddKeyword ("readonly", Token.READONLY);
290 AddKeyword ("ref", Token.REF);
291 AddKeyword ("remove", Token.REMOVE);
292 AddKeyword ("return", Token.RETURN);
293 AddKeyword ("sbyte", Token.SBYTE);
294 AddKeyword ("sealed", Token.SEALED);
295 AddKeyword ("set", Token.SET);
296 AddKeyword ("short", Token.SHORT);
297 AddKeyword ("sizeof", Token.SIZEOF);
298 AddKeyword ("stackalloc", Token.STACKALLOC);
299 AddKeyword ("static", Token.STATIC);
300 AddKeyword ("string", Token.STRING);
301 AddKeyword ("struct", Token.STRUCT);
302 AddKeyword ("switch", Token.SWITCH);
303 AddKeyword ("this", Token.THIS);
304 AddKeyword ("throw", Token.THROW);
305 AddKeyword ("true", Token.TRUE);
306 AddKeyword ("try", Token.TRY);
307 AddKeyword ("typeof", Token.TYPEOF);
308 AddKeyword ("uint", Token.UINT);
309 AddKeyword ("ulong", Token.ULONG);
310 AddKeyword ("unchecked", Token.UNCHECKED);
311 AddKeyword ("unsafe", Token.UNSAFE);
312 AddKeyword ("ushort", Token.USHORT);
313 AddKeyword ("using", Token.USING);
314 AddKeyword ("virtual", Token.VIRTUAL);
315 AddKeyword ("void", Token.VOID);
316 AddKeyword ("volatile", Token.VOLATILE);
317 AddKeyword ("while", Token.WHILE);
318 AddKeyword ("partial", Token.PARTIAL);
// Real literals are parsed (see adjust_real) with the invariant number
// format and NumberStyles.Float, independent of the current culture.
327 csharp_format_info = NumberFormatInfo.InvariantInfo;
328 styles = NumberStyles.Float;
// Shared scratch buffer; consume_string resets its Length before use.
330 string_builder = new System.Text.StringBuilder ();
// Looks `id' up in the keyword table that holds keywords of length id_len.
// The length is tested against the size of the keywords array, and the
// slot against null, before the table for that length is indexed.
//
// get/set, remove/add and assembly are context-sensitive: each group is
// special-cased while its flag (handle_get_set, handle_remove_add,
// handle_assembly) is false.
// NOTE(review): presumably they are then reported as "not a keyword" so
// that they lex as ordinary identifiers outside property, event and
// attribute-target contexts -- confirm against the branch bodies.
333 int GetKeyword (char[] id, int id_len)
336 * Keywords are stored in an array of hashtables grouped by their
340 if ((id_len >= keywords.Length) || (keywords [id_len] == null))
342 object o = keywords [id_len] [id];
349 if (handle_get_set == false && (res == Token.GET || res == Token.SET))
351 if (handle_remove_add == false && (res == Token.REMOVE || res == Token.ADD))
353 if (handle_assembly == false && res == Token.ASSEMBLY)
360 public Location Location {
362 return new Location (ref_line);
// Registers the conditional-compilation symbol `def'. The symbol is
// recorded in RootContext.AllDefines if it is not there yet, and set in
// this tokenizer's own `defines' table (the table eval_val consults).
366 void define (string def)
368 if (!RootContext.AllDefines.Contains (def)){
369 RootContext.AllDefines [def] = true;
371 if (defines.Contains (def))
373 defines [def] = true;
376 public Tokenizer (SeekableStreamReader input, SourceFile file, ArrayList defs)
378 this.ref_name = file;
379 this.file_name = file;
385 defines = new Hashtable ();
386 foreach (string def in defs)
390 xml_comment_buffer = new StringBuilder ();
393 // FIXME: This could be `Location.Push' but we have to
394 // find out why the MS compiler allows this
396 Mono.CSharp.Location.Push (file);
399 public static void Cleanup () {
// True when `c' may begin an identifier: an ASCII letter, '_', or any
// character for which Char.IsLetter is true. The ASCII tests come first
// and `||' short-circuits, so Char.IsLetter is only consulted for
// characters outside those ranges.
403 static bool is_identifier_start_character (char c)
405 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || Char.IsLetter (c);
// True when `c' may appear after the first character of an identifier:
// an ASCII letter, '_', an ASCII digit, or any character for which
// Char.IsLetter is true. Apart from letters, only ASCII digits and '_'
// are accepted by this test.
408 static bool is_identifier_part_character (char c)
410 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || Char.IsLetter (c);
// Checks `s' against the tokenizer's own identifier rules: it must be
// non-null and non-empty, its first character must pass
// is_identifier_start_character, and every later character must pass
// is_identifier_part_character. No keyword lookup appears among these
// checks.
413 public static bool IsValidIdentifier (string s)
415 if (s == null || s.Length == 0)
418 if (!is_identifier_start_character (s [0]))
// The first character was handled above, so start at index 1.
421 for (int i = 1; i < s.Length; i ++)
422 if (! is_identifier_part_character (s [i]))
428 int is_punct (char c, ref bool doread)
437 return Token.OPEN_BRACE;
439 return Token.CLOSE_BRACE;
441 // To block doccomment inside attribute declaration.
442 if (doc_state == XmlCommentState.Allowed)
443 doc_state = XmlCommentState.NotAllowed;
444 return Token.OPEN_BRACKET;
446 return Token.CLOSE_BRACKET;
448 return Token.OPEN_PARENS;
450 if (deambiguate_close_parens == 0)
451 return Token.CLOSE_PARENS;
453 --deambiguate_close_parens;
455 // Save current position and parse next token.
456 int old = reader.Position;
457 int new_token = token ();
458 reader.Position = old;
461 if (new_token == Token.OPEN_PARENS)
462 return Token.CLOSE_PARENS_OPEN_PARENS;
463 else if (new_token == Token.MINUS)
464 return Token.CLOSE_PARENS_MINUS;
465 else if (IsCastToken (new_token))
466 return Token.CLOSE_PARENS_CAST;
468 return Token.CLOSE_PARENS_NO_CAST;
476 return Token.SEMICOLON;
489 t = Token.OP_ADD_ASSIGN;
499 t = Token.OP_SUB_ASSIGN;
528 } else if (d == '='){
530 return Token.OP_AND_ASSIGN;
532 return Token.BITWISE_AND;
539 } else if (d == '='){
541 return Token.OP_OR_ASSIGN;
543 return Token.BITWISE_OR;
549 return Token.OP_MULT_ASSIGN;
557 return Token.OP_DIV_ASSIGN;
565 return Token.OP_MOD_ASSIGN;
567 return Token.PERCENT;
573 return Token.OP_XOR_ASSIGN;
585 return Token.OP_SHIFT_LEFT_ASSIGN;
587 return Token.OP_SHIFT_LEFT;
588 } else if (d == '='){
602 return Token.OP_SHIFT_RIGHT_ASSIGN;
604 return Token.OP_SHIFT_RIGHT;
605 } else if (d == '='){
614 int deambiguate_close_parens = 0;
616 public void Deambiguate_CloseParens ()
619 deambiguate_close_parens++;
// Reports error 1021 ("Numeric constant too long") at the current
// Location. It is called when number_pos has reached max_number_size.
// NOTE(review): the callers in this file store into
// number_builder [number_pos++] immediately after calling this; unless
// Report.Error does not return, that store is past the end of the
// buffer -- confirm how Report.Error behaves.
622 void Error_NumericConstantTooLong ()
624 Report.Error (1021, Location, "Numeric constant too long");
627 bool decimal_digits (int c)
630 bool seen_digits = false;
633 if (number_pos == max_number_size)
634 Error_NumericConstantTooLong ();
635 number_builder [number_pos++] = (char) c;
639 // We use peekChar2, because decimal_digits needs to do a
640 // 2-character look-ahead (5.ToString for example).
642 while ((d = peekChar2 ()) != -1){
643 if (d >= '0' && d <= '9'){
644 if (number_pos == max_number_size)
645 Error_NumericConstantTooLong ();
646 number_builder [number_pos++] = (char) d;
658 return (e >= '0' && e <= '9') || (e >= 'A' && e <= 'F') || (e >= 'a' && e <= 'f');
661 void hex_digits (int c)
664 number_builder [number_pos++] = (char) c;
668 int real_type_suffix (int c)
674 t = Token.LITERAL_FLOAT;
677 t = Token.LITERAL_DOUBLE;
680 t= Token.LITERAL_DECIMAL;
688 int integer_type_suffix (ulong ul, int c)
690 bool is_unsigned = false;
691 bool is_long = false;
694 bool scanning = true;
705 if (!is_unsigned && (RootContext.WarningLevel >= 4)){
707 // if we have not seen anything in between
710 Report.Warning (78, Location, "The 'l' suffix is easily confused with the digit '1' (use 'L' for clarity)");
713 // This goto statement causes the MS CLR 2.0 beta 1 csc to report an error, so
738 if (is_long && is_unsigned){
740 return Token.LITERAL_INTEGER;
741 } else if (is_unsigned){
742 // uint if possible, or ulong else.
744 if ((ul & 0xffffffff00000000) == 0)
749 // long if possible, ulong otherwise
750 if ((ul & 0x8000000000000000) != 0)
755 // int, uint, long or ulong in that order
756 if ((ul & 0xffffffff00000000) == 0){
759 if ((ui & 0x80000000) != 0)
764 if ((ul & 0x8000000000000000) != 0)
770 return Token.LITERAL_INTEGER;
774 // given `c' as the next char in the input decide whether
775 // we need to convert to a special type, and then choose
776 // the best representation for the integer
778 int adjust_int (int c)
// Wide path: fold the decimal digits collected in number_builder into a
// ulong. checked () turns a value that does not fit into an
// OverflowException instead of silently wrapping.
782 ulong ul = (uint) (number_builder [0] - '0');
784 for (int i = 1; i < number_pos; i++){
785 ul = checked ((ul * 10) + ((uint)(number_builder [i] - '0')));
787 return integer_type_suffix (ul, c);
// Narrow path: the same fold, accumulated in a uint.
789 uint ui = (uint) (number_builder [0] - '0');
791 for (int i = 1; i < number_pos; i++){
792 ui = checked ((ui * 10) + ((uint)(number_builder [i] - '0')));
794 return integer_type_suffix (ui, c);
// Overflow from either fold: record and report error 1021, then still
// hand back an integer-literal token.
796 } catch (OverflowException) {
797 error_details = "Integral constant is too large";
798 Report.Error (1021, Location, error_details);
800 return Token.LITERAL_INTEGER;
804 int adjust_real (int t)
806 string s = new String (number_builder, 0, number_pos);
809 case Token.LITERAL_DECIMAL:
811 val = System.Decimal.Parse (s, styles, csharp_format_info);
812 } catch (OverflowException) {
814 error_details = "Floating-point constant is outside the range of the type 'decimal'";
815 Report.Error (594, Location, error_details);
818 case Token.LITERAL_FLOAT:
820 val = (float) System.Double.Parse (s, styles, csharp_format_info);
821 } catch (OverflowException) {
823 error_details = "Floating-point constant is outside the range of the type 'float'";
824 Report.Error (594, Location, error_details);
828 case Token.LITERAL_DOUBLE:
830 t = Token.LITERAL_DOUBLE;
832 val = System.Double.Parse (s, styles, csharp_format_info);
833 } catch (OverflowException) {
835 error_details = "Floating-point constant is outside the range of the type 'double'";
836 Report.Error (594, Location, error_details);
849 while ((d = peekChar ()) != -1){
851 number_builder [number_pos++] = (char) d;
857 string s = new String (number_builder, 0, number_pos);
860 ul = System.UInt32.Parse (s, NumberStyles.HexNumber);
862 ul = System.UInt64.Parse (s, NumberStyles.HexNumber);
863 } catch (OverflowException){
864 error_details = "Integral constant is too large";
865 Report.Error (1021, Location, error_details);
867 return Token.LITERAL_INTEGER;
869 catch (FormatException) {
870 Report.Error (1013, Location, "Invalid number");
872 return Token.LITERAL_INTEGER;
875 return integer_type_suffix (ul, peekChar ());
879 // Invoked if we know we have .digits or digits
881 int is_number (int c)
883 bool is_real = false;
888 if (c >= '0' && c <= '9'){
890 int peek = peekChar ();
892 if (peek == 'x' || peek == 'X')
893 return handle_hex ();
900 // We need to handle the case of
901 // "1.1" vs "1.string" (LITERAL_FLOAT vs NUMBER DOT IDENTIFIER)
904 if (decimal_digits ('.')){
910 return adjust_int (-1);
914 if (c == 'e' || c == 'E'){
916 if (number_pos == max_number_size)
917 Error_NumericConstantTooLong ();
918 number_builder [number_pos++] = 'e';
922 if (number_pos == max_number_size)
923 Error_NumericConstantTooLong ();
924 number_builder [number_pos++] = '+';
926 } else if (c == '-') {
927 if (number_pos == max_number_size)
928 Error_NumericConstantTooLong ();
929 number_builder [number_pos++] = '-';
932 if (number_pos == max_number_size)
933 Error_NumericConstantTooLong ();
934 number_builder [number_pos++] = '+';
941 type = real_type_suffix (c);
942 if (type == Token.NONE && !is_real){
944 return adjust_int (c);
948 if (type == Token.NONE){
953 return adjust_real (type);
955 Console.WriteLine ("This should not be reached");
956 throw new Exception ("Is Number should never reach this point");
960 // Accepts exactly count (4 or 8) hex digits, no more, no less
962 int getHex (int count, out bool error)
967 int top = count != -1 ? count : 4;
971 for (i = 0; i < top; i++){
974 if (c >= '0' && c <= '9')
975 c = (int) c - (int) '0';
976 else if (c >= 'A' && c <= 'F')
977 c = (int) c - (int) 'A' + 10;
978 else if (c >= 'a' && c <= 'f')
979 c = (int) c - (int) 'a' + 10;
985 total = (total * 16) + c;
990 if (!is_hex ((char)p))
1031 v = getHex (-1, out error);
1036 v = getHex (4, out error);
1041 v = getHex (8, out error);
1046 Report.Error (1009, Location, "Unrecognized escape sequence in " + (char)d);
1055 if (putback_char != -1){
1056 int x = putback_char;
1061 return reader.Read ();
1066 if (putback_char != -1)
1067 return putback_char;
1068 putback_char = reader.Read ();
1069 return putback_char;
1074 if (putback_char != -1)
1075 return putback_char;
1076 return reader.Peek ();
1079 void putback (int c)
1081 if (putback_char != -1){
1082 Console.WriteLine ("Col: " + col);
1083 Console.WriteLine ("Row: " + line);
1084 Console.WriteLine ("Name: " + ref_name.Name);
1085 Console.WriteLine ("Current [{0}] putting back [{1}] ", putback_char, c);
1086 throw new Exception ("This should not happen putback on putback");
1091 public bool advance ()
1093 return peekChar () != -1;
1096 public Object Value {
1102 public Object value ()
1107 bool IsCastToken (int token)
1112 case Token.IDENTIFIER:
1113 case Token.LITERAL_INTEGER:
1114 case Token.LITERAL_FLOAT:
1115 case Token.LITERAL_DOUBLE:
1116 case Token.LITERAL_DECIMAL:
1117 case Token.LITERAL_CHARACTER:
1118 case Token.LITERAL_STRING:
1130 case Token.UNCHECKED:
1134 // These can be part of a member access
1154 current_token = xtoken ();
1155 return current_token;
1158 static StringBuilder static_cmd_arg = new System.Text.StringBuilder ();
1160 void get_cmd_arg (out string cmd, out string arg)
1164 tokens_seen = false;
1166 static_cmd_arg.Length = 0;
1168 // skip over white space
1169 while ((c = getChar ()) != -1 && (c != '\n') && ((c == '\r') || (c == ' ') || (c == '\t')))
1173 while ((c != -1) && (c != '\n') && (c != ' ') && (c != '\t') && (c != '\r')){
1174 if (is_identifier_part_character ((char) c)){
1175 static_cmd_arg.Append ((char) c);
1183 cmd = static_cmd_arg.ToString ();
1189 } else if (c == '\r')
1192 // skip over white space
1193 while ((c = getChar ()) != -1 && (c != '\n') && ((c == '\r') || (c == ' ') || (c == '\t')))
1200 } else if (c == '\r'){
1205 static_cmd_arg.Length = 0;
1206 static_cmd_arg.Append ((char) c);
1208 while ((c = getChar ()) != -1 && (c != '\n') && (c != '\r')){
1209 static_cmd_arg.Append ((char) c);
1215 } else if (c == '\r')
1217 arg = static_cmd_arg.ToString ().Trim ();
1221 // Handles the #line directive
1223 bool PreProcessLine (string arg)
1228 if (arg == "default"){
1230 ref_name = file_name;
1231 Location.Push (ref_name);
1233 } else if (arg == "hidden"){
1235 // We ignore #line hidden
1243 if ((pos = arg.IndexOf (' ')) != -1 && pos != 0){
1244 ref_line = System.Int32.Parse (arg.Substring (0, pos));
1247 char [] quotes = { '\"' };
1249 string name = arg.Substring (pos). Trim (quotes);
1250 ref_name = Location.LookupFile (name);
1251 file_name.HasLineDirective = true;
1252 ref_name.HasLineDirective = true;
1253 Location.Push (ref_name);
1255 ref_line = System.Int32.Parse (arg);
1265 // Handles #define and #undef
1267 void PreProcessDefinition (bool is_define, string arg)
1269 if (arg == "" || arg == "true" || arg == "false"){
1270 Report.Error (1001, Location, "Missing identifer to pre-processor directive");
1274 char[] whitespace = { ' ', '\t' };
1275 if (arg.IndexOfAny (whitespace) != -1){
1276 Report.Error (1025, Location, "Single-line comment or end-of-line expected");
1280 if (!is_identifier_start_character (arg [0]))
1281 Report.Error (1001, Location, "Identifier expected: " + arg);
1283 foreach (char c in arg.Substring (1)){
1284 if (!is_identifier_part_character (c)){
1285 Report.Error (1001, Location, "Identifier expected: " + arg);
1291 if (defines == null)
1292 defines = new Hashtable ();
1295 if (defines == null)
1297 if (defines.Contains (arg))
1298 defines.Remove (arg);
1303 /// Handles #pragma directive
1305 void PreProcessPragma (string arg)
1307 const string warning = "warning";
1308 const string w_disable = warning + " disable";
1309 const string w_restore = warning + " restore";
1311 if (arg == w_disable) {
1312 Report.RegisterWarningRegion (Location).WarningDisable (line);
1316 if (arg == w_restore) {
1317 Report.RegisterWarningRegion (Location).WarningEnable (line);
1321 if (arg.StartsWith (w_disable)) {
1322 int[] codes = ParseNumbers (arg.Substring (w_disable.Length));
1323 foreach (int code in codes) {
1325 Report.RegisterWarningRegion (Location).WarningDisable (Location, code);
1330 if (arg.StartsWith (w_restore)) {
1331 int[] codes = ParseNumbers (arg.Substring (w_restore.Length));
1332 foreach (int code in codes) {
1333 Report.RegisterWarningRegion (Location).WarningEnable (Location, code);
1338 if (arg.StartsWith (warning)) {
1339 Report.Warning (1634, 1, Location , "Expected disable or restore");
// Parses a comma-separated list of integers; PreProcessPragma passes the
// text that follows `warning disable' / `warning restore'. The result has
// one slot per comma-separated piece. Each piece is parsed with the
// invariant culture, and a piece that is not a number produces warning
// 1692.
// NOTE(review): only FormatException is caught in the lines shown here;
// int.Parse also throws OverflowException for an out-of-range value.
1344 int[] ParseNumbers (string text)
1346 string[] string_array = text.Split (',');
1347 int[] values = new int [string_array.Length];
1349 foreach (string string_code in string_array) {
// The element access (including index++) is evaluated before int.Parse
// runs, so a piece that fails to parse leaves its slot at 0.
1351 values[index++] = int.Parse (string_code, System.Globalization.CultureInfo.InvariantCulture);
1353 catch (FormatException) {
1354 Report.Warning (1692, Location, "Invalid number");
1360 bool eval_val (string s)
1367 if (defines == null)
1369 if (defines.Contains (s))
1375 bool pp_primary (ref string s)
1384 s = s.Substring (1);
1385 bool val = pp_expr (ref s);
1386 if (s.Length > 0 && s [0] == ')'){
1387 s = s.Substring (1);
1390 Error_InvalidDirective ();
1394 if (is_identifier_start_character (c)){
1400 if (is_identifier_part_character (c)){
1404 bool v = eval_val (s.Substring (0, j));
1405 s = s.Substring (j);
1408 bool vv = eval_val (s);
1413 Error_InvalidDirective ();
1417 bool pp_unary (ref string s)
1424 if (len > 1 && s [1] == '='){
1425 Error_InvalidDirective ();
1428 s = s.Substring (1);
1429 return ! pp_primary (ref s);
1431 return pp_primary (ref s);
1433 Error_InvalidDirective ();
1438 bool pp_eq (ref string s)
1440 bool va = pp_unary (ref s);
1446 if (len > 2 && s [1] == '='){
1447 s = s.Substring (2);
1448 return va == pp_unary (ref s);
1450 Error_InvalidDirective ();
1453 } else if (s [0] == '!' && len > 1 && s [1] == '='){
1454 s = s.Substring (2);
1456 return va != pp_unary (ref s);
1465 bool pp_and (ref string s)
1467 bool va = pp_eq (ref s);
1473 if (len > 2 && s [1] == '&'){
1474 s = s.Substring (2);
1475 return (va & pp_and (ref s));
1477 Error_InvalidDirective ();
1486 // Evaluates an expression for `#if' or `#elif'
1488 bool pp_expr (ref string s)
1490 bool va = pp_and (ref s);
1497 if (len > 2 && s [1] == '|'){
1498 s = s.Substring (2);
1499 return va | pp_expr (ref s);
1501 Error_InvalidDirective ();
1510 bool eval (string s)
1512 bool v = pp_expr (ref s);
1515 Error_InvalidDirective ();
1522 void Error_InvalidDirective ()
1524 Report.Error (1517, Location, "Invalid pre-processor directive");
1527 void Error_UnexpectedDirective (string extra)
1531 "Unexpected processor directive (" + extra + ")");
1534 void Error_TokensSeen ()
1538 "Cannot define or undefine pre-processor symbols after a token in the file");
1542 // if true, then the code continues processing the code
1543 // if false, the code stays in a loop until another directive is
1546 bool handle_preprocessing_directive (bool caller_is_taking)
1549 bool region_directive = false;
1551 get_cmd_arg (out cmd, out arg);
1553 // Eat any trailing whitespaces and single-line comments
1554 if (arg.IndexOf ("//") != -1)
1555 arg = arg.Substring (0, arg.IndexOf ("//"));
1556 arg = arg.TrimEnd (' ', '\t');
1559 // The first group of pre-processing instructions is always processed
1563 if (RootContext.Version == LanguageVersion.ISO_1) {
1564 Report.FeatureIsNotStandardized (Location, "#pragma");
1565 return caller_is_taking;
1568 PreProcessPragma (arg);
1569 return caller_is_taking;
1572 if (!PreProcessLine (arg))
1575 "Argument to #line directive is missing or invalid");
1576 return caller_is_taking;
1579 region_directive = true;
1584 region_directive = true;
1589 Error_InvalidDirective ();
1592 bool taking = false;
1593 if (ifstack == null)
1594 ifstack = new Stack ();
1596 if (ifstack.Count == 0){
1599 int state = (int) ifstack.Peek ();
1600 if ((state & TAKING) != 0)
1604 if (eval (arg) && taking){
1605 int push = TAKING | TAKEN_BEFORE | PARENT_TAKING;
1606 if (region_directive)
1608 ifstack.Push (push);
1611 int push = (taking ? PARENT_TAKING : 0);
1612 if (region_directive)
1614 ifstack.Push (push);
1619 if (ifstack == null || ifstack.Count == 0){
1620 Error_UnexpectedDirective ("no #if for this #endif");
1623 int pop = (int) ifstack.Pop ();
1625 if (region_directive && ((pop & REGION) == 0))
1626 Report.Error (1027, Location, "#endif directive expected");
1627 else if (!region_directive && ((pop & REGION) != 0))
1628 Report.Error (1038, Location, "#endregion directive expected");
1630 if (ifstack.Count == 0)
1633 int state = (int) ifstack.Peek ();
1635 if ((state & TAKING) != 0)
1643 if (ifstack == null || ifstack.Count == 0){
1644 Error_UnexpectedDirective ("no #if for this #elif");
1647 int state = (int) ifstack.Peek ();
1649 if ((state & REGION) != 0) {
1650 Report.Error (1038, Location, "#endregion directive expected");
1654 if ((state & ELSE_SEEN) != 0){
1655 Error_UnexpectedDirective ("#elif not valid after #else");
1659 if ((state & (TAKEN_BEFORE | TAKING)) != 0)
1662 if (eval (arg) && ((state & PARENT_TAKING) != 0)){
1663 state = (int) ifstack.Pop ();
1664 ifstack.Push (state | TAKING | TAKEN_BEFORE);
1671 if (ifstack == null || ifstack.Count == 0){
1674 "Unexpected processor directive (no #if for this #else)");
1677 int state = (int) ifstack.Peek ();
1679 if ((state & REGION) != 0) {
1680 Report.Error (1038, Location, "#endregion directive expected");
1684 if ((state & ELSE_SEEN) != 0){
1685 Error_UnexpectedDirective ("#else within #else");
1692 if ((state & TAKEN_BEFORE) == 0){
1693 ret = ((state & PARENT_TAKING) != 0);
1702 ifstack.Push (state | ELSE_SEEN);
1709 // These are only processed if we are in a `taking' block
1711 if (!caller_is_taking)
1716 if (any_token_seen){
1717 Error_TokensSeen ();
1720 PreProcessDefinition (true, arg);
1724 if (any_token_seen){
1725 Error_TokensSeen ();
1728 PreProcessDefinition (false, arg);
1732 Report.Error (1029, Location, "#error: '" + arg + "'");
1736 Report.Warning (1030, Location, "#warning: '{0}'", arg);
1740 Report.Error (1024, Location, "Preprocessor directive expected (got: " + cmd + ")");
1745 private int consume_string (bool quoted)
1748 string_builder.Length = 0;
1750 while ((c = getChar ()) != -1){
1752 if (quoted && peekChar () == '"'){
1753 string_builder.Append ((char) c);
1757 val = string_builder.ToString ();
1758 return Token.LITERAL_STRING;
1764 Report.Error (1010, Location, "Newline in constant");
1776 string_builder.Append ((char) c);
1779 Report.Error (1039, Location, "Unterminated string literal");
1783 private int consume_identifier (int s)
1785 int res = consume_identifier (s, false);
1787 if (doc_state == XmlCommentState.Allowed)
1788 doc_state = XmlCommentState.NotAllowed;
1791 case Token.NAMESPACE:
1792 check_incorrect_doc_comment ();
1796 if (res == Token.PARTIAL) {
1797 // Save current position and parse next token.
1798 int old = reader.Position;
1799 int old_putback = putback_char;
1803 int next_token = token ();
1804 bool ok = (next_token == Token.CLASS) ||
1805 (next_token == Token.STRUCT) ||
1806 (next_token == Token.INTERFACE);
1808 reader.Position = old;
1809 putback_char = old_putback;
1815 return Token.IDENTIFIER;
1822 private int consume_identifier (int s, bool quoted)
1827 id_builder [0] = (char) s;
1829 while ((c = reader.Read ()) != -1) {
1830 if (is_identifier_part_character ((char) c)){
1831 if (pos == max_id_size){
1832 Report.Error (645, Location, "Identifier too long (limit is 512 chars)");
1836 id_builder [pos++] = (char) c;
1846 // Optimization: avoids doing the keyword lookup
1847 // on uppercase letters and _
1849 if (!quoted && (s >= 'a' || s == '_')){
1850 int keyword = GetKeyword (id_builder, pos);
1856 // Keep identifiers in an array of hashtables to avoid needless
1860 if (identifiers [pos] != null) {
1861 val = identifiers [pos][id_builder];
1863 return Token.IDENTIFIER;
1867 identifiers [pos] = new CharArrayHashtable (pos);
1869 val = new String (id_builder, 0, pos);
1871 char [] chars = new char [pos];
1872 Array.Copy (id_builder, chars, pos);
1874 identifiers [pos] [chars] = val;
1876 return Token.IDENTIFIER;
1879 public int xtoken ()
1882 bool doread = false;
1885 // Whether we have seen comments on the current line
1886 bool comments_seen = false;
1889 // optimization: eliminate col and implement #directive semantic correctly.
1890 for (;(c = getChar ()) != -1; col++) {
1895 col = (((col + 8) / 8) * 8) - 1;
1899 if (c == ' ' || c == '\f' || c == '\v' || c == 0xa0)
1903 if (peekChar () == '\n')
1909 any_token_seen |= tokens_seen;
1910 tokens_seen = false;
1911 comments_seen = false;
1915 // Handle double-slash comments.
1917 int d = peekChar ();
1921 if (RootContext.Documentation != null && peekChar () == '/') {
1923 // Don't allow ////.
1924 if ((d = peekChar ()) != '/') {
1925 if (doc_state == XmlCommentState.Allowed)
1926 handle_one_line_xml_comment ();
1927 else if (doc_state == XmlCommentState.NotAllowed)
1928 warn_incorrect_doc_comment ();
1931 while ((d = getChar ()) != -1 && (d != '\n') && d != '\r')
1938 any_token_seen |= tokens_seen;
1939 tokens_seen = false;
1940 comments_seen = false;
1942 } else if (d == '*'){
1944 bool docAppend = false;
1945 if (RootContext.Documentation != null && peekChar () == '*') {
1947 // But when it is /**/, just do nothing.
1948 if (peekChar () == '/') {
1952 if (doc_state == XmlCommentState.Allowed)
1954 else if (doc_state == XmlCommentState.NotAllowed)
1955 warn_incorrect_doc_comment ();
1958 int current_comment_start = 0;
1960 current_comment_start = xml_comment_buffer.Length;
1961 xml_comment_buffer.Append (Environment.NewLine);
1964 while ((d = getChar ()) != -1){
1965 if (d == '*' && peekChar () == '/'){
1968 comments_seen = true;
1972 xml_comment_buffer.Append ((char) d);
1978 any_token_seen |= tokens_seen;
1979 tokens_seen = false;
1981 // Reset 'comments_seen' just to be consistent.
1982 // It doesn't matter either way, here.
1984 comments_seen = false;
1988 update_formatted_doc_comment (current_comment_start);
1991 goto is_punct_label;
1995 if (is_identifier_start_character ((char)c)){
1997 return consume_identifier (c);
2001 if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){
2015 any_token_seen |= tokens_seen;
2016 tokens_seen = false;
2017 comments_seen = false;
2021 if (c >= '0' && c <= '9'){
2023 return is_number (c);
2028 int peek = peekChar ();
2029 if (peek >= '0' && peek <= '9')
2030 return is_number (c);
2034 /* For now, ignore pre-processor commands */
2035 // FIXME: In C# the '#' is not limited to appear
2036 // on the first column.
2040 if (tokens_seen || comments_seen) {
2041 error_details = "Preprocessor directives must appear as the first non-whitespace " +
2042 "character on a line.";
2044 Report.Error (1040, Location, error_details);
2051 cont = handle_preprocessing_directive (cont);
2059 bool skipping = false;
2060 for (;(c = getChar ()) != -1; col++){
2066 } else if (c == ' ' || c == '\t' || c == '\v' || c == '\r' || c == 0xa0)
2070 if (c == '#' && !skipping)
2073 any_token_seen |= tokens_seen;
2074 tokens_seen = false;
2076 Report.Error (1027, Location, "#endif/#endregion expected");
2081 return consume_string (false);
2087 error_details = "Empty character literal";
2088 Report.Error (1011, Location, error_details);
2094 val = new System.Char ();
2099 error_details = "Too many characters in character literal";
2100 Report.Error (1012, Location, error_details);
2102 // Try to recover, read until newline or next "'"
2103 while ((c = getChar ()) != -1){
2104 if (c == '\n' || c == '\''){
2115 return Token.LITERAL_CHARACTER;
2122 return consume_string (true);
2123 } else if (is_identifier_start_character ((char) c)){
2124 return consume_identifier (c, true);
2126 Report.Error (1646, Location, "Keyword, identifier, or string expected after verbatim specifier: @");
2130 error_details = ((char)c).ToString ();
2139 // Handles a one-line XML doc comment: copies the rest of the current
// line into xml_comment_buffer. Leading spaces are skipped, characters
// are then consumed up to (but not including) the line terminator or end
// of input, and Environment.NewLine is appended when the line ended with
// '\r' or '\n'. The terminator itself is only peeked here, not consumed.
2141 private void handle_one_line_xml_comment ()
2144 while ((c = peekChar ()) == ' ')
2145 getChar (); // skip leading spaces.
2146 while ((c = peekChar ()) != -1 && c != '\n' && c != '\r') {
2148 xml_comment_buffer.Append ((char) getChar ());
// `c' still holds the character that stopped the loop above.
2150 if (c == '\r' || c == '\n')
2151 xml_comment_buffer.Append (Environment.NewLine);
2155 // Removes the leading "*" from the lines of a Javadoc-like XML doc comment.
// Works on the part of xml_comment_buffer that starts at
// current_comment_start: the text is taken out with '\r' removed and split
// on '\n'; from the second line on, the first '*' of a line is located and
// the line is replaced by the text that follows it; finally the original
// region of the buffer is replaced by the lines joined with
// Environment.NewLine.
// NOTE(review): the checks between the IndexOf and the Substring calls
// decide which lines are actually rewritten (`head', the text before the
// '*', is inspected character by character) -- confirm the exact
// conditions there.
2157 private void update_formatted_doc_comment (int current_comment_start)
2159 int length = xml_comment_buffer.Length - current_comment_start;
2160 string [] lines = xml_comment_buffer.ToString (
2161 current_comment_start,
2162 length).Replace ("\r", "").Split ('\n');
2164 // The first line starts with /**, thus it is not target
2165 // for the format check.
2166 for (int i = 1; i < lines.Length; i++) {
2167 string s = lines [i];
2168 int idx = s.IndexOf ('*');
2171 if (i < lines.Length - 1)
2175 head = s.Substring (0, idx);
2176 foreach (char c in head)
// Keep only what follows the '*'.
2179 lines [i] = s.Substring (idx + 1);
// Swap the original comment text for the reformatted lines.
2181 xml_comment_buffer.Remove (current_comment_start, length);
2182 xml_comment_buffer.Insert (current_comment_start, String.Join (Environment.NewLine, lines));
2186 // Checks if there was incorrect doc comments and raise
2189 public void check_incorrect_doc_comment ()
2191 if (xml_comment_buffer.Length > 0)
2192 warn_incorrect_doc_comment ();
2196 // Raises a warning when the tokenizer finds an incorrectly placed doc comment
2199 private void warn_incorrect_doc_comment ()
2201 doc_state = XmlCommentState.Error;
2202 // in csc, it is 'XML comment is not placed on a valid
2203 // language element'. But that does not make sense.
2204 Report.Warning (1587, 2, Location, "XML comment is placed on an invalid language element which can not accept it.");
2208 // Consumes the saved XML comment lines (if any) on behalf of
2209 // the member or type currently being declared.
// When the buffer is non-empty its text is captured in `ret' and the
// buffer is emptied, so the same comment is not picked up again.
// NOTE(review): `ret' is presumably the value returned; what is returned
// for an empty buffer should be confirmed against the rest of the body.
2211 public string consume_doc_comment ()
2213 if (xml_comment_buffer.Length > 0) {
2214 string ret = xml_comment_buffer.ToString ();
2215 xml_comment_buffer.Length = 0;
// Reports an unterminated conditional or region: if the preprocessor
// stack still has an entry, the top one is popped and error 1038
// (#endregion expected) or 1027 (#endif expected) is reported depending
// on whether its REGION bit is set.
// NOTE(review): only the top entry is examined, so deeper unterminated
// blocks are not reported here. Also, the 1027 call passes no Location,
// unlike the 1038 call and the other Report.Error calls in this file --
// confirm that this is intended.
2221 public void cleanup ()
2223 if (ifstack != null && ifstack.Count >= 1) {
2224 int state = (int) ifstack.Pop ();
2225 if ((state & REGION) != 0)
2226 Report.Error (1038, Location, "#endregion directive expected");
2228 Report.Error (1027, "#endif directive expected");
2235 // Indicates whether it accepts XML documentation or not.
2237 public enum XmlCommentState {
2238 // comment is allowed in this state.
2240 // comment is not allowed in this state.
2242 // once comments appeared when it is NotAllowed, then the
2243 // state is changed to it, until the state is changed to