1 // -*- coding: dos -*-
\r
3 // cs-tokenizer.cs: The Tokenizer for the C# compiler
\r
4 // This also implements the preprocessor
\r
6 // Author: Miguel de Icaza (miguel@gnu.org)
\r
8 // Licensed under the terms of the GNU GPL
\r
10 // (C) 2001, 2002 Ximian, Inc (http://www.ximian.com)
\r
15 * Make sure we accept the proper Unicode ranges, per the spec.
\r
21 using System.Collections;
\r
23 using System.Globalization;
\r
24 using System.Reflection;
\r
26 namespace Mono.CSharp
\r
29 /// Tokenizer for C# source code.
\r
32 public class Tokenizer : yyParser.yyInput
\r
34 SeekableStreamReader reader;
\r
35 public SourceFile ref_name;
\r
36 public SourceFile file_name;
\r
37 public int ref_line = 1;
\r
38 public int line = 1;
\r
40 public int current_token;
\r
41 bool handle_get_set = false;
\r
42 bool handle_remove_add = false;
\r
43 bool handle_assembly = false;
\r
44 bool handle_constraints = false;
\r
47 // Whether tokens have been seen on this line
\r
49 bool tokens_seen = false;
\r
52 // Whether a token has been seen on the file
\r
53 // This is needed because `define' is not allowed to be used
\r
54 // after a token has been seen.
\r
56 bool any_token_seen = false;
\r
57 static Hashtable tokenValues;
\r
59 private static Hashtable TokenValueName
\r
62 if (tokenValues == null)
\r
63 tokenValues = GetTokenValueNameHash ();
\r
// Uses reflection over typeof (Token) to build a Hashtable that maps the
// value of each qualifying field to that field's name.
69 private static Hashtable GetTokenValueNameHash ()
\r
71 Type t = typeof (Token);
\r
72 FieldInfo [] fields = t.GetFields ();
\r
73 Hashtable hash = new Hashtable ();
\r
74 foreach (FieldInfo field in fields) {
\r
// Only fields that are literal, static and typed as int are recorded.
75 if (field.IsLiteral && field.IsStatic && field.FieldType == typeof (int))
\r
// Key: the field's value (read with a null instance); value: the field's name.
76 hash.Add (field.GetValue (null), field.Name);
\r
82 // Returns a verbose representation of the current location
\r
84 public string location {
\r
88 if (current_token == Token.ERROR)
\r
89 det = "detail: " + error_details;
\r
93 // return "Line: "+line+" Col: "+col + "\n" +
\r
94 // "VirtLine: "+ref_line +
\r
95 // " Token: "+current_token + " " + det;
\r
96 string current_token_name = TokenValueName [current_token] as string;
\r
97 if (current_token_name == null)
\r
98 current_token_name = current_token.ToString ();
\r
100 return String.Format ("{0} ({1},{2}), Token: {3} {4}", ref_name.Name,
\r
103 current_token_name,
\r
/// <summary>
///   Reads or writes the handle_get_set flag.  GetKeyword tests this flag
///   when the keyword it matched is `get' or `set'.
/// </summary>
public bool PropertyParsing {
	get { return this.handle_get_set; }
	set { this.handle_get_set = value; }
}
\r
/// <summary>
///   Reads or writes the handle_assembly flag.  GetKeyword tests this flag
///   when the keyword it matched is `assembly'.
/// </summary>
public bool AssemblyTargetParsing {
	get { return this.handle_assembly; }
	set { this.handle_assembly = value; }
}
\r
/// <summary>
///   Reads or writes the handle_remove_add flag.  GetKeyword tests this flag
///   when the keyword it matched is `remove' or `add'.
/// </summary>
public bool EventParsing {
	get { return this.handle_remove_add; }
	set { this.handle_remove_add = value; }
}
\r
/// <summary>
///   Reads or writes the handle_constraints flag.  GetKeyword tests this flag
///   when the keyword it matched is `where'.
/// </summary>
public bool ConstraintsParsing {
	get { return this.handle_constraints; }
	set { this.handle_constraints = value; }
}
\r
151 static CharArrayHashtable[] keywords;
\r
152 static NumberStyles styles;
\r
153 static NumberFormatInfo csharp_format_info;
\r
156 // Values for the associated token returned
\r
166 const int TAKING = 1;
\r
167 const int TAKEN_BEFORE = 2;
\r
168 const int ELSE_SEEN = 4;
\r
169 const int PARENT_TAKING = 8;
\r
170 const int REGION = 16;
\r
173 // pre-processor if stack state:
\r
177 static System.Text.StringBuilder string_builder;
\r
179 const int max_id_size = 512;
\r
180 static char [] id_builder = new char [max_id_size];
\r
182 static CharArrayHashtable [] identifiers = new CharArrayHashtable [max_id_size + 1];
\r
184 const int max_number_size = 128;
\r
185 static char [] number_builder = new char [max_number_size];
\r
186 static int number_pos;
\r
189 // Details about the error encountered by the tokenizer
\r
191 string error_details;
\r
/// <summary>
///   Read-only view of error_details, the text the tokenizer records when
///   it reports a lexical error.
/// </summary>
public string error {
	get { return this.error_details; }
}
\r
/// <summary>
///   Registers <paramref name="kw"/> in the keyword tables.
/// </summary>
/// <remarks>
///   The `keywords' array holds one CharArrayHashtable per keyword length;
///   the table for this length is created the first time it is needed and
///   the keyword's characters are then mapped to <paramref name="token"/>.
/// </remarks>
static void AddKeyword (string kw, int token)
{
	int length = kw.Length;

	CharArrayHashtable bucket = keywords [length];
	if (bucket == null) {
		bucket = new CharArrayHashtable (length);
		keywords [length] = bucket;
	}

	bucket [kw.ToCharArray ()] = token;
}
\r
/// <summary>
///   Allocates the per-length keyword tables (64 slots) and registers every
///   keyword the tokenizer recognises through AddKeyword.
/// </summary>
/// <remarks>
///   `names' and `tokens' are parallel arrays: entry i of one belongs to
///   entry i of the other.  Both are laid out with the same line grouping
///   so the pairing can be checked by eye.
/// </remarks>
static void InitTokens ()
{
	keywords = new CharArrayHashtable [64];

	string [] names = {
		"__arglist",
		"abstract", "as", "add", "assembly",
		"base", "bool", "break", "byte",
		"case", "catch", "char", "checked", "class", "const", "continue",
		"decimal", "default", "delegate", "do", "double",
		"else", "enum", "event", "explicit", "extern",
		"false", "finally", "fixed", "float", "for", "foreach",
		"goto", "get",
		"if", "implicit", "in", "int", "interface", "internal", "is",
		"lock", "long",
		"namespace", "new", "null",
		"object", "operator", "out", "override",
		"params", "private", "protected", "public",
		"readonly", "ref", "remove", "return",
		"sbyte", "sealed", "set", "short", "sizeof", "stackalloc", "static", "string", "struct", "switch",
		"this", "throw", "true", "try", "typeof",
		"uint", "ulong", "unchecked", "unsafe", "ushort", "using",
		"virtual", "void", "volatile",
		"where", "while",
		"partial"
	};

	int [] tokens = {
		Token.ARGLIST,
		Token.ABSTRACT, Token.AS, Token.ADD, Token.ASSEMBLY,
		Token.BASE, Token.BOOL, Token.BREAK, Token.BYTE,
		Token.CASE, Token.CATCH, Token.CHAR, Token.CHECKED, Token.CLASS, Token.CONST, Token.CONTINUE,
		Token.DECIMAL, Token.DEFAULT, Token.DELEGATE, Token.DO, Token.DOUBLE,
		Token.ELSE, Token.ENUM, Token.EVENT, Token.EXPLICIT, Token.EXTERN,
		Token.FALSE, Token.FINALLY, Token.FIXED, Token.FLOAT, Token.FOR, Token.FOREACH,
		Token.GOTO, Token.GET,
		Token.IF, Token.IMPLICIT, Token.IN, Token.INT, Token.INTERFACE, Token.INTERNAL, Token.IS,
		Token.LOCK, Token.LONG,
		Token.NAMESPACE, Token.NEW, Token.NULL,
		Token.OBJECT, Token.OPERATOR, Token.OUT, Token.OVERRIDE,
		Token.PARAMS, Token.PRIVATE, Token.PROTECTED, Token.PUBLIC,
		Token.READONLY, Token.REF, Token.REMOVE, Token.RETURN,
		Token.SBYTE, Token.SEALED, Token.SET, Token.SHORT, Token.SIZEOF, Token.STACKALLOC, Token.STATIC, Token.STRING, Token.STRUCT, Token.SWITCH,
		Token.THIS, Token.THROW, Token.TRUE, Token.TRY, Token.TYPEOF,
		Token.UINT, Token.ULONG, Token.UNCHECKED, Token.UNSAFE, Token.USHORT, Token.USING,
		Token.VIRTUAL, Token.VOID, Token.VOLATILE,
		Token.WHERE, Token.WHILE,
		Token.PARTIAL
	};

	// Registration order is the same as the order of the two tables.
	for (int i = 0; i < names.Length; i++)
		AddKeyword (names [i], tokens [i]);
}
\r
310 // Class initializer
\r
312 static Tokenizer ()
\r
315 csharp_format_info = NumberFormatInfo.InvariantInfo;
\r
316 styles = NumberStyles.Float;
\r
318 string_builder = new System.Text.StringBuilder ();
\r
321 int GetKeyword (char[] id, int id_len)
\r
324 * Keywords are stored in an array of hashtables grouped by their
\r
328 if ((id_len >= keywords.Length) || (keywords [id_len] == null))
\r
330 object o = keywords [id_len] [id];
\r
337 if (handle_get_set == false && (res == Token.GET || res == Token.SET))
\r
339 if (handle_remove_add == false && (res == Token.REMOVE || res == Token.ADD))
\r
341 if (handle_assembly == false && res == Token.ASSEMBLY)
\r
343 if (handle_constraints == false && res == Token.WHERE)
\r
/// <summary>
///   A fresh Location built from ref_line, the line number the tokenizer
///   currently reports.
/// </summary>
public Location Location {
	get {
		Location here = new Location (ref_line);
		return here;
	}
}
\r
356 void define (string def)
\r
358 if (!RootContext.AllDefines.Contains (def)){
\r
359 RootContext.AllDefines [def] = true;
\r
361 if (defines.Contains (def))
\r
363 defines [def] = true;
\r
366 public Tokenizer (SeekableStreamReader input, SourceFile file, ArrayList defs)
\r
368 this.ref_name = file;
\r
369 this.file_name = file;
\r
375 defines = new Hashtable ();
\r
376 foreach (string def in defs)
\r
381 // FIXME: This could be `Location.Push' but we have to
\r
382 // find out why the MS compiler allows this
\r
384 Mono.CSharp.Location.Push (file);
\r
/// <summary>
///   Drops the static identifier cache by clearing the `identifiers' field.
/// </summary>
public static void Cleanup ()
{
	Tokenizer.identifiers = null;
}
\r
/// <summary>
///   Tells whether <paramref name="c"/> may be the first character of an
///   identifier.
/// </summary>
/// <remarks>
///   Accepts `_', the ASCII letters, every character Char.IsLetter accepts
///   (Unicode classes Lu, Ll, Lt, Lm, Lo) and, in addition, the letter-number
///   class Nl (for example U+2160), which the C# identifier grammar also
///   allows at the start of an identifier.
/// </remarks>
/// <param name="c">The character to classify.</param>
/// <returns>true when the character can start an identifier.</returns>
static bool is_identifier_start_character (char c)
{
	// Fast path: almost all source text is ASCII.
	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')
		return true;

	// No other ASCII character is a letter or a letter number.
	if (c < 0x80)
		return false;

	return Char.IsLetter (c) ||
		Char.GetUnicodeCategory (c) == System.Globalization.UnicodeCategory.LetterNumber;
}
\r
/// <summary>
///   Tells whether <paramref name="c"/> may appear after the first character
///   of an identifier.
/// </summary>
/// <remarks>
///   Accepts `_', ASCII letters and digits, and every character whose Unicode
///   category is allowed by the C# identifier grammar: letters (Lu, Ll, Lt,
///   Lm, Lo, Nl), combining marks (Mn, Mc), decimal digits (Nd), connector
///   punctuation (Pc) and formatting characters (Cf).  Everything the
///   previous Char.IsLetter-based test accepted is still accepted.
/// </remarks>
/// <param name="c">The character to classify.</param>
/// <returns>true when the character can continue an identifier.</returns>
static bool is_identifier_part_character (char c)
{
	// Fast path: almost all source text is ASCII.
	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9'))
		return true;

	// No other ASCII character belongs to any of the categories below.
	if (c < 0x80)
		return false;

	switch (Char.GetUnicodeCategory (c)) {
	case System.Globalization.UnicodeCategory.UppercaseLetter:
	case System.Globalization.UnicodeCategory.LowercaseLetter:
	case System.Globalization.UnicodeCategory.TitlecaseLetter:
	case System.Globalization.UnicodeCategory.ModifierLetter:
	case System.Globalization.UnicodeCategory.OtherLetter:
	case System.Globalization.UnicodeCategory.LetterNumber:
	case System.Globalization.UnicodeCategory.NonSpacingMark:
	case System.Globalization.UnicodeCategory.SpacingCombiningMark:
	case System.Globalization.UnicodeCategory.DecimalDigitNumber:
	case System.Globalization.UnicodeCategory.ConnectorPunctuation:
	case System.Globalization.UnicodeCategory.Format:
		return true;
	default:
		return false;
	}
}
\r
// Checks whether `s' has the shape of an identifier.  The visible tests
// are: a null or empty string, the first character against
// is_identifier_start_character, and each later character against
// is_identifier_part_character.
// NOTE(review): the statement executed after each test is not shown
// here; presumably a failed test yields false - confirm.
401 public static bool IsValidIdentifier (string s)
\r
403 if (s == null || s.Length == 0)
\r
406 if (!is_identifier_start_character (s [0]))
\r
// Index 0 was examined above, so the scan starts at 1.
409 for (int i = 1; i < s.Length; i ++)
\r
410 if (! is_identifier_part_character (s [i]))
\r
416 bool parse_less_than ()
\r
419 int the_token = token ();
\r
420 switch (the_token) {
\r
421 case Token.IDENTIFIER:
\r
425 case Token.DECIMAL:
\r
444 the_token = token ();
\r
446 if (the_token == Token.OP_GENERICS_GT)
\r
448 else if ((the_token == Token.COMMA) || (the_token == Token.DOT))
\r
450 else if (the_token == Token.OP_GENERICS_LT) {
\r
451 if (!parse_less_than ())
\r
454 } else if (the_token == Token.OPEN_BRACKET) {
\r
456 the_token = token ();
\r
457 if (the_token == Token.CLOSE_BRACKET)
\r
459 else if (the_token == Token.COMMA)
\r
460 goto rank_specifiers;
\r
467 bool parsing_less_than = false;
\r
468 int parsing_generic_less_than = 0;
\r
470 int is_punct (char c, ref bool doread)
\r
479 return Token.OPEN_BRACE;
\r
481 return Token.CLOSE_BRACE;
\r
483 return Token.OPEN_BRACKET;
\r
485 return Token.CLOSE_BRACKET;
\r
487 return Token.OPEN_PARENS;
\r
489 if (deambiguate_close_parens == 0)
\r
490 return Token.CLOSE_PARENS;
\r
492 --deambiguate_close_parens;
\r
494 // Save current position and parse next token.
\r
495 int old = reader.Position;
\r
496 int new_token = token ();
\r
497 reader.Position = old;
\r
500 if (new_token == Token.OPEN_PARENS)
\r
501 return Token.CLOSE_PARENS_OPEN_PARENS;
\r
502 else if (new_token == Token.MINUS)
\r
503 return Token.CLOSE_PARENS_MINUS;
\r
504 else if (IsCastToken (new_token))
\r
505 return Token.CLOSE_PARENS_CAST;
\r
507 return Token.CLOSE_PARENS_NO_CAST;
\r
511 return Token.COMMA;
\r
513 return Token.COLON;
\r
515 return Token.SEMICOLON;
\r
517 return Token.TILDE;
\r
519 return Token.INTERR;
\r
523 if (parsing_generic_less_than++ > 0)
\r
524 return Token.OP_GENERICS_LT;
\r
526 // Save current position and parse next token.
\r
527 int old = reader.Position;
\r
528 bool is_generic_lt = parse_less_than ();
\r
529 reader.Position = old;
\r
532 if (is_generic_lt) {
\r
533 parsing_generic_less_than++;
\r
534 return Token.OP_GENERICS_LT;
\r
536 parsing_generic_less_than = 0;
\r
545 return Token.OP_SHIFT_LEFT_ASSIGN;
\r
547 return Token.OP_SHIFT_LEFT;
\r
548 } else if (d == '='){
\r
550 return Token.OP_LE;
\r
552 return Token.OP_LT;
\r
553 } else if (c == '>') {
\r
554 if (parsing_generic_less_than > 0) {
\r
555 parsing_generic_less_than--;
\r
556 return Token.OP_GENERICS_GT;
\r
566 return Token.OP_SHIFT_RIGHT_ASSIGN;
\r
568 return Token.OP_SHIFT_RIGHT;
\r
569 } else if (d == '='){
\r
571 return Token.OP_GE;
\r
573 return Token.OP_GT;
\r
582 t = Token.OP_ADD_ASSIGN;
\r
592 t = Token.OP_SUB_ASSIGN;
\r
596 return Token.MINUS;
\r
604 return Token.OP_NE;
\r
612 return Token.OP_EQ;
\r
614 return Token.ASSIGN;
\r
620 return Token.OP_AND;
\r
621 } else if (d == '='){
\r
623 return Token.OP_AND_ASSIGN;
\r
625 return Token.BITWISE_AND;
\r
631 return Token.OP_OR;
\r
632 } else if (d == '='){
\r
634 return Token.OP_OR_ASSIGN;
\r
636 return Token.BITWISE_OR;
\r
642 return Token.OP_MULT_ASSIGN;
\r
650 return Token.OP_DIV_ASSIGN;
\r
658 return Token.OP_MOD_ASSIGN;
\r
660 return Token.PERCENT;
\r
666 return Token.OP_XOR_ASSIGN;
\r
668 return Token.CARRET;
\r
671 return Token.ERROR;
\r
674 int deambiguate_close_parens = 0;
\r
// Increments deambiguate_close_parens.  is_punct returns
// Token.CLOSE_PARENS while that counter is zero; otherwise it decrements
// the counter and looks at the following token to choose one of the
// Token.CLOSE_PARENS_* variants.
676 public void Deambiguate_CloseParens ()
\r
679 deambiguate_close_parens++;
\r
/// <summary>
///   Reports error 1021 at the current Location: the numeric literal does
///   not fit in the tokenizer's number buffer.
/// </summary>
void Error_NumericConstantTooLong ()
{
	const int code = 1021;

	Report.Error (code, Location, "Numeric constant too long");
}
\r
// Stores `c' in number_builder and then, while peekChar2 keeps returning
// characters, stores each one in the range '0'..'9' as well.  Returns
// seen_digits, which is set when the loop stores a digit.
687 bool decimal_digits (int c)
\r
690 bool seen_digits = false;
\r
// NOTE(review): when the buffer is full the error is reported, but the
// store on the next line still runs with number_pos == max_number_size.
// Unless Report.Error does not return, that indexes one past the end of
// number_builder - confirm.
693 if (number_pos == max_number_size)
\r
694 Error_NumericConstantTooLong ();
\r
695 number_builder [number_pos++] = (char) c;
\r
699 // We use peekChar2, because decimal_digits needs to do a
\r
700 // 2-character look-ahead (5.ToString for example).
\r
702 while ((d = peekChar2 ()) != -1){
\r
703 if (d >= '0' && d <= '9'){
\r
// NOTE(review): same full-buffer pattern as above - confirm.
704 if (number_pos == max_number_size)
\r
705 Error_NumericConstantTooLong ();
\r
706 number_builder [number_pos++] = (char) d;
\r
708 seen_digits = true;
\r
713 return seen_digits;
\r
/// <summary>
///   Tells whether <paramref name="e"/> is the code of a hexadecimal digit:
///   '0'-'9', 'A'-'F' or 'a'-'f'.
/// </summary>
/// <param name="e">Character code to test; any other value, including -1, yields false.</param>
bool is_hex (int e)
{
	if (e >= '0' && e <= '9')
		return true;

	if (e >= 'A' && e <= 'F')
		return true;

	return e >= 'a' && e <= 'f';
}
\r
721 void hex_digits (int c)
\r
724 number_builder [number_pos++] = (char) c;
\r
728 int real_type_suffix (int c)
\r
733 case 'F': case 'f':
\r
734 t = Token.LITERAL_FLOAT;
\r
736 case 'D': case 'd':
\r
737 t = Token.LITERAL_DOUBLE;
\r
739 case 'M': case 'm':
\r
740 t= Token.LITERAL_DECIMAL;
\r
748 int integer_type_suffix (ulong ul, int c)
\r
750 bool is_unsigned = false;
\r
751 bool is_long = false;
\r
754 bool scanning = true;
\r
757 case 'U': case 'u':
\r
760 is_unsigned = true;
\r
767 // if we have not seen anything in between
\r
768 // report this error
\r
772 "the 'l' suffix is easily confused with digit `1'," +
\r
773 " use 'L' for clarity");
\r
789 } while (scanning);
\r
792 if (is_long && is_unsigned){
\r
794 return Token.LITERAL_INTEGER;
\r
795 } else if (is_unsigned){
\r
796 // uint if possible, or ulong else.
\r
798 if ((ul & 0xffffffff00000000) == 0)
\r
802 } else if (is_long){
\r
803 // long if possible, ulong otherwise
\r
804 if ((ul & 0x8000000000000000) != 0)
\r
809 // int, uint, long or ulong in that order
\r
810 if ((ul & 0xffffffff00000000) == 0){
\r
811 uint ui = (uint) ul;
\r
813 if ((ui & 0x80000000) != 0)
\r
818 if ((ul & 0x8000000000000000) != 0)
\r
824 return Token.LITERAL_INTEGER;
\r
828 // given `c' as the next char in the input decide whether
\r
829 // we need to convert to a special type, and then choose
\r
830 // the best representation for the integer
\r
832 int adjust_int (int c)
\r
835 if (number_pos > 9){
\r
836 ulong ul = (uint) (number_builder [0] - '0');
\r
838 for (int i = 1; i < number_pos; i++){
\r
839 ul = checked ((ul * 10) + ((uint)(number_builder [i] - '0')));
\r
841 return integer_type_suffix (ul, c);
\r
843 uint ui = (uint) (number_builder [0] - '0');
\r
845 for (int i = 1; i < number_pos; i++){
\r
846 ui = checked ((ui * 10) + ((uint)(number_builder [i] - '0')));
\r
848 return integer_type_suffix (ui, c);
\r
850 } catch (OverflowException) {
\r
851 error_details = "Integral constant is too large";
\r
852 Report.Error (1021, Location, error_details);
\r
854 return Token.LITERAL_INTEGER;
\r
858 int adjust_real (int t)
\r
860 string s = new String (number_builder, 0, number_pos);
\r
863 case Token.LITERAL_DECIMAL:
\r
865 val = System.Decimal.Parse (s, styles, csharp_format_info);
\r
866 } catch (OverflowException) {
\r
868 error_details = "Floating-point constant is outside the range of the type 'decimal'";
\r
869 Report.Error (594, Location, error_details);
\r
872 case Token.LITERAL_FLOAT:
\r
874 val = (float) System.Double.Parse (s, styles, csharp_format_info);
\r
875 } catch (OverflowException) {
\r
877 error_details = "Floating-point constant is outside the range of the type 'float'";
\r
878 Report.Error (594, Location, error_details);
\r
882 case Token.LITERAL_DOUBLE:
\r
884 t = Token.LITERAL_DOUBLE;
\r
886 val = System.Double.Parse (s, styles, csharp_format_info);
\r
887 } catch (OverflowException) {
\r
889 error_details = "Floating-point constant is outside the range of the type 'double'";
\r
890 Report.Error (594, Location, error_details);
\r
903 while ((d = peekChar ()) != -1){
\r
905 number_builder [number_pos++] = (char) d;
\r
911 string s = new String (number_builder, 0, number_pos);
\r
913 if (number_pos <= 8)
\r
914 ul = System.UInt32.Parse (s, NumberStyles.HexNumber);
\r
916 ul = System.UInt64.Parse (s, NumberStyles.HexNumber);
\r
917 } catch (OverflowException){
\r
918 error_details = "Integral constant is too large";
\r
919 Report.Error (1021, Location, error_details);
\r
921 return Token.LITERAL_INTEGER;
\r
924 return integer_type_suffix (ul, peekChar ());
\r
928 // Invoked if we know we have .digits or digits
\r
930 int is_number (int c)
\r
932 bool is_real = false;
\r
937 if (c >= '0' && c <= '9'){
\r
939 int peek = peekChar ();
\r
941 if (peek == 'x' || peek == 'X')
\r
942 return handle_hex ();
\r
944 decimal_digits (c);
\r
949 // We need to handle the case of
\r
950 // "1.1" vs "1.string" (LITERAL_FLOAT vs NUMBER DOT IDENTIFIER)
\r
953 if (decimal_digits ('.')){
\r
959 return adjust_int (-1);
\r
963 if (c == 'e' || c == 'E'){
\r
965 if (number_pos == max_number_size)
\r
966 Error_NumericConstantTooLong ();
\r
967 number_builder [number_pos++] = 'e';
\r
971 if (number_pos == max_number_size)
\r
972 Error_NumericConstantTooLong ();
\r
973 number_builder [number_pos++] = '+';
\r
975 } else if (c == '-') {
\r
976 if (number_pos == max_number_size)
\r
977 Error_NumericConstantTooLong ();
\r
978 number_builder [number_pos++] = '-';
\r
981 if (number_pos == max_number_size)
\r
982 Error_NumericConstantTooLong ();
\r
983 number_builder [number_pos++] = '+';
\r
986 decimal_digits (c);
\r
990 type = real_type_suffix (c);
\r
991 if (type == Token.NONE && !is_real){
\r
993 return adjust_int (c);
\r
997 if (type == Token.NONE){
\r
1002 return adjust_real (type);
\r
1004 Console.WriteLine ("This should not be reached");
\r
1005 throw new Exception ("Is Number should never reach this point");
\r
1009 // Accepts exactly count (4 or 8) hex, no more no less
\r
1011 int getHex (int count, out bool error)
\r
1016 int top = count != -1 ? count : 4;
\r
1020 for (i = 0; i < top; i++){
\r
1023 if (c >= '0' && c <= '9')
\r
1024 c = (int) c - (int) '0';
\r
1025 else if (c >= 'A' && c <= 'F')
\r
1026 c = (int) c - (int) 'A' + 10;
\r
1027 else if (c >= 'a' && c <= 'f')
\r
1028 c = (int) c - (int) 'a' + 10;
\r
1034 total = (total * 16) + c;
\r
1036 int p = peekChar ();
\r
1039 if (!is_hex ((char)p))
\r
1046 int escape (int c)
\r
1080 v = getHex (-1, out error);
\r
1085 v = getHex (4, out error);
\r
1090 v = getHex (8, out error);
\r
1095 Report.Error (1009, Location, "Unrecognized escape sequence in " + (char)d);
\r
1104 if (putback_char != -1){
\r
1105 int x = putback_char;
\r
1106 putback_char = -1;
\r
1110 return reader.Read ();
\r
1115 if (putback_char != -1)
\r
1116 return putback_char;
\r
1117 putback_char = reader.Read ();
\r
1118 return putback_char;
\r
1123 if (putback_char != -1)
\r
1124 return putback_char;
\r
1125 return reader.Peek ();
\r
// Entry point for handing a character back to the tokenizer through
// putback_char, where -1 is treated as "nothing held".
// NOTE(review): only the failure path is shown here; presumably the
// method then stores `c' in putback_char - confirm.
1128 void putback (int c)
\r
// A character is already held: dump the column, line, file name and both
// characters to the console, then abort with an exception.
1130 if (putback_char != -1){
\r
1131 Console.WriteLine ("Col: " + col);
\r
1132 Console.WriteLine ("Row: " + line);
\r
1133 Console.WriteLine ("Name: " + ref_name.Name);
\r
1134 Console.WriteLine ("Current [{0}] putting back [{1}] ", putback_char, c);
\r
1135 throw new Exception ("This should not happen putback on putback");
\r
/// <summary>
///   Tells whether more input is available.
/// </summary>
/// <returns>true unless peekChar returns -1.</returns>
public bool advance ()
{
	int next = peekChar ();

	return next != -1;
}
\r
1145 public Object Value {
\r
1151 public Object value ()
\r
1156 bool IsCastToken (int token)
\r
1161 case Token.IDENTIFIER:
\r
1162 case Token.LITERAL_INTEGER:
\r
1163 case Token.LITERAL_FLOAT:
\r
1164 case Token.LITERAL_DOUBLE:
\r
1165 case Token.LITERAL_DECIMAL:
\r
1166 case Token.LITERAL_CHARACTER:
\r
1167 case Token.LITERAL_STRING:
\r
1169 case Token.CHECKED:
\r
1174 case Token.SIZEOF:
\r
1178 case Token.TYPEOF:
\r
1179 case Token.UNCHECKED:
\r
1180 case Token.UNSAFE:
\r
1183 // These can be part of a member access
\r
1188 case Token.USHORT:
\r
1191 case Token.DOUBLE:
\r
1201 public int token ()
\r
1203 current_token = xtoken ();
\r
1205 if (current_token != Token.DEFAULT)
\r
1206 return current_token;
\r
1208 int c = consume_whitespace ();
\r
1210 current_token = Token.ERROR;
\r
1211 else if (c == '(')
\r
1212 current_token = Token.DEFAULT_OPEN_PARENS;
\r
1216 return current_token;
\r
1219 static StringBuilder static_cmd_arg = new System.Text.StringBuilder ();
\r
1221 void get_cmd_arg (out string cmd, out string arg)
\r
1225 tokens_seen = false;
\r
1227 static_cmd_arg.Length = 0;
\r
1229 // skip over white space
\r
1230 while ((c = getChar ()) != -1 && (c != '\n') && ((c == '\r') || (c == ' ') || (c == '\t')))
\r
1233 while ((c != -1) && (c != '\n') && (c != ' ') && (c != '\t') && (c != '\r')){
\r
1234 if (is_identifier_part_character ((char) c)){
\r
1235 static_cmd_arg.Append ((char) c);
\r
1243 cmd = static_cmd_arg.ToString ();
\r
1249 } else if (c == '\r')
\r
1252 // skip over white space
\r
1253 while ((c = getChar ()) != -1 && (c != '\n') && ((c == '\r') || (c == ' ') || (c == '\t')))
\r
1260 } else if (c == '\r'){
\r
1265 static_cmd_arg.Length = 0;
\r
1266 static_cmd_arg.Append ((char) c);
\r
1268 while ((c = getChar ()) != -1 && (c != '\n') && (c != '\r')){
\r
1269 static_cmd_arg.Append ((char) c);
\r
1275 } else if (c == '\r')
\r
1277 arg = static_cmd_arg.ToString ().Trim ();
\r
1281 // Handles the #line directive
\r
1283 bool PreProcessLine (string arg)
\r
1288 if (arg == "default"){
\r
1290 ref_name = file_name;
\r
1291 Location.Push (ref_name);
\r
1293 } else if (arg == "hidden"){
\r
1295 // We ignore #line hidden
\r
1303 if ((pos = arg.IndexOf (' ')) != -1 && pos != 0){
\r
1304 ref_line = System.Int32.Parse (arg.Substring (0, pos));
\r
1307 char [] quotes = { '\"' };
\r
1309 string name = arg.Substring (pos). Trim (quotes);
\r
1310 ref_name = Location.LookupFile (name);
\r
1311 file_name.HasLineDirective = true;
\r
1312 ref_name.HasLineDirective = true;
\r
1313 Location.Push (ref_name);
\r
1315 ref_line = System.Int32.Parse (arg);
\r
1325 // Handles #define and #undef
\r
1327 void PreProcessDefinition (bool is_define, string arg)
\r
1329 if (arg == "" || arg == "true" || arg == "false"){
\r
1330 Report.Error (1001, Location, "Missing identifer to pre-processor directive");
\r
1334 char[] whitespace = { ' ', '\t' };
\r
1335 if (arg.IndexOfAny (whitespace) != -1){
\r
1336 Report.Error (1025, Location, "Single-line comment or end-of-line expected");
\r
1340 if (!is_identifier_start_character (arg [0]))
\r
1341 Report.Error (1001, Location, "Identifier expected: " + arg);
\r
1343 foreach (char c in arg.Substring (1)){
\r
1344 if (!is_identifier_part_character (c)){
\r
1345 Report.Error (1001, Location, "Identifier expected: " + arg);
\r
1351 if (defines == null)
\r
1352 defines = new Hashtable ();
\r
1355 if (defines == null)
\r
1357 if (defines.Contains (arg))
\r
1358 defines.Remove (arg);
\r
1362 bool eval_val (string s)
\r
1369 if (defines == null)
\r
1371 if (defines.Contains (s))
\r
1377 bool pp_primary (ref string s)
\r
1380 int len = s.Length;
\r
1386 s = s.Substring (1);
\r
1387 bool val = pp_expr (ref s);
\r
1388 if (s.Length > 0 && s [0] == ')'){
\r
1389 s = s.Substring (1);
\r
1392 Error_InvalidDirective ();
\r
1396 if (is_identifier_start_character (c)){
\r
1402 if (is_identifier_part_character (c)){
\r
1406 bool v = eval_val (s.Substring (0, j));
\r
1407 s = s.Substring (j);
\r
1410 bool vv = eval_val (s);
\r
1415 Error_InvalidDirective ();
\r
1419 bool pp_unary (ref string s)
\r
1422 int len = s.Length;
\r
1425 if (s [0] == '!'){
\r
1426 if (len > 1 && s [1] == '='){
\r
1427 Error_InvalidDirective ();
\r
1430 s = s.Substring (1);
\r
1431 return ! pp_primary (ref s);
\r
1433 return pp_primary (ref s);
\r
1435 Error_InvalidDirective ();
\r
1440 bool pp_eq (ref string s)
\r
1442 bool va = pp_unary (ref s);
\r
1445 int len = s.Length;
\r
1447 if (s [0] == '='){
\r
1448 if (len > 2 && s [1] == '='){
\r
1449 s = s.Substring (2);
\r
1450 return va == pp_unary (ref s);
\r
1452 Error_InvalidDirective ();
\r
1455 } else if (s [0] == '!' && len > 1 && s [1] == '='){
\r
1456 s = s.Substring (2);
\r
1458 return va != pp_unary (ref s);
\r
1467 bool pp_and (ref string s)
\r
1469 bool va = pp_eq (ref s);
\r
1472 int len = s.Length;
\r
1474 if (s [0] == '&'){
\r
1475 if (len > 2 && s [1] == '&'){
\r
1476 s = s.Substring (2);
\r
1477 return (va & pp_eq (ref s));
\r
1479 Error_InvalidDirective ();
\r
1488 // Evaluates an expression for `#if' or `#elif'
\r
1490 bool pp_expr (ref string s)
\r
1492 bool va = pp_and (ref s);
\r
1494 int len = s.Length;
\r
1499 if (len > 2 && s [1] == '|'){
\r
1500 s = s.Substring (2);
\r
1501 return va | pp_expr (ref s);
\r
1503 Error_InvalidDirective ();
\r
1512 bool eval (string s)
\r
1514 bool v = pp_expr (ref s);
\r
1516 if (s.Length != 0){
\r
1517 Error_InvalidDirective ();
\r
/// <summary>
///   Reports error 1517 at the current Location; the pre-processor
///   expression parser (pp_primary, pp_unary, pp_eq, pp_and, pp_expr, eval)
///   calls it on malformed input.
/// </summary>
void Error_InvalidDirective ()
{
	const int code = 1517;

	Report.Error (code, Location, "Invalid pre-processor directive");
}
\r
1529 void Error_UnexpectedDirective (string extra)
\r
1533 "Unexpected processor directive (" + extra + ")");
\r
1536 void Error_TokensSeen ()
\r
1540 "Cannot define or undefine pre-processor symbols after a token in the file");
\r
1544 // if true, then the code continues processing the code
\r
1545 // if false, the code stays in a loop until another directive is
\r
1548 bool handle_preprocessing_directive (bool caller_is_taking)
\r
1551 bool region_directive = false;
\r
1553 get_cmd_arg (out cmd, out arg);
\r
1555 // Eat any trailing whitespaces and single-line comments
\r
1556 if (arg.IndexOf ("//") != -1)
\r
1557 arg = arg.Substring (0, arg.IndexOf ("//"));
\r
1558 arg = arg.TrimEnd (' ', '\t');
\r
1561 // The first group of pre-processing instructions is always processed
\r
1565 if (RootContext.V2)
\r
1566 return caller_is_taking;
\r
1570 if (!PreProcessLine (arg))
\r
1573 "Argument to #line directive is missing or invalid");
\r
1574 return caller_is_taking;
\r
1577 region_directive = true;
\r
1582 region_directive = true;
\r
1583 goto case "endif";
\r
1587 Error_InvalidDirective ();
\r
1590 bool taking = false;
\r
1591 if (ifstack == null)
\r
1592 ifstack = new Stack ();
\r
1594 if (ifstack.Count == 0){
\r
1597 int state = (int) ifstack.Peek ();
\r
1598 if ((state & TAKING) != 0)
\r
1602 if (eval (arg) && taking){
\r
1603 int push = TAKING | TAKEN_BEFORE | PARENT_TAKING;
\r
1604 if (region_directive)
\r
1606 ifstack.Push (push);
\r
1609 int push = (taking ? PARENT_TAKING : 0);
\r
1610 if (region_directive)
\r
1612 ifstack.Push (push);
\r
1617 if (ifstack == null || ifstack.Count == 0){
\r
1618 Error_UnexpectedDirective ("no #if for this #endif");
\r
1621 int pop = (int) ifstack.Pop ();
\r
1623 if (region_directive && ((pop & REGION) == 0))
\r
1624 Report.Error (1027, Location, "#endif directive expected");
\r
1625 else if (!region_directive && ((pop & REGION) != 0))
\r
1626 Report.Error (1038, Location, "#endregion directive expected");
\r
1628 if (ifstack.Count == 0)
\r
1631 int state = (int) ifstack.Peek ();
\r
1633 if ((state & TAKING) != 0)
\r
1641 if (ifstack == null || ifstack.Count == 0){
\r
1642 Error_UnexpectedDirective ("no #if for this #elif");
\r
1645 int state = (int) ifstack.Peek ();
\r
1647 if ((state & REGION) != 0) {
\r
1648 Report.Error (1038, Location, "#endregion directive expected");
\r
1652 if ((state & ELSE_SEEN) != 0){
\r
1653 Error_UnexpectedDirective ("#elif not valid after #else");
\r
1657 if ((state & (TAKEN_BEFORE | TAKING)) != 0)
\r
1660 if (eval (arg) && ((state & PARENT_TAKING) != 0)){
\r
1661 state = (int) ifstack.Pop ();
\r
1662 ifstack.Push (state | TAKING | TAKEN_BEFORE);
\r
1669 if (ifstack == null || ifstack.Count == 0){
\r
1672 "Unexpected processor directive (no #if for this #else)");
\r
1675 int state = (int) ifstack.Peek ();
\r
1677 if ((state & REGION) != 0) {
\r
1678 Report.Error (1038, Location, "#endregion directive expected");
\r
1682 if ((state & ELSE_SEEN) != 0){
\r
1683 Error_UnexpectedDirective ("#else within #else");
\r
1690 if ((state & TAKEN_BEFORE) == 0){
\r
1691 ret = ((state & PARENT_TAKING) != 0);
\r
1700 ifstack.Push (state | ELSE_SEEN);
\r
1707 // These are only processed if we are in a `taking' block
\r
1709 if (!caller_is_taking)
\r
1714 if (any_token_seen){
\r
1715 Error_TokensSeen ();
\r
1718 PreProcessDefinition (true, arg);
\r
1722 if (any_token_seen){
\r
1723 Error_TokensSeen ();
\r
1726 PreProcessDefinition (false, arg);
\r
1730 Report.Error (1029, Location, "#error: '" + arg + "'");
\r
1734 Report.Warning (1030, Location, "#warning: '" + arg + "'");
\r
1738 Report.Error (1024, Location, "Preprocessor directive expected (got: " + cmd + ")");
\r
1743 private int consume_string (bool quoted)
\r
1746 string_builder.Length = 0;
\r
1748 while ((c = getChar ()) != -1){
\r
1750 if (quoted && peekChar () == '"'){
\r
1751 string_builder.Append ((char) c);
\r
1755 val = string_builder.ToString ();
\r
1756 return Token.LITERAL_STRING;
\r
1762 Report.Error (1010, Location, "Newline in constant");
\r
1772 return Token.ERROR;
\r
1774 string_builder.Append ((char) c);
\r
1777 Report.Error (1039, Location, "Unterminated string literal");
\r
1781 private int consume_identifier (int s)
\r
1783 int res = consume_identifier (s, false);
\r
1785 if (res == Token.PARTIAL) {
\r
1786 // Save current position and parse next token.
\r
1787 int old = reader.Position;
\r
1788 int old_putback = putback_char;
\r
1790 putback_char = -1;
\r
1792 int next_token = token ();
\r
1793 bool ok = (next_token == Token.CLASS) ||
\r
1794 (next_token == Token.STRUCT) ||
\r
1795 (next_token == Token.INTERFACE);
\r
1797 reader.Position = old;
\r
1798 putback_char = old_putback;
\r
1804 return Token.IDENTIFIER;
\r
1811 private int consume_identifier (int s, bool quoted)
\r
1816 id_builder [0] = (char) s;
\r
1818 while ((c = reader.Read ()) != -1) {
\r
1819 if (is_identifier_part_character ((char) c)){
\r
1820 if (pos == max_id_size){
\r
1821 Report.Error (645, Location, "Identifier too long (limit is 512 chars)");
\r
1822 return Token.ERROR;
\r
1825 id_builder [pos++] = (char) c;
\r
1826 putback_char = -1;
\r
1835 // Optimization: avoids doing the keyword lookup
\r
1836 // on uppercase letters and _
\r
1838 if (!quoted && (s >= 'a' || s == '_')){
\r
1839 int keyword = GetKeyword (id_builder, pos);
\r
1840 if (keyword != -1)
\r
1845 // Keep identifiers in an array of hashtables to avoid needless
\r
1849 if (identifiers [pos] != null) {
\r
1850 val = identifiers [pos][id_builder];
\r
1851 if (val != null) {
\r
1852 return Token.IDENTIFIER;
\r
1856 identifiers [pos] = new CharArrayHashtable (pos);
\r
1858 val = new String (id_builder, 0, pos);
\r
1860 char [] chars = new char [pos];
\r
1861 Array.Copy (id_builder, chars, pos);
\r
1863 identifiers [pos] [chars] = val;
\r
1865 return Token.IDENTIFIER;
\r
1868 int consume_whitespace ()
\r
1871 bool doread = false;
\r
1875 // optimization: eliminate col and implement #directive semantic correctly.
\r
1876 for (;(c = getChar ()) != -1; col++) {
\r
1881 col = (((col + 8) / 8) * 8) - 1;
\r
1885 if (c == ' ' || c == '\f' || c == '\v' || c == 0xa0)
\r
1889 if (peekChar () == '\n')
\r
1895 any_token_seen |= tokens_seen;
\r
1896 tokens_seen = false;
\r
1900 // Handle double-slash comments.
\r
1902 int d = peekChar ();
\r
1906 while ((d = getChar ()) != -1 && (d != '\n') && d != '\r')
\r
1913 any_token_seen |= tokens_seen;
\r
1914 tokens_seen = false;
\r
1916 } else if (d == '*'){
\r
1919 while ((d = getChar ()) != -1){
\r
1920 if (d == '*' && peekChar () == '/'){
\r
1929 any_token_seen |= tokens_seen;
\r
1930 tokens_seen = false;
\r
1935 goto is_punct_label;
\r
1944 any_token_seen |= tokens_seen;
\r
1945 tokens_seen = false;
\r
1949 /* For now, ignore pre-processor commands */
\r
1950 // FIXME: In C# the '#' is not limited to appear
\r
1951 // on the first column.
\r
1952 if (c == '#' && !tokens_seen){
\r
1957 cont = handle_preprocessing_directive (cont);
\r
1965 bool skipping = false;
\r
1966 for (;(c = getChar ()) != -1; col++){
\r
1972 } else if (c == ' ' || c == '\t' || c == '\v' || c == '\r' || c == 0xa0)
\r
1974 else if (c != '#')
\r
1976 if (c == '#' && !skipping)
\r
1979 any_token_seen |= tokens_seen;
\r
1980 tokens_seen = false;
\r
1982 Report.Error (1027, Location, "#endif/#endregion expected");
\r
1992 public int xtoken ()
\r
1995 bool doread = false;
\r
1999 // optimization: eliminate col and implement #directive semantic correctly.
\r
2001 c = consume_whitespace ();
\r
2005 if (is_identifier_start_character ((char)c)){
\r
2006 tokens_seen = true;
\r
2007 return consume_identifier (c);
\r
2011 if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){
\r
2012 tokens_seen = true;
\r
2020 if (c >= '0' && c <= '9'){
\r
2021 tokens_seen = true;
\r
2022 return is_number (c);
\r
2026 tokens_seen = true;
\r
2027 int peek = peekChar ();
\r
2028 if (peek >= '0' && peek <= '9')
\r
2029 return is_number (c);
\r
2034 return consume_string (false);
\r
2038 tokens_seen = true;
\r
2040 error_details = "Empty character literal";
\r
2041 Report.Error (1011, Location, error_details);
\r
2042 return Token.ERROR;
\r
2046 return Token.ERROR;
\r
2047 val = new System.Char ();
\r
2052 error_details = "Too many characters in character literal";
\r
2053 Report.Error (1012, Location, error_details);
\r
2055 // Try to recover, read until newline or next "'"
\r
2056 while ((c = getChar ()) != -1){
\r
2057 if (c == '\n' || c == '\''){
\r
2065 return Token.ERROR;
\r
2067 return Token.LITERAL_CHARACTER;
\r
2073 tokens_seen = true;
\r
2074 return consume_string (true);
\r
2075 } else if (is_identifier_start_character ((char) c)){
\r
2076 return consume_identifier (c, true);
\r
2078 Report.Error (1033, Location, "'@' must be followed by string constant or identifier");
\r
2083 error_details = "Preprocessor directives must appear as the first non-whitespace " +
\r
2084 "character on a line.";
\r
2086 Report.Error (1040, Location, error_details);
\r
2088 return Token.ERROR;
\r
2091 error_details = ((char)c).ToString ();
\r
2093 return Token.ERROR;
\r
// Reports a conditional block that is still open: when ifstack holds at
// least one entry, the top entry is popped and error 1038 is reported if
// its REGION bit is set.  Error 1027 is the other report visible here
// (presumably the alternative when the REGION bit is clear - confirm).
// NOTE(review): only one entry is popped, so deeper unterminated blocks
// appear to go unreported - confirm.
2096 public void cleanup ()
\r
2098 if (ifstack != null && ifstack.Count >= 1) {
\r
2099 int state = (int) ifstack.Pop ();
\r
2100 if ((state & REGION) != 0)
\r
// NOTE(review): unlike the other 1038/1027 reports in this file, these
// two calls pass no Location - confirm that this is intended.
2101 Report.Error (1038, "#endregion directive expected");
\r
2103 Report.Error (1027, "#endif directive expected");
\r