public class Tokenizer : yyParser.yyInput
{
SeekableStreamReader reader;
- public SourceFile ref_name;
- public SourceFile file_name;
- public int ref_line = 1;
- public int line = 1;
- public int col = 1;
- public int current_token;
+ SourceFile ref_name;
+ SourceFile file_name;
+ int ref_line = 1;
+ int line = 1;
+ int col = 0;
+ int previous_col;
+ int current_token;
bool handle_get_set = false;
bool handle_remove_add = false;
bool handle_assembly = false;
bool handle_constraints = false;
bool handle_typeof = false;
+ Location current_location;
+ Location current_comment_location = Location.Null;
+ ArrayList escapedIdentifiers = new ArrayList ();
//
// XML documentation buffer. The save point is used to divide
set {
if (value == XmlCommentState.Allowed) {
check_incorrect_doc_comment ();
- consume_doc_comment ();
+ reset_doc_comment ();
}
xmlDocState = value;
}
}
+ // Reports whether one of the tokens stored in escapedIdentifiers
+ // (identifiers consumed with `quoted' set) sits at the given location.
+ // The list is searched linearly.
+ public bool IsEscapedIdentifier (Location loc)
+ {
+ 	for (int i = 0; i < escapedIdentifiers.Count; i++) {
+ 		LocatedToken candidate = (LocatedToken) escapedIdentifiers [i];
+ 		if (candidate.Location.Equals (loc))
+ 			return true;
+ 	}
+ 	return false;
+ }
+
//
// Class variables
//
static CharArrayHashtable[] keywords;
+ static Hashtable keywordStrings = new Hashtable ();
static NumberStyles styles;
static NumberFormatInfo csharp_format_info;
static CharArrayHashtable [] identifiers = new CharArrayHashtable [max_id_size + 1];
- const int max_number_size = 128;
+ const int max_number_size = 512;
static char [] number_builder = new char [max_number_size];
static int number_pos;
}
}
+ //
+ // This is used when the tokenizer needs to save
+ // the current position as it needs to do some parsing
+ // on its own to disambiguate a token on behalf of the
+ // parser.
+ //
+ Stack position_stack = new Stack ();
+ // Snapshot of the tokenizer's reader offset and line/column
+ // bookkeeping. Created by PushPosition, reapplied by PopPosition.
+ class Position {
+ 	public int position;
+ 	public int line;
+ 	public int ref_line;
+ 	public int col;
+ 	public int putback_char;
+ 	public int previous_col;
+ 	public int parsing_generic_less_than;
+
+ 	// Copies the current state out of tokenizer `t'.
+ 	public Position (Tokenizer t)
+ 	{
+ 		position = t.reader.Position;
+ 		line = t.line;
+ 		ref_line = t.ref_line;
+ 		col = t.col;
+ 		putback_char = t.putback_char;
+ 		previous_col = t.previous_col;
+ 		parsing_generic_less_than = t.parsing_generic_less_than;
+ 	}
+ }
+
+ // Saves the current tokenizer position on position_stack so that a
+ // later PopPosition can rewind to it (or DiscardPosition can drop it).
+ public void PushPosition ()
+ {
+ 	position_stack.Push (new Position (this));
+ }
+
+ // Rewinds the tokenizer to the most recently pushed position.
+ public void PopPosition ()
+ {
+ 	Position p = (Position) position_stack.Pop ();
+
+ 	reader.Position = p.position;
+ 	// getChar bumps `line' together with `ref_line' on every newline,
+ 	// so both have to be rewound; otherwise `line' would count the
+ 	// newlines crossed during lookahead twice.
+ 	line = p.line;
+ 	ref_line = p.ref_line;
+ 	col = p.col;
+ 	putback_char = p.putback_char;
+ 	previous_col = p.previous_col;
+ 	// NOTE(review): parsing_generic_less_than is captured in the
+ 	// snapshot but was never restored here; left unchanged because
+ 	// callers adjust the counter themselves after popping - confirm.
+ }
+
+ // Drops the most recently pushed position without restoring any tokenizer state from it.
+ public void DiscardPosition ()
+ {
+ position_stack.Pop (); // the popped snapshot is deliberately not applied
+ }
+
static void AddKeyword (string kw, int token) {
+ keywordStrings.Add (kw, kw);
if (keywords [kw.Length] == null) {
keywords [kw.Length] = new CharArrayHashtable (kw.Length);
}
}
public Location Location { // exposes the value last stored in current_location
- get {
- return new Location (ref_line);
- }
+ get { return current_location; }
}
void define (string def)
{
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || Char.IsLetter (c);
}
-
+
+ // Tells whether `s' has a non-null entry in keywordStrings, the
+ // table AddKeyword fills with every keyword spelling.
+ public static bool IsKeyword (string s)
+ {
+ 	object registered = keywordStrings [s];
+ 	return registered != null;
+ }
+
public static bool IsValidIdentifier (string s)
{
if (s == null || s.Length == 0)
return false;
-
+
if (!is_identifier_start_character (s [0]))
return false;
{
start:
int the_token = token ();
+ if (the_token == Token.OPEN_BRACKET) {
+ do {
+ the_token = token ();
+ } while (the_token != Token.CLOSE_BRACKET);
+ the_token = token ();
+ }
switch (the_token) {
case Token.IDENTIFIER:
case Token.OBJECT:
return true;
else if ((the_token == Token.COMMA) || (the_token == Token.DOT))
goto start;
- else if (the_token == Token.INTERR)
+ else if (the_token == Token.INTERR || the_token == Token.STAR)
goto again;
else if (the_token == Token.OP_GENERICS_LT) {
if (!parse_less_than ())
return false;
}
- bool parsing_less_than = false;
int parsing_generic_less_than = 0;
int is_punct (char c, ref bool doread)
switch (c){
case '{':
+ val = Location;
return Token.OPEN_BRACE;
case '}':
+ val = Location;
return Token.CLOSE_BRACE;
case '[':
// To block doccomment inside attribute declaration.
--deambiguate_close_parens;
- // Save current position and parse next token.
- int old = reader.Position;
+ PushPosition ();
int new_token = token ();
- reader.Position = old;
- putback_char = -1;
+ PopPosition ();
if (new_token == Token.OPEN_PARENS)
return Token.CLOSE_PARENS_OPEN_PARENS;
case ',':
return Token.COMMA;
- case ':':
- return Token.COLON;
case ';':
+ val = Location;
return Token.SEMICOLON;
case '~':
+ val = Location;
return Token.TILDE;
case '?':
return Token.INTERR;
if (parsing_generic_less_than++ > 0)
return Token.OP_GENERICS_LT;
- int old = reader.Position;
if (handle_typeof) {
int dimension;
+ PushPosition ();
if (parse_generic_dimension (out dimension)) {
val = dimension;
+ DiscardPosition ();
return Token.GENERIC_DIMENSION;
}
- reader.Position = old;
- putback_char = -1;
+ PopPosition ();
}
// Save current position and parse next token.
- old = reader.Position;
+ PushPosition ();
bool is_generic_lt = parse_less_than ();
- reader.Position = old;
- putback_char = -1;
+ PopPosition ();
if (is_generic_lt) {
parsing_generic_less_than++;
d = peekChar ();
if (c == '+'){
- if (d == '+')
+ if (d == '+') {
+ val = Location;
t = Token.OP_INC;
+ }
else if (d == '=')
t = Token.OP_ADD_ASSIGN;
- else
+ else {
+ val = Location;
return Token.PLUS;
+ }
doread = true;
return t;
}
if (c == '-'){
- if (d == '-')
+ if (d == '-') {
+ val = Location;
t = Token.OP_DEC;
+ }
else if (d == '=')
t = Token.OP_SUB_ASSIGN;
else if (d == '>')
t = Token.OP_PTR;
- else
+ else {
+ val = Location;
return Token.MINUS;
+ }
doread = true;
return t;
}
doread = true;
return Token.OP_NE;
}
+ val = Location;
return Token.BANG;
}
doread = true;
return Token.OP_AND_ASSIGN;
}
+ val = Location;
return Token.BITWISE_AND;
}
doread = true;
return Token.OP_MULT_ASSIGN;
}
+ val = Location;
return Token.STAR;
}
return Token.CARRET;
}
+ if (c == ':'){
+ if (d == ':'){
+ doread = true;
+ return Token.DOUBLE_COLON;
+ }
+ val = Location;
+ return Token.COLON;
+ }
+
return Token.ERROR;
}
return seen_digits;
}
- bool is_hex (int e)
+ static bool is_hex (int e) // true when e is the code of a hex digit: 0-9, A-F or a-f
{
return (e >= '0' && e <= '9') || (e >= 'A' && e <= 'F') || (e >= 'a' && e <= 'f');
}
-
- void hex_digits (int c)
- {
- if (c != -1)
- number_builder [number_pos++] = (char) c;
-
- }
-
- int real_type_suffix (int c)
+
+ static int real_type_suffix (int c)
{
int t;
// if we have not seen anything in between
// report this error
//
- Report.Warning (78, Location, "The 'l' suffix is easily confused with the digit '1' (use 'L' for clarity)");
+ Report.Warning (78, 4, Location, "The 'l' suffix is easily confused with the digit '1' (use 'L' for clarity)");
}
//
// This goto statement causes the MS CLR 2.0 beta 1 csc to report an error, so
int adjust_real (int t)
{
string s = new String (number_builder, 0, number_pos);
+ const string error_details = "Floating-point constant is outside the range of type `{0}'";
switch (t){
case Token.LITERAL_DECIMAL:
val = System.Decimal.Parse (s, styles, csharp_format_info);
} catch (OverflowException) {
val = 0m;
- error_details = "Floating-point constant is outside the range of the type 'decimal'";
- Report.Error (594, Location, error_details);
+ Report.Error (594, Location, error_details, "decimal");
}
break;
case Token.LITERAL_FLOAT:
try {
- val = (float) System.Double.Parse (s, styles, csharp_format_info);
+ val = float.Parse (s, styles, csharp_format_info);
} catch (OverflowException) {
val = 0.0f;
- error_details = "Floating-point constant is outside the range of the type 'float'";
- Report.Error (594, Location, error_details);
+ Report.Error (594, Location, error_details, "float");
}
break;
val = System.Double.Parse (s, styles, csharp_format_info);
} catch (OverflowException) {
val = 0.0;
- error_details = "Floating-point constant is outside the range of the type 'double'";
- Report.Error (594, Location, error_details);
+ Report.Error (594, Location, error_details, "double");
}
break;
}
val = 0ul;
return Token.LITERAL_INTEGER;
}
+ catch (FormatException) {
+ Report.Error (1013, Location, "Invalid number");
+ val = 0ul;
+ return Token.LITERAL_INTEGER;
+ }
return integer_type_suffix (ul, peekChar ());
}
goto default;
return v;
default:
- Report.Error (1009, Location, "Unrecognized escape sequence in " + (char)d);
+ Report.Error (1009, Location, "Unrecognized escape sequence `\\{0}'", ((char)d).ToString ());
return d;
}
getChar ();
int getChar () // Returns the next character and keeps line, ref_line and col up to date
{
- if (putback_char != -1){
- int x = putback_char;
+ int x;
+ if (putback_char != -1) { // a pushed-back character is pending: hand it out first
+ x = putback_char;
putback_char = -1;
-
- return x;
+ } else
+ x = reader.Read (); // nothing pending: take the next value from the reader
+ if (x == '\n') {
+ line++;
+ ref_line++;
+ previous_col = col; // remembered so putback can restore col after un-reading the newline
+ col = 0;
}
- return reader.Read ();
+ else
+ col++;
+ return x;
}
int peekChar ()
Console.WriteLine ("Current [{0}] putting back [{1}] ", putback_char, c);
throw new Exception ("This should not happen putback on putback");
}
+ if (c == '\n' || col == 0) {
+ // It won't happen though.
+ line--;
+ ref_line--;
+ col = previous_col;
+ }
+ else
+ col--;
putback_char = c;
}
return val;
}
- bool IsCastToken (int token)
+ static bool IsCastToken (int token)
{
switch (token) {
case Token.BANG:
cmd = static_cmd_arg.ToString ();
if (c == '\n'){
- line++;
- ref_line++;
return;
- } else if (c == '\r')
- col = 0;
+ }
// skip over white space
while ((c = getChar ()) != -1 && (c != '\n') && ((c == '\r') || (c == ' ') || (c == '\t')))
;
if (c == '\n'){
- line++;
- ref_line++;
return;
} else if (c == '\r'){
- col = 0;
+ return;
+ } else if (c == -1){
+ arg = "";
return;
}
static_cmd_arg.Append ((char) c);
}
- if (c == '\n'){
- line++;
- ref_line++;
- } else if (c == '\r')
- col = 0;
arg = static_cmd_arg.ToString ().Trim ();
}
//
bool PreProcessLine (string arg)
{
- if (arg == "")
+ if (arg.Length == 0)
return false;
if (arg == "default"){
//
void PreProcessDefinition (bool is_define, string arg)
{
- if (arg == "" || arg == "true" || arg == "false"){
+ if (arg.Length == 0 || arg == "true" || arg == "false"){
Report.Error (1001, Location, "Missing identifer to pre-processor directive");
return;
}
Hashtable w_table = Report.warning_ignore_table;
foreach (int code in codes) {
if (w_table != null && w_table.Contains (code))
- Report.Warning (1635, 1, Location, "Cannot restore warning 'CS{0:0000}' because it was disabled globally", code);
+ Report.Warning (1635, 1, Location, String.Format ("Cannot restore warning `CS{0:0000}' because it was disabled globally", code));
Report.RegisterWarningRegion (Location).WarningEnable (Location, code);
}
return;
values[index++] = int.Parse (string_code, System.Globalization.CultureInfo.InvariantCulture);
}
catch (FormatException) {
- Report.Warning (1692, Location, "Invalid number");
+ Report.Warning (1692, 1, Location, "Invalid number");
}
}
return values;
void Error_InvalidDirective () // Reports error 1517 at the tokenizer's current Location
{
- Report.Error (1517, Location, "Invalid pre-processor directive");
+ Report.Error (1517, Location, "Invalid preprocessor directive");
}
void Error_UnexpectedDirective (string extra)
void Error_TokensSeen () // Reports error 1032 at the tokenizer's current Location
{
- Report.Error (
- 1032, Location,
- "Cannot define or undefine pre-processor symbols after a token in the file");
+ Report.Error (1032, Location,
+ "Cannot define or undefine preprocessor symbols after first token in file");
}
//
string cmd, arg;
bool region_directive = false;
+ current_location = new Location (ref_line, Col);
+
get_cmd_arg (out cmd, out arg);
// Eat any trailing whitespaces and single-line comments
if (!PreProcessLine (arg))
Report.Error (
1576, Location,
- "Argument to #line directive is missing or invalid");
+ "The line number specified for #line directive is missing or invalid");
return caller_is_taking;
case "region":
goto case "endif";
case "if":
- if (arg == ""){
+ if (arg.Length == 0){
Error_InvalidDirective ();
return true;
}
bool taking = false;
if (ifstack == null)
- ifstack = new Stack ();
+ ifstack = new Stack (2);
if (ifstack.Count == 0){
taking = true;
int pop = (int) ifstack.Pop ();
if (region_directive && ((pop & REGION) == 0))
- Report.Error (1027, Location, "#endif directive expected");
+ Report.Error (1027, Location, "Expected `#endif' directive");
else if (!region_directive && ((pop & REGION) != 0))
Report.Error (1038, Location, "#endregion directive expected");
+ if (!region_directive && arg.Length != 0) {
+ Report.Error (1025, Location, "Single-line comment or end-of-line expected");
+ }
+
if (ifstack.Count == 0)
return true;
else {
case "else":
if (ifstack == null || ifstack.Count == 0){
- Report.Error (
- 1028, Location,
- "Unexpected processor directive (no #if for this #else)");
+ Error_UnexpectedDirective ("no #if for this #else");
return true;
} else {
int state = (int) ifstack.Peek ();
return true;
case "warning":
- Report.Warning (1030, Location, "#warning: '{0}'", arg);
+ Report.Warning (1030, 1, Location, "#warning: `{0}'", arg);
return true;
}
- Report.Error (1024, Location, "Preprocessor directive expected (got: " + cmd + ")");
+ Report.Error (1024, Location, "Wrong preprocessor directive");
return true;
}
if (c == '\n'){
if (!quoted)
Report.Error (1010, Location, "Newline in constant");
- line++;
- ref_line++;
- col = 0;
- } else
- col++;
+ }
if (!quoted){
c = escape (c);
if (res == Token.PARTIAL) {
// Save current position and parse next token.
- int old = reader.Position;
- int old_putback = putback_char;
-
- putback_char = -1;
+ PushPosition ();
int next_token = token ();
bool ok = (next_token == Token.CLASS) ||
(next_token == Token.STRUCT) ||
- (next_token == Token.INTERFACE);
+ (next_token == Token.INTERFACE) ||
+ (next_token == Token.ENUM); // "partial" is a keyword in 'partial enum', even though it's not valid
- reader.Position = old;
- putback_char = old_putback;
+ PopPosition ();
if (ok)
return res;
else {
- val = "partial";
+ val = new LocatedToken (Location, "partial");
return Token.IDENTIFIER;
}
}
private int consume_identifier (int s, bool quoted)
{
int pos = 1;
- int c;
+ int c = -1;
id_builder [0] = (char) s;
- while ((c = reader.Read ()) != -1) {
+ current_location = new Location (ref_line, Col);
+
+ while ((c = getChar ()) != -1) {
+ loop:
if (is_identifier_part_character ((char) c)){
if (pos == max_id_size){
Report.Error (645, Location, "Identifier too long (limit is 512 chars)");
}
id_builder [pos++] = (char) c;
- putback_char = -1;
- col++;
+// putback_char = -1;
+ } else if (c == '\\') {
+ c = escape (c);
+ goto loop;
} else {
- putback_char = c;
+// putback_char = c;
+ putback (c);
break;
}
}
//
if (!quoted && (s >= 'a' || s == '_')){
int keyword = GetKeyword (id_builder, pos);
- if (keyword != -1)
+ if (keyword != -1) {
+ val = Location;
return keyword;
+ }
}
//
if (identifiers [pos] != null) {
val = identifiers [pos][id_builder];
if (val != null) {
+ val = new LocatedToken (Location, (string) val);
+ if (quoted)
+ escapedIdentifiers.Add (val);
return Token.IDENTIFIER;
}
}
identifiers [pos] = new CharArrayHashtable (pos);
val = new String (id_builder, 0, pos);
+ if (RootContext.Version == LanguageVersion.ISO_1) {
+ for (int i = 1; i < id_builder.Length; i += 3) {
+ if (id_builder [i] == '_' && (id_builder [i - 1] == '_' || id_builder [i + 1] == '_')) {
+ Report.Error (1638, Location,
+ "`{0}': Any identifier with double underscores cannot be used when ISO language version mode is specified", val.ToString ());
+ break;
+ }
+ }
+ }
char [] chars = new char [pos];
Array.Copy (id_builder, chars, pos);
identifiers [pos] [chars] = val;
+ val = new LocatedToken (Location, (string) val);
+ if (quoted)
+ escapedIdentifiers.Add (val);
return Token.IDENTIFIER;
}
val = null;
// optimization: eliminate col and implement #directive semantic correctly.
- for (;(c = getChar ()) != -1; col++) {
+ for (;(c = getChar ()) != -1;) {
if (c == ' ')
continue;
if (c == '\t') {
- col = (((col + 8) / 8) * 8) - 1;
continue;
}
if (peekChar () == '\n')
getChar ();
- line++;
- ref_line++;
- col = 0;
any_token_seen |= tokens_seen;
tokens_seen = false;
comments_seen = false;
getChar ();
// Don't allow ////.
if ((d = peekChar ()) != '/') {
+ update_comment_location ();
if (doc_state == XmlCommentState.Allowed)
handle_one_line_xml_comment ();
else if (doc_state == XmlCommentState.NotAllowed)
}
}
while ((d = getChar ()) != -1 && (d != '\n') && d != '\r')
- col++;
if (d == '\n'){
- line++;
- ref_line++;
- col = 0;
}
any_token_seen |= tokens_seen;
tokens_seen = false;
bool docAppend = false;
if (RootContext.Documentation != null && peekChar () == '*') {
getChar ();
+ update_comment_location ();
// But when it is /**/, just do nothing.
if (peekChar () == '/') {
getChar ();
while ((d = getChar ()) != -1){
if (d == '*' && peekChar () == '/'){
getChar ();
- col++;
comments_seen = true;
break;
}
xml_comment_buffer.Append ((char) d);
if (d == '\n'){
- line++;
- ref_line++;
- col = 0;
any_token_seen |= tokens_seen;
tokens_seen = false;
//
is_punct_label:
// white space
if (c == '\n'){
- line++;
- ref_line++;
- col = 0;
any_token_seen |= tokens_seen;
tokens_seen = false;
comments_seen = false;
cont = handle_preprocessing_directive (cont);
if (cont){
- col = 0;
continue;
}
- col = 1;
bool skipping = false;
- for (;(c = getChar ()) != -1; col++){
+ for (;(c = getChar ()) != -1;){
if (c == '\n'){
- col = 0;
- line++;
- ref_line++;
skipping = false;
} else if (c == ' ' || c == '\t' || c == '\v' || c == '\r' || c == 0xa0)
continue;
any_token_seen |= tokens_seen;
tokens_seen = false;
if (c == -1)
- Report.Error (1027, Location, "#endif/#endregion expected");
+ Report.Error (1027, Location, "Expected `#endif' directive");
continue;
}
if (c == -1)
return Token.EOF;
- if (is_identifier_start_character ((char)c)){
+ if (c == '\\' || is_identifier_start_character ((char)c)){
tokens_seen = true;
- return consume_identifier (c);
+ return consume_identifier (c);
}
- is_punct_label:
+ current_location = new Location (ref_line, Col);
if ((t = is_punct ((char)c, ref doread)) != Token.ERROR){
tokens_seen = true;
if (doread){
Report.Error (1011, Location, error_details);
return Token.ERROR;
}
+ if (c == '\r' || c == '\n') {
+ Report.Error (1010, Location, "Newline in constant");
+ return Token.ERROR;
+ }
c = escape (c);
if (c == -1)
return Token.ERROR;
// Try to recover, read until newline or next "'"
while ((c = getChar ()) != -1){
- if (c == '\n' || c == '\''){
- line++;
- ref_line++;
- col = 0;
+ if (c == '\n'){
+ break;
+ }
+ else if (c == '\'')
break;
- } else
- col++;
}
return Token.ERROR;
}
while ((c = peekChar ()) == ' ')
getChar (); // skip heading whitespaces.
while ((c = peekChar ()) != -1 && c != '\n' && c != '\r') {
- col++;
xml_comment_buffer.Append ((char) getChar ());
}
if (c == '\r' || c == '\n')
xml_comment_buffer.Insert (current_comment_start, String.Join (Environment.NewLine, lines));
}
+ //
+ // Records the location of the current comment. An already
+ // recorded (non-null) location is left untouched.
+ //
+ private void update_comment_location ()
+ {
+ 	if (!current_comment_location.IsNull)
+ 		return;
+
+ 	// "-2" accounts for the leading "//" or "/*"
+ 	int comment_col = col - 2;
+ 	current_comment_location = new Location (ref_line, comment_col);
+ }
+
//
// Switches doc_state to Error and raises warning 1587 for a
// doc comment that is not placed on a valid language element.
//
private void warn_incorrect_doc_comment ()
{
- doc_state = XmlCommentState.Error;
- // in csc, it is 'XML comment is not placed on a valid
- // language element'. But that does not make sense.
- Report.Warning (1587, 2, Location, "XML comment is placed on an invalid language element which can not accept it.");
+ if (doc_state != XmlCommentState.Error) { // no-op when doc_state is already Error
+ doc_state = XmlCommentState.Error;
+ // in csc, it is 'XML comment is not placed on
+ // a valid language element'. But that does not
+ // make sense.
+ Report.Warning (1587, 2, Location, "XML comment is not placed on a valid language element");
+ }
}
//
{
if (xml_comment_buffer.Length > 0) {
string ret = xml_comment_buffer.ToString ();
- xml_comment_buffer.Length = 0;
+ reset_doc_comment ();
return ret;
}
return null;
}
+ // Forgets the remembered comment location and empties the
+ // accumulated XML doc comment buffer.
+ void reset_doc_comment ()
+ {
+ 	current_comment_location = Location.Null;
+ 	xml_comment_buffer.Length = 0;
+ }
+
public void cleanup ()
{
if (ifstack != null && ifstack.Count >= 1) {
if ((state & REGION) != 0)
Report.Error (1038, Location, "#endregion directive expected");
else
- Report.Error (1027, Location, "#endif directive expected");
+ Report.Error (1027, Location, "Expected `#endif' directive");
}
}