3 using System.Globalization;
5 namespace Monkeydoc.Ecma
7 public class EcmaUrlTokenizer : yyParser.yyInput
9 const char EndOfStream = (char)0;
17 public EcmaUrlTokenizer (string input)
// Reports whether `c` may begin an identifier: an ASCII letter, an
// underscore, or any character that Char.IsLetter accepts.
static bool is_identifier_start_character (char c)
{
	// Cheap ASCII range checks first; Char.IsLetter covers everything else.
	if ('a' <= c && c <= 'z')
		return true;
	if ('A' <= c && c <= 'Z')
		return true;
	if (c == '_')
		return true;

	return Char.IsLetter (c);
}
27 static bool is_identifier_part_character (char c)
29 if (c >= 'a' && c <= 'z')
32 if (c >= 'A' && c <= 'Z')
35 if (c == '_' || (c >= '0' && c <= '9'))
41 return Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;
// Returns true while Peek () yields something other than the EndOfStream
// sentinel, and false once it yields that sentinel.
public bool advance ()
{
	if (Peek () == EndOfStream)
		return false;

	return true;
}
55 public Object value ()
62 int token = xtoken ();
63 //Console.WriteLine ("Current token {0} with value {1}", token, val == null ? "(none)" : val.ToString ());
64 if (token == Token.ERROR)
65 Console.WriteLine ("Problem at pos {0} after token {1}", current_pos, current_token);
66 current_token = token;
73 while (char.IsWhiteSpace (next))
84 return Token.OP_GENERICS_LT;
86 return Token.OP_GENERICS_GT;
88 return Token.OP_GENERICS_BACKTICK;
90 return Token.OP_OPEN_PAREN;
92 return Token.OP_CLOSE_PAREN;
94 return Token.INNER_TYPE_SEPARATOR;
98 return Token.SLASH_SEPARATOR;
100 return Token.OP_ARRAY_OPEN;
102 return Token.OP_ARRAY_CLOSE;
106 return Token.REF_ARG;
108 return Token.OUT_ARG;
110 return Token.EXPLICIT_IMPL_SEP;
112 return TokenizeIdentifierOrNumber (next);
116 int TokenizeIdentifierOrNumber (char current)
118 // We must first return the expression type, which is an uppercase letter followed by a colon
119 if (current_pos < 2) {
124 if (is_identifier_start_character (current) || current == '*') {
126 // identifier length is artificially limited to 1024 bytes by implementations
127 char* pIdent = stackalloc char[512];
132 while ((peek = Peek ()) != EndOfStream && is_identifier_part_character (peek)) {
133 *(pIdent + identCount) = Read ();
138 val = new string ((char*)pIdent, 0, identCount);
139 return Token.IDENTIFIER;
141 } else if (char.IsDigit (current)) {
153 return input[real_current_pos++];
162 return input[real_current_pos];