Merge pull request #601 from knocte/sock_improvements
[mono.git] / mcs / class / monodoc / Monodoc.Ecma / EcmaUrlTokenizer.cs
1 using System;
2 using System.Text;
3 using System.Globalization;
4
namespace Monodoc.Ecma
{
        /// <summary>
        /// Splits an input string into the integer token codes declared on <c>Token</c>
        /// and exposes them through the <c>yyParser.yyInput</c> members
        /// (<c>advance</c>, <c>token</c> and <c>value</c>).
        /// </summary>
        public class EcmaUrlTokenizer : yyParser.yyInput
        {
                // Sentinel returned by Read () and Peek () once the input is exhausted.
                const char EndOfStream = (char)0;

                // Text being tokenized. Never null: a null constructor argument is stored as "".
                string input;

                // Semantic value of the most recent token: a string for IDENTIFIER,
                // a boxed int for DIGIT, null for everything else.
                object val;

                // Last token code handed out by token (); only used in the error diagnostic.
                int current_token;

                // Progress counter: bumped once per token and once per additional identifier
                // character. Skipped whitespace is not counted, so this is not an exact offset.
                int current_pos;

                // Index into input of the next character to be read.
                int real_current_pos;

                /// <summary>Creates a tokenizer positioned at the start of <paramref name="input"/>.</summary>
                /// <param name="input">Text to tokenize; null is treated as an empty string.</param>
                public EcmaUrlTokenizer (string input)
                {
                        // Normalising null here lets Read () and Peek () use plain length checks
                        // while a null input still simply reports end of stream.
                        this.input = input ?? string.Empty;
                }

                // True when c may begin an identifier: an underscore or any letter.
                // The ASCII range tests are a fast path in front of Char.IsLetter.
                static bool is_identifier_start_character (char c)
                {
                        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || Char.IsLetter (c);
                }

                // True when c may continue an identifier: ASCII letters, ASCII digits and '_',
                // plus any non-ASCII letter or connector-punctuation character.
                static bool is_identifier_part_character (char c)
                {
                        if (c >= 'a' && c <= 'z')
                                return true;

                        if (c >= 'A' && c <= 'Z')
                                return true;

                        if (c == '_' || (c >= '0' && c <= '9'))
                                return true;

                        // Every remaining ASCII character is rejected without a Unicode lookup.
                        if (c < 0x80)
                                return false;

                        return Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;
                }

                /// <summary>
                /// Reports whether another character is available, without consuming it.
                /// </summary>
                /// <returns>false once the next character is the end-of-stream sentinel.</returns>
                public bool advance ()
                {
                        return Peek () != EndOfStream;
                }

                /// <summary>Semantic value attached to the most recently returned token, or null.</summary>
                public Object Value {
                        get {
                                return val;
                        }
                }

                /// <summary>Method form of <c>Value</c>; returns the same object.</summary>
                public Object value ()
                {
                        return val;
                }

                /// <summary>
                /// Reads the next token, remembers it as the current token and returns its code.
                /// When the code is <c>Token.ERROR</c> a diagnostic line is written to the console first.
                /// </summary>
                public int token ()
                {
                        int token = xtoken ();
                        if (token == Token.ERROR)
                                Console.WriteLine ("Problem at pos {0} after token {1}", current_pos, current_token);
                        current_token = token;
                        return token;
                }

                // Consumes leading whitespace and one further character, resets val and maps
                // the character to its token code. Anything that is not one of the punctuation
                // characters below is delegated to TokenizeIdentifierOrNumber.
                int xtoken ()
                {
                        char next = Read ();
                        while (char.IsWhiteSpace (next))
                                next = Read ();
                        current_pos++;
                        val = null;

                        switch (next) {
                        case ',':
                                return Token.COMMA;
                        case '.':
                                return Token.DOT;
                        // Braces and angle brackets are interchangeable generic delimiters.
                        case '{':
                        case '<':
                                return Token.OP_GENERICS_LT;
                        case '}':
                        case '>':
                                return Token.OP_GENERICS_GT;
                        case '`':
                                return Token.OP_GENERICS_BACKTICK;
                        case '(':
                                return Token.OP_OPEN_PAREN;
                        case ')':
                                return Token.OP_CLOSE_PAREN;
                        case '+':
                                return Token.INNER_TYPE_SEPARATOR;
                        case ':':
                                return Token.COLON;
                        case '/':
                                return Token.SLASH_SEPARATOR;
                        case '[':
                                return Token.OP_ARRAY_OPEN;
                        case ']':
                                return Token.OP_ARRAY_CLOSE;
                        case '*':
                                return Token.STAR;
                        case '&':
                                return Token.REF_ARG;
                        case '@':
                                return Token.OUT_ARG;
                        case '$':
                                return Token.EXPLICIT_IMPL_SEP;
                        default:
                                return TokenizeIdentifierOrNumber (next);
                        }
                }

                // Classifies a character that xtoken () did not recognise as punctuation.
                // `current` must be the character most recently consumed by Read ().
                // Returns the raw character code for the very first token, Token.IDENTIFIER
                // (val = the identifier text), Token.DIGIT (val = boxed int digit value)
                // or Token.ERROR (val = null).
                int TokenizeIdentifierOrNumber (char current)
                {
                        // We must first return the expression type, which is an uppercase letter
                        // followed by a colon: the first token is handed back as its character code.
                        if (current_pos < 2) {
                                val = null;
                                return (int)current;
                        }

                        // xtoken () already turns '*' into Token.STAR, so the '*' test only
                        // matters if this method is ever reached by another path.
                        if (is_identifier_start_character (current) || current == '*') {
                                // `current` sits just before the read cursor. Slicing the input
                                // avoids a fixed-size scratch buffer, so identifier length is
                                // limited only by the input itself.
                                int start = real_current_pos - 1;

                                char peek;
                                while ((peek = Peek ()) != EndOfStream && is_identifier_part_character (peek)) {
                                        Read ();
                                        ++current_pos;
                                }

                                val = input.Substring (start, real_current_pos - start);
                                return Token.IDENTIFIER;
                        }

                        if (char.IsDigit (current)) {
                                // Char.IsDigit accepts every Unicode decimal digit, so subtracting
                                // '0' would be wrong outside ASCII; GetNumericValue yields 0-9 for all.
                                val = (int) Char.GetNumericValue (current);
                                return Token.DIGIT;
                        }

                        val = null;
                        return Token.ERROR;
                }

                // Returns the next character and moves the cursor past it, or EndOfStream
                // (leaving the cursor in place) when no input remains.
                char Read ()
                {
                        if (real_current_pos >= input.Length)
                                return EndOfStream;

                        return input[real_current_pos++];
                }

                // Returns the next character without consuming it, or EndOfStream when
                // no input remains.
                char Peek ()
                {
                        if (real_current_pos >= input.Length)
                                return EndOfStream;

                        return input[real_current_pos];
                }
        }
}