[MWF] Improve ellipsis handling
[mono.git] / mcs / class / System / System / UriParseComponents.cs
1 //
2 // Internal UriParseComponents class
3 //
4 // Author:
5 //      Vinicius Jarina  <vinicius.jarina@xamarin.com>
6 //
7 // Copyright (C) 2012 Xamarin, Inc (http://www.xamarin.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
16 // 
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
19 // 
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28
29 using System.IO;
30 using System.Net;
31 using System.Text;
32 using System.Globalization;
33
34 namespace System {
35         
// Mutable working state shared by the UriParseComponents parse steps.
internal class ParserState
{
        // Input text still to be examined; starts out as the whole URI string.
        public string remaining;

        // The UriKind the caller asked for.
        public UriKind kind;

        // Components gathered so far.
        public UriElements elements;

        // Error message recorded by a parse step; left unassigned by the constructor.
        public string error;

        public ParserState (string uri, UriKind kind)
        {
                this.kind = kind;
                remaining = uri;
                elements = new UriElements ();
        }
}
50         
51         // Parse Uri components (scheme, userinfo, host, query, fragment)
52         // http://www.ietf.org/rfc/rfc3986.txt
53         internal static class UriParseComponents
54         {
// Throwing front end for TryParseComponents.
// Returns the parsed elements, or throws UriFormatException carrying the
// parser's error message when TryParseComponents reports failure.
public static UriElements ParseComponents (string uri, UriKind kind)
{
        UriElements parsed;
        string message;

        if (TryParseComponents (uri, kind, out parsed, out message))
                return parsed;

        throw new UriFormatException (message);
}
65
// Splits a (trimmed) URI string into its components without throwing on
// malformed input.
//
// Returns true on success with 'elements' filled in. Returns false with
// 'error' set when the text is rejected; 'elements' is null in that case,
// except for the "too short" early exit which leaves the fresh instance.
// When the scheme has a registered parser that is not a DefaultUriParser,
// the method returns true right after the scheme step and parses nothing
// further here.
public static bool TryParseComponents (string uri, UriKind kind, out UriElements elements, out string error)
{
        uri = uri.Trim ();

        var state = new ParserState (uri, kind);
        elements = state.elements;
        error = null;

        // An empty string is a valid relative reference.
        if (uri.Length == 0) {
                if (kind == UriKind.Relative || kind == UriKind.RelativeOrAbsolute) {
                        state.elements.isAbsoluteUri = false;
                        return true;
                }
        }

        if (kind == UriKind.Absolute && uri.Length <= 1) {
                error = "Absolute URI is too short";
                return false;
        }

        // Each step returns false when parsing should stop, either because
        // the input is exhausted/handled or because state.error was set.
        bool keepGoing = ParseFilePath (state) && ParseScheme (state);

        var scheme = state.elements.scheme;
        if (!string.IsNullOrEmpty (scheme)) {
                UriParser parser = UriParser.GetParser (scheme);
                if (parser != null && !(parser is DefaultUriParser))
                        return true;
        }

        if (keepGoing && ParseAuthority (state) && ParsePath (state) && ParseQuery (state))
                ParseFragment (state);

        if (string.IsNullOrEmpty (state.elements.host)) {
                // These schemes cannot be used without a host.
                bool hostRequired =
                        scheme == Uri.UriSchemeHttp ||
                        scheme == Uri.UriSchemeGopher ||
                        scheme == Uri.UriSchemeNntp ||
                        scheme == Uri.UriSchemeHttps ||
                        scheme == Uri.UriSchemeFtp;

                if (hostRequired)
                        state.error = "Invalid URI: The Authority/Host could not be parsed.";
        } else if (Uri.CheckHostName (state.elements.host) == UriHostNameType.Unknown) {
                state.error = "Invalid URI: The hostname could not be parsed.";
        }

        if (string.IsNullOrEmpty (state.error))
                return true;

        elements = null;
        error = state.error;
        return false;
}
118
// ALPHA: true only for the ASCII letters 'a'-'z' and 'A'-'Z'.
private static bool IsAlpha (char ch)
{
        if (ch >= 'a' && ch <= 'z')
                return true;

        return ch >= 'A' && ch <= 'Z';
}
125
// Tries the file-path forms in order: Windows drive path, Windows UNC,
// then Unix path. The first step that returns false ends the chain and
// makes this method return false as well.
private static bool ParseFilePath (ParserState state)
{
        if (!ParseWindowsFilePath (state))
                return false;

        if (!ParseWindowsUNC (state))
                return false;

        return ParseUnixFilePath (state);
}
132
// Recognises a Windows drive path such as "c:\dir" or "/c:/dir".
//
// Returns false when the input was handled here (path stored, or
// state.error set) and parsing must stop. Otherwise returns whether any
// input remains, so the caller can continue with the next step.
private static bool ParseWindowsFilePath (ParserState state)
{
        var scheme = state.elements.scheme;
        bool hasScheme = !string.IsNullOrEmpty (scheme);

        // A known scheme other than file: cannot carry a drive path.
        if (hasScheme && scheme != Uri.UriSchemeFile && UriHelper.IsKnownScheme (scheme))
                return state.remaining.Length > 0;

        // Ignore a single leading slash or backslash before the drive letter.
        string candidate = state.remaining;
        if (candidate.Length > 0) {
                char lead = candidate [0];
                if (lead == '/' || lead == '\\')
                        candidate = candidate.Substring (1);
        }

        bool hasDriveColon = candidate.Length >= 2 && candidate [1] == ':';
        if (!hasDriveColon)
                return state.remaining.Length > 0;

        if (!IsAlpha (candidate [0])) {
                // "x:" where x is not a letter: an error for absolute URIs,
                // otherwise the text is kept as a relative path.
                if (state.kind == UriKind.Absolute) {
                        state.error = "Invalid URI: The URI scheme is not valid.";
                } else {
                        state.elements.isAbsoluteUri = false;
                        state.elements.path = candidate;
                }
                return false;
        }

        // The drive must be followed by a separator ("c:foo" is rejected).
        if (candidate.Length > 2) {
                char afterColon = candidate [2];
                if (afterColon != '\\' && afterColon != '/') {
                        state.error = "Relative file path is not allowed.";
                        return false;
                }
        }

        if (!hasScheme) {
                state.elements.scheme = Uri.UriSchemeFile;
                state.elements.delimiter = "://";
        }

        state.elements.path = candidate.Replace ("\\", "/");
        return false;
}
173
// Recognises a Windows UNC path ("\\server\share\...").
//
// When the input starts with two backslashes, stores a file scheme, the
// host and the forward-slash path, then returns false to stop parsing.
// Otherwise returns whether any input remains.
private static bool ParseWindowsUNC (ParserState state)
{
        string input = state.remaining;

        bool startsWithDoubleBackslash =
                input.Length >= 2 && input [0] == '\\' && input [1] == '\\';
        if (!startsWithDoubleBackslash)
                return state.remaining.Length > 0;

        state.elements.scheme = Uri.UriSchemeFile;
        state.elements.delimiter = "://";
        state.elements.isUnc = true;

        string afterSlashes = input.TrimStart ('\\');
        int split = afterSlashes.IndexOf ('\\');

        string host;
        string path;
        if (split > 0) {
                host = afterSlashes.Substring (0, split);
                path = afterSlashes.Substring (split);
        } else {
                // "\\\\server": host only, no path
                host = afterSlashes;
                path = String.Empty;
        }

        state.elements.host = host;
        state.elements.path = path.Replace ("\\", "/");

        return false;
}
198
// Recognises a rooted Unix path ("/dir/file") when the platform's
// directory separator is '/'.
//
// When it matches, stores a file scheme and the path (a run of leading
// slashes is collapsed to one) and returns false to stop parsing.
// Otherwise returns whether any input remains.
private static bool ParseUnixFilePath (ParserState state)
{
        string input = state.remaining;

        bool rooted = input.Length >= 1 && input [0] == '/';
        if (!rooted || Path.DirectorySeparatorChar != '/')
                return state.remaining.Length > 0;

        state.elements.scheme = Uri.UriSchemeFile;
        state.elements.delimiter = "://";
        state.elements.isUnixFilePath = true;
        state.elements.isAbsoluteUri = state.kind != UriKind.Relative;

        bool doubleSlash = input.Length >= 2 && input [1] == '/';
        if (doubleSlash)
                state.elements.path = "/" + input.TrimStart ('/');
        else
                state.elements.path = input;

        return false;
}
219                 
// 3.1) scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
//
// Reads the scheme up to ':' and stores it lower-cased, then hands over to
// ParseDelimiter. When no valid scheme is found, an absolute parse fails
// with state.error set; any other kind is marked as not absolute and
// parsing continues while input remains.
private static bool ParseScheme (ParserState state)
{
        string input = state.remaining;

        // The first character is taken as-is; Uri.CheckSchemeName validates
        // the complete name further down.
        var name = new StringBuilder ();
        name.Append (input [0]);

        int end = 1;
        while (end < input.Length) {
                char ch = input [end];
                bool isSchemeChar =
                        ch == '.' || ch == '-' || ch == '+' || IsAlpha (ch) || Char.IsDigit (ch);
                if (!isSchemeChar)
                        break;

                name.Append (ch);
                end++;
        }

        // Ran off the end of the input without meeting a terminator.
        if (end >= input.Length)
                return SchemeNotFound (state, "Invalid URI: The format of the URI could not be determined.");

        if (input [end] != ':')
                return SchemeNotFound (state, "Invalid URI: The URI scheme is not valid.");

        state.elements.scheme = name.ToString ().ToLowerInvariant ();
        state.remaining = input.Substring (end);

        // Check scheme name characters as specified in RFC2396.
        // Note: different checks in 1.x and 2.0
        if (!Uri.CheckSchemeName (state.elements.scheme))
                return SchemeNotFound (state, "Invalid URI: The URI scheme is not valid.");

        if (state.elements.scheme == Uri.UriSchemeFile) {
                // On Windows every file:// URI is treated as UNC; other platforms are not.
#if BOOTSTRAP_BASIC
                state.elements.isUnc = (Path.DirectorySeparatorChar == '\\');
#else
                state.elements.isUnc = Environment.IsRunningOnWindows;
#endif
        }

        return ParseDelimiter (state);
}

// Shared fallback for ParseScheme: fails an absolute parse with the given
// message, otherwise marks the URI as not absolute and reports whether
// input remains.
private static bool SchemeNotFound (ParserState state, string message)
{
        if (state.kind == UriKind.Absolute) {
                state.error = message;
                return false;
        }

        state.elements.isAbsoluteUri = false;
        return state.remaining.Length > 0;
}
283
// Consumes the delimiter that follows the scheme name.
//
// Uses the delimiter Uri.GetSchemeDelimiter reports for the scheme when
// the input starts with it. Otherwise a known scheme is an error and any
// other scheme falls back to a bare ":". Returns whether input remains
// after the delimiter.
private static bool ParseDelimiter (ParserState state)
{
        var expected = Uri.GetSchemeDelimiter (state.elements.scheme);
        var chosen = expected;

        if (!state.remaining.StartsWith (expected, StringComparison.Ordinal)) {
                if (UriHelper.IsKnownScheme (state.elements.scheme)) {
                        state.error = "Invalid URI: The Authority/Host could not be parsed.";
                        return false;
                }

                chosen = ":";
        }

        state.elements.delimiter = chosen;
        state.remaining = state.remaining.Substring (chosen.Length);

        return state.remaining.Length > 0;
}
303                 
// Parses user info, host and port, but only when the delimiter is
// Uri.SchemeDelimiter or the scheme is mailto. In every other case the
// authority is skipped and the result is whether input remains.
private static bool ParseAuthority (ParserState state)
{
        bool hasAuthority =
                state.elements.delimiter == Uri.SchemeDelimiter ||
                state.elements.scheme == Uri.UriSchemeMailto;

        if (!hasAuthority)
                return state.remaining.Length > 0;

        if (!ParseUser (state))
                return false;

        if (!ParseHost (state))
                return false;

        return ParsePort (state);
}
313
// True for the punctuation marks '-', '.', '_' and '~'.
// Letters and digits are not covered here; callers test them separately.
static bool IsUnreserved (char ch)
{
        switch (ch) {
        case '-':
        case '.':
        case '_':
        case '~':
                return true;
        default:
                return false;
        }
}
318
319
// True for the sub-delims characters: ! $ & ' ( ) * + , ; =
static bool IsSubDelim (char ch)
{
        switch (ch) {
        case '!':
        case '$':
        case '&':
        case '\'':
        case '(':
        case ')':
        case '*':
        case '+':
        case ',':
        case ';':
        case '=':
                return true;
        default:
                return false;
        }
}
325                 
// userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
//
// Scans the start of the remaining input for user info terminated by '@'.
// When an '@' is found, stores the text before it in state.elements.user
// and removes it (and the '@') from state.remaining; otherwise the input
// is left untouched. A file: URI with user info is rejected through
// state.error.
//
// Returns false on a malformed percent escape (no error message is set)
// or on the file: rejection; otherwise returns whether input remains.
private static bool ParseUser (ParserState state)
{
        string part = state.remaining;
        StringBuilder sb = null;

        int index;
        for (index = 0; index < part.Length; index++) {
                char ch = part [index];

                if (ch == '%') {
                        if (!Uri.IsHexEncoding (part, index))
                                return false;

                        var oldIndex = index;
                        ch = Uri.HexUnescape (part, ref index);
                        // The loop increment will step forward again, so move
                        // back onto the last character of the escape.
                        index--;

                        if (ch == '@') {
                                // An escaped '@' stays in its "%40" form so it is not
                                // mistaken for the delimiter. The builder may not exist
                                // yet when the escape is the very first thing in the
                                // input (this used to throw NullReferenceException).
                                if (sb == null)
                                        sb = new StringBuilder ();
                                sb.Append (part.Substring (oldIndex, index - oldIndex + 1));
                                continue;
                        }
                }

                if (Char.IsLetterOrDigit (ch) || IsUnreserved (ch) || IsSubDelim (ch) || ch == ':') {
                        if (sb == null)
                                sb = new StringBuilder ();
                        sb.Append (ch);
                } else
                        break;
        }

        // Only an unescaped '@' at the stopping position marks user info.
        if (index < part.Length && part [index] == '@') {
                if (state.elements.scheme == Uri.UriSchemeFile) {
                        state.error = "Invalid URI: The hostname could not be parsed.";
                        return false;
                }

                state.elements.user = sb == null ? "" : sb.ToString ();
                state.remaining = state.remaining.Substring (index + 1);
        }

        return state.remaining.Length > 0;
}
368                 
// host        = IP-literal / IPv4address / reg-name
//
// Reads the host from the start of the remaining input and stores it in
// state.elements.host (lower-cased, or in bracketed form for an IPv6
// literal, whose scope id goes to state.elements.scopeId). Returns false
// when ParseWindowsFilePath stops the parse; otherwise returns whether
// input remains after the host.
private static bool ParseHost (ParserState state)
{
        string input = state.remaining;

        // For file: URIs, drop a run of identical leading slashes/backslashes.
        if (state.elements.scheme == Uri.UriSchemeFile && input.Length >= 2) {
                char lead = input [0];
                if ((lead == '\\' || lead == '/') && input [1] == lead) {
                        input = input.TrimStart (lead);
                        state.remaining = input;
                }
        }

        if (!ParseWindowsFilePath (state))
                return false;

        var scanned = new StringBuilder ();
        string beforeColon = "";
        bool sawColon = false;

        for (int i = 0; i < input.Length; i++) {
                char ch = input [i];

                if (ch == '/' || ch == '#' || ch == '?')
                        break;

                // A ':' may start a port or belong to an IPv6 literal; remember
                // the text in front of it in case the IPv6 parse fails.
                if (ch == ':' && string.IsNullOrEmpty (beforeColon)) {
                        beforeColon = scanned.ToString ();
                        sawColon = true;
                }

                scanned.Append (ch);

                if (sawColon && ch == ']')
                        break;
        }

        string host;
        if (sawColon) {
                IPv6Address address;
                if (IPv6Address.TryParse (scanned.ToString (), out address)) {
#if NET_4_5
                        var text = address.ToString (false);
#else
                        var text = address.ToString (true);
#endif
                        // remove scope
                        text = text.Split ('%') [0];

                        state.elements.host = "[" + text + "]";
                        state.elements.scopeId = address.ScopeId;

                        state.remaining = input.Substring (scanned.Length);
                        return state.remaining.Length > 0;
                }

                // Not IPv6: the host is whatever preceded the first ':'.
                host = beforeColon;
        } else {
                host = scanned.ToString ();
        }

        host = host.ToLowerInvariant ();
        state.elements.host = host;
        state.remaining = input.Substring (host.Length);

        return state.remaining.Length > 0;
}
436                 
// port          = *DIGIT
//
// Parses an optional ":port" at the start of the remaining input. Digits
// run until '/', '#', '?' or the end of input; any other character is an
// error. An empty port (":" followed by no digits) is accepted and leaves
// state.elements.port untouched. Values above UInt16.MaxValue are
// rejected. Returns false on error, otherwise whether input remains.
private static bool ParsePort (ParserState state)
{
        string input = state.remaining;
        if (input.Length == 0 || input [0] != ':')
                return input.Length > 0;

        // Find the end of the digit run that follows the ':'.
        int end = 1;
        while (end < input.Length) {
                char ch = input [end];

                if (!char.IsDigit (ch)) {
                        if (ch == '/' || ch == '#' || ch == '?')
                                break;

                        state.error = "Invalid URI: Invalid port specified.";
                        return false;
                }

                end++;
        }

        string digits = input.Substring (1, end - 1);
        state.remaining = input.Substring (end);

        if (digits.Length == 0)
                return state.remaining.Length > 0;

        int port;
        bool parsed = Int32.TryParse (digits, NumberStyles.None, CultureInfo.InvariantCulture, out port);
        if (!parsed || port < 0 || port > UInt16.MaxValue) {
                state.error = "Invalid URI: Invalid port number";
                return false;
        }

        state.elements.port = port;

        return state.remaining.Length > 0;
}
478                 
// Reads the path: everything up to the first '#', or up to the first '?'
// when the scheme supports a query part. Stores it in state.elements.path
// and removes it from state.remaining. Returns whether input remains.
private static bool ParsePath (ParserState state)
{
        string part = state.remaining;

        // The answer depends only on the scheme, so ask once rather than
        // once per character.
        bool supportsQuery = UriHelper.SupportsQuery (state.elements.scheme);

        int end = 0;
        while (end < part.Length) {
                char ch = part [end];

                if (ch == '#' || (supportsQuery && ch == '?'))
                        break;

                end++;
        }

        state.elements.path = part.Substring (0, end);
        state.remaining = part.Substring (end);

        return state.remaining.Length > 0;
}
504                 
// Reads the query when the scheme supports one and the remaining input
// starts with '?'. The text between the '?' and the first '#' (or the end
// of input) is stored in state.elements.query without the '?'. Returns
// whether input remains.
private static bool ParseQuery (ParserState state)
{
        string input = state.remaining;

        bool hasQuery =
                UriHelper.SupportsQuery (state.elements.scheme) &&
                input.Length > 0 &&
                input [0] == '?';
        if (!hasQuery)
                return input.Length > 0;

        // The query stops at the fragment marker, if there is one.
        int hash = input.IndexOf ('#', 1);
        int end = hash < 0 ? input.Length : hash;

        state.elements.query = input.Substring (1, end - 1);
        state.remaining = input.Substring (end);

        return state.remaining.Length > 0;
}
535                 
// Reads the fragment when the remaining input starts with '#': everything
// after the '#' is stored in state.elements.fragment and the method
// returns false, since this is the last step. state.remaining is not
// updated. Without a leading '#', returns whether input remains.
private static bool ParseFragment (ParserState state)
{
        string input = state.remaining;

        bool startsFragment = input.Length > 0 && input [0] == '#';
        if (!startsFragment)
                return input.Length > 0;

        state.elements.fragment = input.Substring (1);

        return false;
}
557         }
558 }