fc3641a1dc89a8c7514efd98cb14626e12c430d4
[mono.git] / mcs / class / System / System / UriHelper.cs
1 using System;
2 using System.Globalization;
3 using System.Text;
4 using System.Collections.Generic;
5
6 namespace System {
7         internal class UriHelper {
                // Extra UriFormat value (0x7FFF) that NeedToEscape and NeedToUnescape treat as
                // a formatting mode of its own, next to the regular UriFormat members.
                // NOTE(review): the name suggests this is the mode used for Uri.ToString () - confirm against callers.
                internal const UriFormat ToStringUnescape = (UriFormat) 0x7FFF;
9
10                 internal static bool IriParsing {
11                         get { return Uri.IriParsing; }
12                 }
13
14                 [Flags]
15                 internal enum FormatFlags {
16                         None = 0,
17                         HasComponentCharactersToNormalize = 1 << 0,
18                         HasUriCharactersToNormalize = 1 << 1,
19                         HasHost = 1 << 2,
20                 }
21
22                 [Flags]
23                 internal enum UriSchemes {
24                         Http = 1 << 0,
25                         Https = 1 << 1,
26                         File = 1 << 2,
27                         Ftp = 1 << 3,
28                         Gopher = 1 << 4,
29                         Ldap = 1 << 5,
30                         Mailto = 1 << 6,
31                         NetPipe = 1 << 7,
32                         NetTcp = 1 << 8,
33                         News = 1 << 9,
34                         Nntp = 1 << 10,
35                         Telnet = 1 << 11,
36                         Uuid = 1 << 12,
37                         Custom = 1 << 13,
38                         CustomWithHost = 1 << 14,
39                         All = ~0,
40                         None = 0
41                 }
42
43                 private static UriSchemes GetScheme (string schemeName)
44                 {
45                         if (schemeName == "")
46                                 return UriSchemes.None;
47                         if (schemeName == Uri.UriSchemeHttp)
48                                 return UriSchemes.Http;
49                         if (schemeName == Uri.UriSchemeHttps)
50                                 return UriSchemes.Https;
51                         if (schemeName == Uri.UriSchemeFile)
52                                 return UriSchemes.File;
53                         if (schemeName == Uri.UriSchemeFtp)
54                                 return UriSchemes.Ftp;
55                         if (schemeName == Uri.UriSchemeGopher)
56                                 return UriSchemes.Gopher;
57                         if (schemeName == Uri.UriSchemeLdap)
58                                 return UriSchemes.Ldap;
59                         if (schemeName == Uri.UriSchemeMailto)
60                                 return UriSchemes.Mailto;
61                         if (schemeName == Uri.UriSchemeNetPipe)
62                                 return UriSchemes.NetPipe;
63                         if (schemeName == Uri.UriSchemeNetTcp)
64                                 return UriSchemes.NetTcp;
65                         if (schemeName == Uri.UriSchemeNews)
66                                 return UriSchemes.News;
67                         if (schemeName == Uri.UriSchemeNntp)
68                                 return UriSchemes.Nntp;
69                         if (schemeName == Uri.UriSchemeTelnet)
70                                 return UriSchemes.Telnet;
71                         if (schemeName == Uri.UriSchemeUuid)
72                                 return UriSchemes.Uuid;
73
74                         return UriSchemes.Custom;
75                 }
76
77                 internal static bool SchemeContains (UriSchemes keys, UriSchemes flag)
78                 {
79                         return (keys & flag) != 0;
80                 }
81
82                 internal static bool IsKnownScheme(string scheme)
83                 {
84                         return GetScheme(scheme) != UriSchemes.Custom;
85                 }
86
87                 internal static string HexEscapeMultiByte (char character)
88                 {
89                         const string hex_upper_chars = "0123456789ABCDEF";
90                         string ret = "";
91                         byte [] bytes = Encoding.UTF8.GetBytes (new [] {character});
92                         foreach (byte b in bytes)
93                                 ret += "%" + hex_upper_chars [((b & 0xf0) >> 4)] + hex_upper_chars [((b & 0x0f))];
94
95                         return ret;
96                 }
97
98                 internal static bool SupportsQuery (string scheme)
99                 {
100                         return SupportsQuery (GetScheme (scheme));
101                 }
102
103                 internal static bool SupportsQuery(UriSchemes scheme)
104                 {
105                         if (SchemeContains (scheme, UriSchemes.File))
106                                 return IriParsing;
107
108                         return !SchemeContains (scheme, UriSchemes.Ftp | UriSchemes.Gopher | UriSchemes.Nntp | UriSchemes.Telnet | UriSchemes.News);
109                 }
110
111                 internal static bool HasCharactersToNormalize(string str)
112                 {
113                         int len = str.Length;
114                         for (int i = 0; i < len; i++) {
115                                 char c = str [i];
116                                 if (c != '%')
117                                         continue;
118
119                                 int iStart = i;
120                                 char surrogate;
121                                 char x = Uri.HexUnescapeMultiByte (str, ref i, out surrogate);
122
123                                 bool isEscaped = i - iStart > 1;
124                                 if (!isEscaped)
125                                         continue;
126
127                                 if ((x >= 'A' && x <= 'Z') || (x >= 'a' && x <= 'z') || (x >= '0' && x <= '9') || 
128                                          x == '-' || x == '.' || x == '_' || x == '~')
129                                         return true;
130
131                                 if (x > 0x7f)
132                                         return true;
133                         }
134
135                         return false;
136                 }
137
138                 internal static string FormatAbsolute (string str, string schemeName,
139                         UriComponents component, UriFormat uriFormat, FormatFlags formatFlags = FormatFlags.None)
140                 {
141                         return Format (str, schemeName, UriKind.Absolute, component, uriFormat, formatFlags);
142                 }
143
144                 internal static string FormatRelative (string str, string schemeName, UriFormat uriFormat)
145                 {
146                         var formatFlags = FormatFlags.None;
147                         if (HasCharactersToNormalize (str))
148                                 formatFlags |= FormatFlags.HasUriCharactersToNormalize;
149
150                         return Format (str, schemeName, UriKind.Relative, UriComponents.Path, uriFormat, formatFlags);
151                 }
152
153                 private static string Format (string str, string schemeName, UriKind uriKind,
154                         UriComponents component, UriFormat uriFormat, FormatFlags formatFlags)
155                 {
156                         if (string.IsNullOrEmpty (str))
157                                 return "";
158
159                         if (UriHelper.HasCharactersToNormalize (str))
160                                 formatFlags |= UriHelper.FormatFlags.HasComponentCharactersToNormalize;
161
162                         UriSchemes scheme = GetScheme (schemeName);
163
164                         if (scheme == UriSchemes.Custom && (formatFlags & FormatFlags.HasHost) != 0)
165                                 scheme = UriSchemes.CustomWithHost;
166
167                         var reduceAfter = UriSchemes.Http | UriSchemes.Https | UriSchemes.File | UriSchemes.NetPipe | UriSchemes.NetTcp;
168
169                         if (IriParsing) {
170                                 reduceAfter |= UriSchemes.Ftp;
171                         } else if (component == UriComponents.Path) {
172                                 if(scheme == UriSchemes.Ftp)
173                                         str = Reduce (str.Replace ('\\', '/'), !IriParsing);
174                                 if (scheme == UriSchemes.CustomWithHost)
175                                         str = Reduce (str.Replace ('\\', '/'), false);
176                         }
177
178                         str = FormatString (str, scheme, uriKind, component, uriFormat, formatFlags);
179
180                         if (component == UriComponents.Path) {
181                                 if (SchemeContains (scheme, reduceAfter))
182                                         str = Reduce (str, !IriParsing);
183                                 if(IriParsing && scheme == UriSchemes.CustomWithHost)
184                                         str = Reduce (str, false);
185                         }
186
187                         return str;
188                 }
189
190                 private static string FormatString (string str, UriSchemes scheme, UriKind uriKind,
191                         UriComponents component, UriFormat uriFormat, FormatFlags formatFlags)
192                 {
193                         var s = new StringBuilder ();
194                         int len = str.Length;
195                         for (int i = 0; i < len; i++) {
196                                 char c = str [i];
197                                 if (c == '%') {
198                                         int iStart = i;
199                                         char surrogate;
200                                         char x = Uri.HexUnescapeMultiByte (str, ref i, out surrogate);
201
202                                         bool isEscaped = i - iStart > 1;
203                                         s.Append (FormatChar (x, isEscaped, scheme, uriKind, component, uriFormat, formatFlags));
204                                         if (surrogate != char.MinValue)
205                                                 s.Append (surrogate);
206
207                                         i--;
208                                 } else
209                                         s.Append (FormatChar (c, false, scheme, uriKind, component, uriFormat, formatFlags));
210                         }
211                         
212                         return s.ToString();
213                 }
214
215                 private static string FormatChar (char c, bool isEscaped, UriSchemes scheme, UriKind uriKind,
216                         UriComponents component, UriFormat uriFormat, FormatFlags formatFlags)
217                 {
218                         if (!isEscaped && NeedToEscape (c, scheme, component, uriKind, uriFormat, formatFlags) ||
219                                 isEscaped && !NeedToUnescape (c, scheme, component, uriKind, uriFormat, formatFlags))
220                                 return HexEscapeMultiByte (c);
221
222                         if (c == '\\' && component == UriComponents.Path) {
223                                 if (!IriParsing && uriFormat != UriFormat.UriEscaped &&
224                                         SchemeContains (scheme, UriSchemes.Http | UriSchemes.Https))
225                                         return "/";
226
227                                 if (SchemeContains (scheme, UriSchemes.Http | UriSchemes.Https | UriSchemes.Ftp | UriSchemes.CustomWithHost))
228                                         return (isEscaped && uriFormat != UriFormat.UriEscaped) ? "\\" : "/";
229
230                                 if (SchemeContains (scheme, UriSchemes.NetPipe | UriSchemes.NetTcp | UriSchemes.File))
231                                         return "/";
232                         }
233
234                         return c.ToString (CultureInfo.InvariantCulture);
235                 }
236
237                 private static bool NeedToUnescape (char c, UriSchemes scheme, UriComponents component, UriKind uriKind,
238                         UriFormat uriFormat, FormatFlags formatFlags)
239                 {
240                         string cStr = c.ToString (CultureInfo.InvariantCulture);
241
242                         if (uriFormat == UriFormat.Unescaped)
243                                 return true;
244
245                         UriSchemes sDecoders = UriSchemes.NetPipe | UriSchemes.NetTcp;
246
247                         if (!IriParsing)
248                                 sDecoders |= UriSchemes.Http | UriSchemes.Https;
249
250                         if (c == '/' || c == '\\') {
251                                 if (!IriParsing && uriKind == UriKind.Absolute && uriFormat != UriFormat.UriEscaped &&
252                                         uriFormat != UriFormat.SafeUnescaped)
253                                         return true;
254
255                                 if (SchemeContains (scheme, UriSchemes.File)) {
256                                         return component != UriComponents.Fragment &&
257                                                    (component != UriComponents.Query || !IriParsing);
258                                 }
259
260                                 return component != UriComponents.Query && component != UriComponents.Fragment &&
261                                            SchemeContains (scheme, sDecoders);
262                         }
263
264                         if (c == '?') {
265                                 //Avoid creating new query
266                                 if (SupportsQuery (scheme) && component == UriComponents.Path)
267                                         return false;
268
269                                 if (!IriParsing && uriFormat == ToStringUnescape) {
270                                         if (SupportsQuery (scheme))
271                                                 return component == UriComponents.Query || component == UriComponents.Fragment;
272
273                                         return component == UriComponents.Fragment;
274                                 }
275
276                                 return false;
277                         }
278
279                         if (c == '#') {
280                                 //Avoid creating new fragment
281                                 if (component == UriComponents.Path || component == UriComponents.Query)
282                                         return false;
283
284                                 return false;
285                         }
286
287                         if (uriFormat == ToStringUnescape && !IriParsing) {
288                                 if (uriKind == UriKind.Relative)
289                                         return false;
290
291                                 if ("$&+,;=@".Contains (cStr))
292                                         return true;
293
294                                 if (c < 0x20 || c == 0x7f)
295                                         return true;
296                         }
297
298                         if (uriFormat == UriFormat.SafeUnescaped || uriFormat == ToStringUnescape) {
299                                 if ("-._~".Contains (cStr))
300                                         return true;
301
302                                 if (" !\"'()*<>^`{}|".Contains (cStr))
303                                         return uriKind != UriKind.Relative ||
304                                                 (IriParsing && (formatFlags & FormatFlags.HasUriCharactersToNormalize) != 0);
305
306                                 if (":[]".Contains (cStr))
307                                         return uriKind != UriKind.Relative;
308
309                                 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))
310                                         return true;
311
312                                 if (c > 0x7f)
313                                         return true;
314
315                                 return false;
316                         }
317
318                         if (uriFormat == UriFormat.UriEscaped) {
319                                 if (!IriParsing) {
320                                         if (".".Contains (cStr)) {
321                                                 if (SchemeContains (scheme, UriSchemes.File))
322                                                         return component != UriComponents.Fragment;
323
324                                                 return component != UriComponents.Query && component != UriComponents.Fragment &&
325                                                            SchemeContains (scheme, sDecoders);
326                                         }
327
328                                         return false;
329                                 }
330
331                                 if ("-._~".Contains (cStr))
332                                         return true;
333                                 
334                                 if ((formatFlags & FormatFlags.HasUriCharactersToNormalize) != 0 &&
335                                         "!'()*:[]".Contains (cStr))
336                                         return true;
337
338                                 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))
339                                         return true;
340
341                                 return false;
342                         }
343
344                         return false;
345                 }
346
347                 private static bool NeedToEscape (char c, UriSchemes scheme, UriComponents component, UriKind uriKind,
348                         UriFormat uriFormat, FormatFlags formatFlags)
349                 {
350                         string cStr = c.ToString (CultureInfo.InvariantCulture);
351
352                         if (c == '?') {
353                                 if (uriFormat == UriFormat.Unescaped)
354                                         return false;
355
356                                 if (!SupportsQuery (scheme))
357                                         return component != UriComponents.Fragment;
358
359                                 //Avoid removing query
360                                 if (component == UriComponents.Path)
361                                         return false;
362
363                                 return false;
364                         }
365
366                         if (c == '#') {
367                                 //Avoid removing fragment
368                                 if (component == UriComponents.Path || component == UriComponents.Query)
369                                         return false;
370
371                                 if (IriParsing)
372                                         return false;
373
374                                 return uriFormat == UriFormat.UriEscaped ||
375                                         (uriFormat != UriFormat.Unescaped && (formatFlags & FormatFlags.HasComponentCharactersToNormalize) != 0);
376                         }
377
378                         if (uriFormat == UriFormat.SafeUnescaped || uriFormat == ToStringUnescape) {
379                                 if ("%".Contains (cStr))
380                                         return uriKind != UriKind.Relative;
381                         }
382
383                         if (uriFormat == UriFormat.SafeUnescaped) {
384                                 if (c < 0x20 || c == 0x7F)
385                                         return true;
386                         }
387
388                         if (uriFormat == UriFormat.UriEscaped) {
389                                 if (c < 0x20 || c >= 0x7F)
390                                         return true;
391
392                                 if (" \"%<>^`{}|".Contains (cStr))
393                                         return true;
394
395                                 if ("[]".Contains (cStr))
396                                         return !IriParsing;
397
398                                 if (c == '\\') {
399                                         return component != UriComponents.Path ||
400                                                    SchemeContains (scheme,
401                                                            UriSchemes.Gopher | UriSchemes.Ldap | UriSchemes.Mailto | UriSchemes.Nntp |
402                                                            UriSchemes.Telnet | UriSchemes.News | UriSchemes.Custom);
403                                 }
404                         }
405
406                         return false;
407                 }
408
409                 // This is called "compacting" in the MSDN documentation
410                 private static string Reduce (string path, bool trimDots)
411                 {
412                         // quick out, allocation-free, for a common case
413                         if (path == "/")
414                                 return path;
415
416                         List<string> result = new List<string> ();
417
418                         bool begin = true;
419                         for (int startpos = 0; startpos < path.Length; ) {
420                                 int endpos = path.IndexOf ('/', startpos);
421                                 if (endpos == -1)
422                                         endpos = path.Length;
423                                 string current = path.Substring (startpos, endpos-startpos);
424                                 startpos = endpos + 1;
425                                 if (begin && current.Length == 0) {
426                                         begin = false;
427                                         continue;
428                                 }
429
430                                 begin = false;
431                                 if (current == "..") {
432                                         int resultCount = result.Count;
433                                         // in 2.0 profile, skip leading ".." parts
434                                         if (resultCount == 0) {
435                                                 continue;
436                                         }
437
438                                         result.RemoveAt (resultCount - 1);
439                                         continue;
440                                 }
441
442                                 if (trimDots)
443                                         current = current.TrimEnd('.');
444
445                                 if (current == ".")
446                                         current = "";
447
448                                 if (current == "" && startpos < path.Length)
449                                         continue;
450
451                                 result.Add (current);
452                         }
453
454                         if (result.Count == 0)
455                                 return "/";
456
457                         StringBuilder res = new StringBuilder ();
458
459                         if (path [0] == '/')
460                                 res.Append ('/');
461
462                         bool first = true;
463                         foreach (string part in result) {
464                                 if (first) {
465                                         first = false;
466                                 } else {
467                                         res.Append ('/');
468                                 }
469                                 res.Append(part);
470                         }
471
472                         if (path [path.Length - 1] == '/')
473                                 res.Append ('/');
474                                 
475                         return res.ToString();
476                 }
477         }
478 }