mcs/class/referencesource/System.Web/Util/UriUtil.cs

   1 //------------------------------------------------------------------------------
   2 // <copyright file="UriUtil.cs" company="Microsoft">
   3 //     Copyright (c) Microsoft Corporation.  All rights reserved.
   4 // </copyright>
   5 //------------------------------------------------------------------------------
   6
   7 namespace System.Web.Util {
   8     using System;
   9     using System.Linq;
  10     using System.Text;
  11
  12     // Contains helpers for URI generation and parsing
  13
  14     internal static class UriUtil {
  15
  16         private static readonly char[] _queryFragmentSeparators = new char[] { '?', '#' };
  17
  18         // Similar to UriBuilder, but contains semantics specific to generation
  19         // of the Request.Url property.
  20         internal static Uri BuildUri(string scheme, string serverName, string port, string path, string queryString) {
  21             return BuildUriImpl(scheme, serverName, port, path, queryString, AppSettings.UseLegacyRequestUrlGeneration);
  22         }
  23
  24         // for unit testing
  25         internal static Uri BuildUriImpl(string scheme, string serverName, string port, string path, string queryString, bool useLegacyRequestUrlGeneration) {
  26             Debug.Assert(!String.IsNullOrEmpty(scheme));
  27             Debug.Assert(!String.IsNullOrEmpty(serverName));
  28             Debug.Assert(!String.IsNullOrEmpty(path));
  29
  30             if (!useLegacyRequestUrlGeneration) {
  31                 if (path != null) {
  32                     // The path that is provided to us is expected to be in an already-decoded
  33                     // state, but the Uri class expects encoded input, so we'll re-encode.
  34                     // This removes ambiguity that can lead to unintentional double-unescaping.
  35                     path = EscapeForPath(path);
  36                 }
  37
  38                 if (queryString != null) {
  39                     // Need to replace any stray '#' characters that appear in the
  40                     // query string so that we don't end up accidentally generating
  41                     // a fragment in the resulting URI.
  42                     string reencodedQueryString = queryString.Replace("#", "%23");
  43                     queryString = reencodedQueryString;
  44                 }
  45             }
  46
  47             if (port != null) {
  48                 port = ":" + port;
  49             }
  50
  51             string uriString = scheme + "://" + serverName + port + path + queryString;
  52             return new Uri(uriString);
  53         }
  54
  55         private static string EscapeForPath(string unescaped) {
  56             // DevDiv 762893: Applications might not call Uri.UnescapeDataString when looking
  57             // at components of the URI, and they'll be broken if certain path-safe characters
  58             // are now escaped.
  59             if (String.IsNullOrEmpty(unescaped) || ContainsOnlyPathSafeCharacters(unescaped))
  60                 return unescaped;
  61
  62             string escaped = Uri.EscapeDataString(unescaped);
  63
  64             // If nothing was escaped, no need to decode
  65             if (String.Equals(escaped, unescaped, StringComparison.Ordinal))
  66                 return unescaped;
  67
  68             // We're going to perform multiple replace operations.
  69             // StringBuilder.Replace is much more memory-efficient than String.Replace
  70             StringBuilder builder = new StringBuilder(escaped);
  71
  72             // Uri.EscapeDataString() is guaranteed to produce uppercase escape sequences.
  73             // Path-safe characters are listed in RFC 3986, Appendix A. We also add '/' to
  74             // this list since EscapeDataString may contain path segments.
  75             builder.Replace("%21", "!");
  76             builder.Replace("%24", "$");
  77             builder.Replace("%26", "&");
  78             builder.Replace("%27", "'");
  79             builder.Replace("%28", "(");
  80             builder.Replace("%29", ")");
  81             builder.Replace("%2A", "*");
  82             builder.Replace("%2B", "+");
  83             builder.Replace("%2C", ",");
  84             builder.Replace("%2F", "/");
  85             builder.Replace("%3A", ":");
  86             builder.Replace("%3B", ";");
  87             builder.Replace("%3D", "=");
  88             builder.Replace("%40", "@");
  89             return builder.ToString();
  90         }
  91
  92         private static bool ContainsOnlyPathSafeCharacters(string input) {
  93             // See RFC 3986, Appendix A for the list of path-safe characters.
  94             for (int i = 0; i < input.Length; i++) {
  95                 char c = input[i];
  96
  97                 // unreserved = ALPHA / DIGIT / ...
  98                 if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')) {
  99                     continue;
 100                 }
 101
 102                 switch (c) {
 103                     case '/': // path-abempty; path-absolute
 104                     case '-': case '.': case '_': case '~': // unreserved
 105                     case ':': case '@': // pchar
 106                     case '!': case '$': case '&': case '\'': case '(': case ')': // sub-delims
 107                     case '*': case '+': case ',': case ';': case '=': // sub-delims, cont.
 108                         continue;
 109
 110                     default:
 111                         return false; // not path-safe
 112                 }
 113             }
 114
 115             // no bad characters found
 116             return true;
 117         }
 118
 119         // Just extracts the query string and fragment from the input path by splitting on the separator characters.
 120         // Doesn't perform any validation as to whether the input represents a valid URL.
 121         // Concatenating the pieces back together will form the original input string.
 122         internal static void ExtractQueryAndFragment(string input, out string path, out string queryAndFragment) {
 123             int queryFragmentSeparatorPos = input.IndexOfAny(_queryFragmentSeparators);
 124             if (queryFragmentSeparatorPos != -1) {
 125                 path = input.Substring(0, queryFragmentSeparatorPos);
 126                 queryAndFragment = input.Substring(queryFragmentSeparatorPos);
 127             }
 128             else {
 129                 // no query or fragment separator
 130                 path = input;
 131                 queryAndFragment = null;
 132             }
 133         }
 134
 135         // Schemes that are generally considered safe for the purposes of redirects or other places where URLs are rendered to the page.
 136         internal static bool IsSafeScheme(String url) {
 137             return url.IndexOf(":", StringComparison.Ordinal) == -1 ||
 138                     url.StartsWith("http:", StringComparison.OrdinalIgnoreCase) ||
 139                     url.StartsWith("https:", StringComparison.OrdinalIgnoreCase) ||
 140                     url.StartsWith("ftp:", StringComparison.OrdinalIgnoreCase) ||
 141                     url.StartsWith("file:", StringComparison.OrdinalIgnoreCase) ||
 142                     url.StartsWith("news:", StringComparison.OrdinalIgnoreCase);
 143         }
 144
 145         // Attempts to split a URI into its constituent pieces.
 146         // Even if this method returns true, one or more of the out parameters might contain a null or empty string, e.g. if there is no query / fragment.
 147         // Concatenating the pieces back together will form the original input string.
 148         internal static bool TrySplitUriForPathEncode(string input, out string schemeAndAuthority, out string path, out string queryAndFragment, bool checkScheme) {
 149             // Strip off ?query and #fragment if they exist, since we're not going to look at them
 150             string inputWithoutQueryFragment;
 151             ExtractQueryAndFragment(input, out inputWithoutQueryFragment, out queryAndFragment);
 152
 153             // DevDiv #450404: UrlPathEncode shouldn't care about the scheme of the incoming URL when it is
 154             // performing encoding; only Response.Redirect should.
 155             bool isValidScheme = (checkScheme) ? IsSafeScheme(inputWithoutQueryFragment) : true;
 156
 157             // Use Uri class to parse the url into authority and path, use that to help decide
 158             // where to split the string. Do not rebuild the url from the Uri instance, as that
 159             // might have subtle changes from the original string (for example, see below about "://").
 160             Uri uri;
 161             if (isValidScheme && Uri.TryCreate(inputWithoutQueryFragment, UriKind.Absolute, out uri)) {
 162                 string authority = uri.Authority; // e.g. "foo:81" in "http://foo:81/bar"
 163                 if (!String.IsNullOrEmpty(authority)) {
 164                     // don't make any assumptions about the scheme or the "://" part.
 165                     // For example, the "//" could be missing, or there could be "///" as in "file:///C:\foo.txt"
 166                     // To retain the same string as originally given, find the authority in the original url and include
 167                     // everything up to that.
 168                     int authorityIndex = inputWithoutQueryFragment.IndexOf(authority, StringComparison.OrdinalIgnoreCase);
 169                     if (authorityIndex != -1) {
 170                         int schemeAndAuthorityLength = authorityIndex + authority.Length;
 171                         schemeAndAuthority = inputWithoutQueryFragment.Substring(0, schemeAndAuthorityLength);
 172                         path = inputWithoutQueryFragment.Substring(schemeAndAuthorityLength);
 173                         return true;
 174                     }
 175                 }
 176             }
 177
 178             // Not a safe URL
 179             schemeAndAuthority = null;
 180             path = null;
 181             queryAndFragment = null;
 182             return false;
 183         }
 184
 185     }
 186 }