5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2007 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 Prohibited in [Nameprep]: C.1.2, C.2.2, C.3 - C.9 in [STRINGPREP]
40 C.1.2 non-ascii spaces (00A0, 1680, 2000-200B, 202F, 205F, 3000)
41 C.2.2 non-ascii controls (0080-009F, 06DD, 070F, 180E, 200C, 200D,
42 2028, 2029, 2060-2063, 206A-206F, FEFF, FFF9-FFFC, 1D173-1D17A)
43 C.3 private use (E000-F8FF, F0000-FFFFD, 100000-10FFFD)
44 C.4 non-characters (FDD0-FDEF, FFFE-FFFF, nFFFE-nFFFF)
45 C.5 surrogate code (D800-DFFF)
46 C.6 inappropriate for plain text (FFF9-FFFD)
47 C.7 inappropriate for canonical representation (2FF0-2FFB)
48 C.8 change display properties or are deprecated (0340, 0341,
49 200E, 200F, 202A-202E, 206A-206F)
50 C.9 tagging characters (E0001, E0020-E007F)
57 namespace System.Globalization
59 public sealed class IdnMapping
// Backing fields for the AllowUnassigned and UseStd3AsciiRules properties.
61 bool allow_unassigned, use_std3;
// Punycode codec instance; ToAscii calls its Encode and ToUnicode calls its Decode.
62 Punycode puny = new Punycode ();
// Gets or sets the allow_unassigned flag. When the flag is false,
// NamePrep rejects any character whose Unicode category is
// OtherNotAssigned.
public bool AllowUnassigned
{
	get
	{
		return this.allow_unassigned;
	}
	set
	{
		this.allow_unassigned = value;
	}
}
// Gets or sets the use_std3 flag. The flag also takes part in
// Equals and GetHashCode.
// NOTE(review): presumably this gates the VerifyStd3AsciiRules call
// made from ToAscii -- confirm against the full method.
public bool UseStd3AsciiRules
{
	get
	{
		return this.use_std3;
	}
	set
	{
		this.use_std3 = value;
	}
}
// Two IdnMapping instances compare equal when both option flags
// (allow_unassigned and use_std3) match. A null argument, or an
// argument that is not an IdnMapping, is never equal.
public override bool Equals (object obj)
{
	IdnMapping that = obj as IdnMapping;
	if (that == null)
		return false;
	if (allow_unassigned != that.allow_unassigned)
		return false;
	return use_std3 == that.use_std3;
}
// Packs the two option flags into a value in the range 0..3:
// allow_unassigned contributes 2, use_std3 contributes 1.
// Consistent with Equals, which compares exactly these two flags.
public override int GetHashCode ()
{
	int hash = 0;
	if (allow_unassigned)
		hash += 2;
	if (use_std3)
		hash += 1;
	return hash;
}
// Converts the whole of unicode; forwards to the
// (string, int, int) overload with index 0 and the full length.
// Throws ArgumentNullException when unicode is null.
public string GetAscii (string unicode)
{
	if (unicode != null) {
		int length = unicode.Length;
		return GetAscii (unicode, 0, length);
	}
	throw new ArgumentNullException ("unicode");
}
// Converts unicode from index to the end of the string; forwards to
// the (string, int, int) overload, which validates index.
// Throws ArgumentNullException when unicode is null.
public string GetAscii (string unicode, int index)
{
	if (unicode != null) {
		int remaining = unicode.Length - index;
		return GetAscii (unicode, index, remaining);
	}
	throw new ArgumentNullException ("unicode");
}
// Converts the count characters of unicode that start at index,
// delegating the actual work to Convert in to-ASCII mode.
//
// Throws ArgumentNullException when unicode is null, and
// ArgumentOutOfRangeException when index is negative or when
// index/count do not describe a range inside unicode.
public string GetAscii (string unicode, int index, int count)
{
	if (unicode == null)
		throw new ArgumentNullException ("unicode");
	// The one-argument ArgumentOutOfRangeException constructor takes
	// the parameter NAME, so use the (paramName, message) overload to
	// keep the text as the message.
	if (index < 0)
		throw new ArgumentOutOfRangeException ("index", "index must be non-negative value");
	// Compare against the remaining length instead of computing
	// index + count, which could overflow for very large arguments.
	if (count < 0 || count > unicode.Length - index)
		throw new ArgumentOutOfRangeException ("count", "index + count must point inside the argument unicode string");

	return Convert (unicode, index, count, true);
}
// Shared worker behind GetAscii (toAscii == true) and GetUnicode
// (toAscii == false): takes the substring of input described by
// index/count, splits it into labels, converts each label with ToAscii
// or ToUnicode, and joins the results with '.'.
// NOTE(review): several short lines of this method (braces, the
// declaration of iter, the branch that picks ToAscii vs. ToUnicode)
// are not visible in this excerpt.
119 string Convert (string input, int index, int count, bool toAscii)
121 string s = input.Substring (index, count);
123 // Actually lowering string is done as part of
124 // Nameprep(), but it is much easier to do it in prior.
// The scan looks for a character at or above U+0080 and lower-cases
// the string with the invariant culture when one is found.
125 for (int i = 0; i < s.Length; i++)
126 if (s [i] >= '\x80') {
127 s = s.ToLower (CultureInfo.InvariantCulture);
131 // RFC 3490 section 4. and 4.1
132 // 1) -> done as AllowUnassigned property
133 // 2) split the input
// Label separators: '.', U+3002, U+FF0E and U+FF61.
134 string [] labels = s.Split ('.', '\u3002', '\uFF0E', '\uFF61');
// NOTE(review): iter is the offset handed to ToAscii/ToUnicode for
// error messages. It is advanced by labels [i].Length in the loop
// increment, i.e. after labels [i] may already have been replaced by
// its converted form, and it counts neither the separators nor
// index -- so reported positions look approximate. Confirm intent.
136 for (int i = 0; i < labels.Length; iter += labels [i].Length, i++) {
137 // 3) -> done as UseStd3AsciiRules property
139 if (labels [i].Length == 0 && i + 1 == labels.Length)
140 // If the input ends with '.', Split()
141 // adds another empty string. In that
142 // case, we have to ignore it.
145 labels [i] = ToAscii (labels [i], iter);
147 labels [i] = ToUnicode (labels [i], iter);
// The result always uses the ASCII full stop as separator.
150 return String.Join (".", labels);
// Converts a single label s to its ASCII form. offset is only used to
// report positions in exception messages.
// Visible steps: reject ASCII control characters, run NamePrep, call
// VerifyStd3AsciiRules, refuse input that already starts with "xn--",
// Punycode-encode, and finally check the length with VerifyLength.
// NOTE(review): the conditions guarding several of these steps are on
// lines not visible in this excerpt.
153 string ToAscii (string s, int offset)
156 for (int i = 0; i < s.Length; i++) {
157 // I wonder if this check is really RFC-conformant
// Rejects characters below U+0020 and U+007F (DEL).
158 if (s [i] < '\x20' || s [i] == '\x7F')
159 throw new ArgumentException (String.Format ("Not allowed character was found, at {0}", offset + i));
162 s = NamePrep (s, offset);
// NOTE(review): presumably only executed when use_std3 is set -- the
// guard is not shown here.
169 VerifyStd3AsciiRules (s, offset);
172 for (int i = 0; i < s.Length; i++) {
// A label that needs encoding must not already carry the ACE prefix.
175 if (s.StartsWith ("xn--", StringComparison.OrdinalIgnoreCase))
176 throw new ArgumentException (String.Format ("The input string must not start with ACE (xn--), at {0}", offset + i));
178 s = puny.Encode (s, offset);
186 VerifyLength (s, offset);
// Validates the length of a converted label and throws
// ArgumentException (reporting offset) when it is unacceptable: one
// message covers a zero-length result, the other a result that is too
// long in its ASCII representation.
// NOTE(review): the two length conditions themselves are on lines not
// visible in this excerpt.
191 void VerifyLength (string s, int offset)
194 throw new ArgumentException (String.Format ("A label in the input string resulted in an invalid zero-length string, at {0}", offset));
196 throw new ArgumentException (String.Format ("A label in the input string exceeded the length in ASCII representation, at {0}", offset));
// Nameprep step for one label: normalizes s to form KC, rejects
// prohibited characters, and -- unless allow_unassigned is set --
// rejects unassigned code points. offset is used for error positions.
// NOTE(review): the tail of the method (including its return) is on
// lines not visible in this excerpt.
199 string NamePrep (string s, int offset)
201 s = s.Normalize (NormalizationForm.FormKC);
202 VerifyProhibitedCharacters (s, offset);
205 if (!allow_unassigned) {
206 for (int i = 0; i < s.Length; i++)
// OtherNotAssigned is the category of unassigned code points.
207 if (Char.GetUnicodeCategory (s, i) == UnicodeCategory.OtherNotAssigned)
// NOTE(review): the message below contains the typo "characer"; it is
// runtime text and is left unchanged here.
208 throw new ArgumentException (String.Format ("Use of unassigned Unicode characer is prohibited in this IdnMapping, at {0}", offset + i));
// Scans s and throws ArgumentException (position = offset + index)
// when it contains a character prohibited by the STRINGPREP tables
// listed at the top of this file. Characters are first classified by
// Unicode category; the remaining ones are matched against explicit
// code point ranges.
// NOTE(review): several short lines of this method (braces, break /
// continue statements, individual case labels) are not visible in this
// excerpt; only the visible lines are reproduced and fixed here.
213 void VerifyProhibitedCharacters (string s, int offset)
215 for (int i = 0; i < s.Length; i++) {
216 switch (Char.GetUnicodeCategory (s, i)) {
217 case UnicodeCategory.SpaceSeparator:
221 case UnicodeCategory.Control:
222 if (s [i] != '\x0' && s [i] < '\x80')
225 case UnicodeCategory.PrivateUse:
226 case UnicodeCategory.Surrogate:
// C.4 non-characters are FDD0-FDEF; the lower bound used to be
// U+FDDF, which let U+FDD0..U+FDDE through.
231 '\uFDD0' <= c && c <= '\uFDEF' ||
// Masking with 0xFFFE matches both U+FFFE and U+FFFF (C.4 lists
// FFFE-FFFF); the previous 0xFFFF mask only matched U+FFFE.
232 ((int) c & 0xFFFE) == 0xFFFE ||
// C.6 inappropriate for plain text
234 '\uFFF9' <= c && c <= '\uFFFD' ||
// C.7 inappropriate for canonical representation
236 '\u2FF0' <= c && c <= '\u2FFB' ||
// C.8 change display properties or are deprecated
238 '\u202A' <= c && c <= '\u202E' ||
239 '\u206A' <= c && c <= '\u206F')
256 throw new ArgumentException (String.Format ("Not allowed character was in the input string, at {0}", offset + i));
// Enforces the STD3 ASCII rules on a label: no leading or trailing
// '-', and no ASCII character outside letters and digits. Throws
// ArgumentException with the offending position (offset-based).
260 void VerifyStd3AsciiRules (string s, int offset)
262 if (s.Length > 0 && s [0] == '-')
263 throw new ArgumentException (String.Format ("'-' is not allowed at head of a sequence in STD3 mode, found at {0}", offset));
264 if (s.Length > 0 && s [s.Length - 1] == '-')
265 throw new ArgumentException (String.Format ("'-' is not allowed at tail of a sequence in STD3 mode, found at {0}", offset + s.Length - 1));
267 for (int i = 0; i < s.Length; i++) {
// Rejected ranges: U+0000-002F, U+003A-0040, U+005B-0060 and
// U+007B-007F, i.e. every ASCII character except 0-9, A-Z and a-z.
// NOTE(review): '-' (U+002D) lies inside the first range; presumably
// a line not visible in this excerpt exempts interior hyphens --
// confirm.
271 if (c <= '\x2F' || '\x3A' <= c && c <= '\x40' || '\x5B' <= c && c <= '\x60' || '\x7B' <= c && c <= '\x7F')
272 throw new ArgumentException (String.Format ("Not allowed character in STD3 mode, found at {0}", offset + i));
// Converts the whole of ascii; forwards to the
// (string, int, int) overload with index 0 and the full length.
// Throws ArgumentNullException when ascii is null.
public string GetUnicode (string ascii)
{
	if (ascii != null) {
		int length = ascii.Length;
		return GetUnicode (ascii, 0, length);
	}
	throw new ArgumentNullException ("ascii");
}
// Converts ascii from index to the end of the string; forwards to
// the (string, int, int) overload, which validates index.
// Throws ArgumentNullException when ascii is null.
public string GetUnicode (string ascii, int index)
{
	if (ascii != null) {
		int remaining = ascii.Length - index;
		return GetUnicode (ascii, index, remaining);
	}
	throw new ArgumentNullException ("ascii");
}
// Converts the count characters of ascii that start at index,
// delegating the actual work to Convert in to-Unicode mode.
//
// Throws ArgumentNullException when ascii is null, and
// ArgumentOutOfRangeException when index is negative or when
// index/count do not describe a range inside ascii.
public string GetUnicode (string ascii, int index, int count)
{
	if (ascii == null)
		throw new ArgumentNullException ("ascii");
	// The one-argument ArgumentOutOfRangeException constructor takes
	// the parameter NAME, so use the (paramName, message) overload to
	// keep the text as the message.
	if (index < 0)
		throw new ArgumentOutOfRangeException ("index", "index must be non-negative value");
	// Compare against the remaining length instead of computing
	// index + count, which could overflow for very large arguments.
	if (count < 0 || count > ascii.Length - index)
		throw new ArgumentOutOfRangeException ("count", "index + count must point inside the argument ascii string");

	return Convert (ascii, index, count, false);
}
// Converts a single label s back to Unicode. offset is only used to
// report positions in exception messages.
// Visible steps: NamePrep, return the label unchanged when it does not
// start with "xn--", lower-case it, Punycode-decode it, then re-encode
// the decoded text with ToAscii and compare against the saved value
// (at3) to verify the round trip.
// NOTE(review): several lines (the guard around NamePrep, the
// assignment of at3, the prefix removal and the final return) are not
// visible in this excerpt.
304 string ToUnicode (string s, int offset)
307 for (int i = 0; i < s.Length; i++) {
310 s = NamePrep (s, offset);
316 if (!s.StartsWith ("xn--", StringComparison.OrdinalIgnoreCase))
317 return s; // failure = return the input string as is.
318 // Actually lowering string is done as part of
319 // Nameprep(), but it is much easier to do it in prior.
320 s = s.ToLower (CultureInfo.InvariantCulture);
328 s = puny.Decode (s, offset);
332 s = ToAscii (s, offset);
// Round-trip check: a mismatch (ignoring case) means the decoded label
// is not a faithful representation of the input.
335 if (String.Compare (at3, s, StringComparison.OrdinalIgnoreCase) != 0)
336 throw new ArgumentException (String.Format ("ToUnicode() failed at verifying the result, at label part from {0}", offset));
// Character appended by Encode after the copied basic code points and
// searched for by Decode.
345 readonly char delimiter;
// Bootstring parameters; all are fixed at construction time and read
// by Encode, Decode and Adapt.
346 readonly int base_num, tmin, tmax, skew, damp, initial_bias, initial_n;
// Stores the Bootstring parameters in the readonly fields of this
// instance.
// NOTE(review): one line of the parameter list and several assignments
// are not visible in this excerpt; judging by the field list they
// presumably cover skew and damp plus base_num/tmin/tmax -- confirm.
348 public Bootstring (char delimiter,
349 int baseNum, int tmin, int tmax,
351 int initialBias, int initialN)
353 this.delimiter = delimiter;
359 initial_bias = initialBias;
360 initial_n = initialN;
// Bootstring-encodes s and returns the encoded text built in a
// StringBuilder.
// NOTE(review): many short lines (declarations of n, delta, h, b, q,
// t, c; braces; several statements) are not visible in this excerpt,
// so the comments below describe only what the visible lines show.
363 public string Encode (string s, int offset)
367 int bias = initial_bias;
369 StringBuilder sb = new StringBuilder ();
370 for (int i = 0; i < s.Length; i++)
375 sb.Append (delimiter);
// Main loop: runs until h has reached the length of the input.
377 while (h < s.Length) {
// Find m, the smallest character value in s that is >= n.
378 int m = int.MaxValue;
379 for (int i = 0; i < s.Length; i++)
380 if (s [i] >= n && s [i] < m)
// checked: an arithmetic overflow here raises OverflowException
// instead of wrapping silently.
382 checked { delta += (m - n) * (h + 1); }
384 for (int i = 0; i < s.Length; i++) {
386 if (c < n || c < '\x80')
// Emit delta as a variable-length integer; the threshold t is clamped
// to the range [tmin, tmax] around bias.
390 for (int k = base_num; ;k += base_num) {
392 k <= bias + tmin ? tmin :
393 k >= bias + tmax ? tmax :
397 sb.Append (EncodeDigit (t + (q - t) % (base_num - t)));
398 q = (q - t) / (base_num - t);
400 sb.Append (EncodeDigit (q));
// Re-tune bias after each emitted delta; the third argument tells
// Adapt whether this is the first one (h == b).
401 bias = Adapt (delta, h + 1, h == b);
410 return sb.ToString ();
// Maps a Bootstring digit value to the character that represents it:
//   0..25  -> 'a'..'z' (0x61..0x7A)
//   26..35 -> '0'..'9' (0x30..0x39)
// 'A'..'Z' (0x41..0x5A) also stand for 0..25 but are never produced
// here.
char EncodeDigit (int d)
{
	int code;
	if (d < 26)
		code = 'a' + d;
	else
		code = '0' + (d - 26);
	return (char) code;
}
// Maps an encoded character back to its Bootstring digit value:
//   '0'..'9' -> 26..35
//   'A'..'Z' -> 0..25
//   'a'..'z' -> 0..25
// Any other character yields base_num, which is not a valid digit.
//
// The ranges are tested on both ends. Subtracting chars produces a
// signed int, so a one-sided test such as "c - '0' < 10" is also true
// for every character below '0' and used to return bogus (even
// negative) digit values for characters like '/', ':' or '['.
int DecodeDigit (char c)
{
	if ('0' <= c && c <= '9')
		return c - '0' + 26;
	if ('A' <= c && c <= 'Z')
		return c - 'A';
	if ('a' <= c && c <= 'z')
		return c - 'a';
	return base_num;
}
// Computes the next bias value from the delta just processed.
// numPoints is used as a divisor for delta; firstTime is supplied by
// the callers (Encode passes h == b, Decode passes old_i == 0).
// NOTE(review): the branch on firstTime, the declaration of k and the
// loop body that advances k are on lines not visible in this excerpt.
428 int Adapt (int delta, int numPoints, bool firstTime)
431 delta = delta / damp;
434 delta = delta + (delta / numPoints);
// Repeatedly scale delta down while it exceeds the threshold
// ((base_num - tmin) * tmax) / 2.
436 while (delta > ((base_num - tmin) * tmax) / 2) {
437 delta = delta / (base_num - tmin);
440 return k + (((base_num - tmin + 1) * delta) / (delta + skew));
// Bootstring-decodes s and returns the decoded text built in a
// StringBuilder. Throws ArgumentException (position = offset + z) when
// the decoded result is judged invalid.
// NOTE(review): many short lines (declarations of n, i, b, old_i, w;
// braces; the validity test before the throw) are not visible in this
// excerpt, so the comments below describe only what the visible lines
// show.
443 public string Decode (string s, int offset)
447 int bias = initial_bias;
449 StringBuilder sb = new StringBuilder ();
// Scan the input for the delimiter character.
451 for (int j = 0; j < s.Length; j++) {
452 if (s [j] == delimiter)
// Decoding starts just after position b when b > 0, otherwise at the
// beginning of the input.
459 for (int z = b > 0 ? b + 1 : 0; z < s.Length; ) {
462 for (int k = base_num; ; k += base_num) {
// NOTE(review): s [z++] is read here with no bounds check visible in
// this excerpt; truncated input may run past the end of s -- confirm.
463 int digit = DecodeDigit (s [z++]);
// Threshold t is clamped to the range [tmin, tmax] around bias.
465 int t = k <= bias + tmin ? tmin :
466 k >= bias + tmax ? tmax :
470 w = w * (base_num - t);
472 bias = Adapt (i - old_i, sb.Length + 1, old_i == 0);
// Split i into the code point increment (quotient) and the insertion
// position within the output (remainder).
473 n = n + i / (sb.Length + 1);
474 i = i % (sb.Length + 1);
476 throw new ArgumentException (String.Format ("Invalid Bootstring decode result, at {0}", offset + z));
477 sb.Insert (i, (char) n);
481 return sb.ToString ();
// Bootstring specialization that passes a fixed parameter set to the
// base constructor.
485 class Punycode : Bootstring
// Arguments in order: delimiter '-', baseNum 36, tmin 1, tmax 26, two
// values (38, 700) for the parameters whose declaration is not visible
// in this excerpt (presumably skew and damp -- confirm), initialBias
// 72 and initialN 0x80.
488 : base ('-', 36, 1, 26, 38, 700, 72, 0x80)