//
// IdnMapping.cs
//
// Author:
//	Atsushi Enomoto  <atsushi@ximian.com>
//
// Copyright (C) 2007 Novell, Inc (http://www.novell.com)
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
// 
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//

#if NET_2_0

/*

** related RFCs

	RFC 3490: IDNA
	RFC 3491: Nameprep
	RFC 3492: Punycode
	RFC 3454: STRINGPREP

Prohibited in [Nameprep]: C.1.2, C.2.2, C.3 - C.9 in [STRINGPREP]

	C.1.2 non-ascii spaces (00A0, 1680, 2000-200B, 202F, 205F, 3000)
	C.2.2 non-ascii controls (0080-009F, 06DD, 070F, 180E, 200C, 200D,
	      2028, 2029, 2060-2063, 206A-206F, FEFF, FFF9-FFFC, 1D173-1D17A)
	C.3 private use (E000-F8FF, F0000-FFFFD, 100000-10FFFD)
	C.4 non-characters (FDD0-FDEF, FFFE-FFFF, nFFFE-nFFFF)
	C.5 surrogate code (D800-DFFF)
	C.6 inappropriate for plain text (FFF9-FFFD)
	C.7 inappropriate for canonical representation (2FF0-2FFB)
	C.8 change display properties or are deprecated (0340, 0341,
		200E, 200F, 202A-202E, 206A-206F)
	C.9 tagging characters (E0001, E0020-E007F)

*/

using System;
using System.Text;

namespace System.Globalization
{
	// Converts domain names between their Unicode form and the ASCII
	// compatible encoding (labels prefixed with "xn--"), following the
	// ToASCII / ToUnicode operations of RFC 3490.
	//
	// Known limitations visible in this implementation:
	// - the bidi check required by Nameprep is not implemented (see the
	//   FIXME in NamePrep);
	// - characters are inspected as UTF-16 code units, and code units in
	//   the Surrogate category are rejected as prohibited.
	public sealed class IdnMapping
	{
		bool allow_unassigned, use_std3;
		readonly Punycode puny = new Punycode ();

		public IdnMapping ()
		{
		}

		// When false (the default), NamePrep() rejects characters whose
		// Unicode category is OtherNotAssigned.
		public bool AllowUnassigned {
			get { return allow_unassigned; }
			set { allow_unassigned = value; }
		}

		// When true, ToAscii() additionally applies
		// VerifyStd3AsciiRules() to every label.
		public bool UseStd3AsciiRules {
			get { return use_std3; }
			set { use_std3 = value; }
		}

		// Two mappings are equal when both option flags match.
		public override bool Equals (object obj)
		{
			IdnMapping other = obj as IdnMapping;
			return other != null &&
			       allow_unassigned == other.allow_unassigned &&
			       use_std3 == other.use_std3;
		}

		// Packs the two option flags into the two low bits.
		public override int GetHashCode ()
		{
			return (allow_unassigned ? 2 : 0) + (use_std3 ? 1 : 0);
		}

		#region GetAscii

		// Converts the whole of "unicode" to its ASCII form.
		// Throws ArgumentNullException when "unicode" is null.
		public string GetAscii (string unicode)
		{
			if (unicode == null)
				throw new ArgumentNullException ("unicode");
			return GetAscii (unicode, 0, unicode.Length);
		}

		// Converts "unicode" from "index" to the end of the string.
		public string GetAscii (string unicode, int index)
		{
			if (unicode == null)
				throw new ArgumentNullException ("unicode");
			return GetAscii (unicode, index, unicode.Length - index);
		}

		// Converts "count" characters of "unicode" starting at "index".
		// Throws ArgumentNullException for a null string,
		// ArgumentOutOfRangeException when the range does not lie inside
		// the string, and ArgumentException (from the helpers below) when
		// a label is invalid.
		public string GetAscii (string unicode, int index, int count)
		{
			if (unicode == null)
				throw new ArgumentNullException ("unicode");
			if (index < 0)
				throw new ArgumentOutOfRangeException ("index", "index must be non-negative value");
			// Written as a subtraction so that index + count cannot
			// overflow and slip past the check.
			if (count < 0 || count > unicode.Length - index)
				throw new ArgumentOutOfRangeException ("count", "index + count must point inside the argument unicode string");

			return Convert (unicode, index, count, true);
		}

		// Shared driver for GetAscii() and GetUnicode(): splits the
		// requested substring into labels, converts each one and joins
		// the results with '.'.
		string Convert (string input, int index, int count, bool toAscii)
		{
			string s = input.Substring (index, count);

			// Case folding is formally a part of Nameprep, but it is
			// simpler to lowercase up front. It is done only when the
			// input contains at least one non-ASCII character.
			for (int i = 0; i < s.Length; i++)
				if (s [i] >= '\x80') {
					s = s.ToLower (CultureInfo.InvariantCulture);
					break;
				}

			// RFC 3490 section 4. and 4.1
			// 1) -> done as AllowUnassigned property
			// 2) split the input on the four dot characters
			string [] labels = s.Split ('.', '\u3002', '\uFF0E', '\uFF61');
			// Position of the current label inside the substring; it is
			// used only to report error positions.
			int iter = 0;
			for (int i = 0; i < labels.Length; i++) {
				string label = labels [i];
				// 3) -> done as UseStd3AsciiRules property
				// 4) ToAscii / ToUnicode
				if (label.Length == 0 && i + 1 == labels.Length)
					// If the input ends with a separator, Split()
					// adds another empty string. In that case,
					// we have to ignore it.
					continue;
				labels [i] = toAscii ? ToAscii (label, iter) : ToUnicode (label, iter);
				// Advance by the length of the *original* label (the
				// converted one usually differs) plus the separator.
				iter += label.Length + 1;
			}
			// 5) join them
			return String.Join (".", labels);
		}

		// ToASCII for a single label. "offset" is the position of the
		// label in the input and is used only in error messages.
		// Throws ArgumentException when the label is not acceptable.
		string ToAscii (string s, int offset)
		{
			// 1. scan for control and non-ASCII characters
			for (int i = 0; i < s.Length; i++) {
				// I wonder if this check is really RFC-conformant
				if (s [i] < '\x20' || s [i] == '\x7F')
					throw new ArgumentException (String.Format ("Not allowed character was found, at {0}", offset + i));
				if (s [i] >= 0x80) {
					// 2. Nameprep is needed only for non-ASCII labels
					s = NamePrep (s, offset);
					break;
				}
			}

			// 3.
			if (use_std3)
				VerifyStd3AsciiRules (s, offset);

			// 4. anything left to encode?
			for (int i = 0; i < s.Length; i++) {
				if (s [i] >= 0x80) {
					// 5. check ACE.
					if (s.StartsWith ("xn--", StringComparison.OrdinalIgnoreCase))
						throw new ArgumentException (String.Format ("The input string must not start with ACE (xn--), at {0}", offset + i));
					// 6. Punycode it.
					s = puny.Encode (s, offset);
					// 7. prepend ACE.
					s = "xn--" + s;
					break;
				}
			}

			// 8.
			VerifyLength (s, offset);

			return s;
		}

		// A converted label must be 1 to 63 characters long.
		void VerifyLength (string s, int offset)
		{
			if (s.Length == 0)
				throw new ArgumentException (String.Format ("A label in the input string resulted in an invalid zero-length string, at {0}", offset));
			if (s.Length > 63)
				throw new ArgumentException (String.Format ("A label in the input string exceeded the length in ASCII representation, at {0}", offset));
		}

		// Partial Nameprep: NFKC normalization, prohibited character
		// check and, unless AllowUnassigned is set, unassigned character
		// check. Returns the normalized label.
		string NamePrep (string s, int offset)
		{
			s = s.Normalize (NormalizationForm.FormKC);
			VerifyProhibitedCharacters (s, offset);
			// FIXME: check BIDI

			if (!allow_unassigned) {
				for (int i = 0; i < s.Length; i++)
					if (Char.GetUnicodeCategory (s, i) == UnicodeCategory.OtherNotAssigned)
						throw new ArgumentException (String.Format ("Use of unassigned Unicode characer is prohibited in this IdnMapping, at {0}", offset + i));
			}
			return s;
		}

		// Throws ArgumentException at the first character that falls in
		// one of the prohibited groups handled here: non-ASCII spaces,
		// non-ASCII controls and NUL, private use, surrogates, and the
		// C.4 / C.6 / C.7 / C.8 ranges listed below.
		void VerifyProhibitedCharacters (string s, int offset)
		{
			for (int i = 0; i < s.Length; i++) {
				switch (Char.GetUnicodeCategory (s, i)) {
				case UnicodeCategory.SpaceSeparator:
					if (s [i] < '\x80')
						continue; // valid
					break;
				case UnicodeCategory.Control:
					if (s [i] != '\x0' && s [i] < '\x80')
						continue; // valid
					break;
				case UnicodeCategory.PrivateUse:
				case UnicodeCategory.Surrogate:
					break;
				default:
					char c = s [i];
					if (// C.4: FDD0-FDEF and the FFFE/FFFF pair
					    '\uFDD0' <= c && c <= '\uFDEF' ||
					    ((int) c & 0xFFFE) == 0xFFFE ||
					    // C.6
					    '\uFFF9' <= c && c <= '\uFFFD' ||
					    // C.7
					    '\u2FF0' <= c && c <= '\u2FFB' ||
					    // C.8
					    '\u202A' <= c && c <= '\u202E' ||
					    '\u206A' <= c && c <= '\u206F')
						break;
					switch (c) {
					// C.8
					case '\u0340':
					case '\u0341':
					case '\u200E':
					case '\u200F':
					// C.2.2
					case '\u2028':
					case '\u2029':
						break;
					default:
						continue;
					}
					break;
				}
				// Every "break" above lands here: the character is prohibited.
				throw new ArgumentException (String.Format ("Not allowed character was in the input string, at {0}", offset + i));
			}
		}

		// STD3 host name rules: no leading or trailing '-', and among
		// ASCII characters only letters, digits and '-' are accepted.
		// Characters above 0x7F are not examined here.
		void VerifyStd3AsciiRules (string s, int offset)
		{
			if (s.Length > 0 && s [0] == '-')
				throw new ArgumentException (String.Format ("'-' is not allowed at head of a sequence in STD3 mode, found at {0}", offset));
			if (s.Length > 0 && s [s.Length - 1] == '-')
				throw new ArgumentException (String.Format ("'-' is not allowed at tail of a sequence in STD3 mode, found at {0}", offset + s.Length - 1));

			for (int i = 0; i < s.Length; i++) {
				char c = s [i];
				if (c == '-')
					continue;
				if (c <= '\x2F' || '\x3A' <= c && c <= '\x40' || '\x5B' <= c && c <= '\x60' || '\x7B' <= c && c <= '\x7F')
					throw new ArgumentException (String.Format ("Not allowed character in STD3 mode, found at {0}", offset + i));
			}
		}

		#endregion

		// Converts the whole of "ascii" to its Unicode form.
		// Throws ArgumentNullException when "ascii" is null.
		public string GetUnicode (string ascii)
		{
			if (ascii == null)
				throw new ArgumentNullException ("ascii");
			return GetUnicode (ascii, 0, ascii.Length);
		}

		// Converts "ascii" from "index" to the end of the string.
		public string GetUnicode (string ascii, int index)
		{
			if (ascii == null)
				throw new ArgumentNullException ("ascii");
			return GetUnicode (ascii, index, ascii.Length - index);
		}

		// Converts "count" characters of "ascii" starting at "index".
		// Argument validation mirrors GetAscii (string, int, int).
		public string GetUnicode (string ascii, int index, int count)
		{
			if (ascii == null)
				throw new ArgumentNullException ("ascii");
			if (index < 0)
				throw new ArgumentOutOfRangeException ("index", "index must be non-negative value");
			// Subtraction form avoids overflow of index + count.
			if (count < 0 || count > ascii.Length - index)
				throw new ArgumentOutOfRangeException ("count", "index + count must point inside the argument ascii string");

			return Convert (ascii, index, count, false);
		}

		// ToUnicode for a single label. Labels without the "xn--" prefix
		// are returned without decoding. Unlike the RFC operation, which
		// never fails, this throws ArgumentException when decoding or the
		// round-trip verification fails.
		string ToUnicode (string s, int offset)
		{
			// 1.
			for (int i = 0; i < s.Length; i++) {
				if (s [i] >= 0x80) {
					// 2.
					s = NamePrep (s, offset);
					break;
				}
			}

			// 3.
			if (!s.StartsWith ("xn--", StringComparison.OrdinalIgnoreCase))
				return s; // failure = return the input string as is.
			// Case folding is formally a part of Nameprep, but it is
			// simpler to lowercase here.
			s = s.ToLower (CultureInfo.InvariantCulture);

			// Saved for the verification in step 7.
			string at3 = s;

			// 4. strip the ACE prefix
			s = s.Substring (4);

			// 5.
			s = puny.Decode (s, offset);
			string at5 = s;

			// 6. re-encode the decoded label ...
			s = ToAscii (s, offset);

			// 7. ... and make sure it matches what we started from.
			if (String.Compare (at3, s, StringComparison.OrdinalIgnoreCase) != 0)
				throw new ArgumentException (String.Format ("ToUnicode() failed at verifying the result, at label part from {0}", offset));

			// 8.
			return at5;
		}
	}

	// Generic Bootstring codec (the algorithm family defined by RFC 3492)
	// driven by the parameters passed to the constructor.
	//
	// Note: this implementation operates on UTF-16 code units (System.Char
	// values), so a decoded value above U+FFFF cannot be represented and
	// is rejected by Decode().
	class Bootstring
	{
		readonly char delimiter;
		readonly int base_num, tmin, tmax, skew, damp, initial_bias, initial_n;
		
		public Bootstring (char delimiter,
				 int baseNum, int tmin, int tmax,
				 int skew, int damp,
				 int initialBias, int initialN)
		{
			this.delimiter = delimiter;
			base_num = baseNum;
			this.tmin = tmin;
			this.tmax = tmax;
			this.skew = skew;
			this.damp = damp;
			initial_bias = initialBias;
			initial_n = initialN;
		}

		// Encodes "s" and returns the encoded string (without any ACE
		// prefix). "offset" is accepted for symmetry with Decode() and is
		// not used. Arithmetic on delta inside the checked blocks throws
		// OverflowException on overflow.
		public string Encode (string s, int offset)
		{
			int n = initial_n;
			int delta = 0;
			int bias = initial_bias;
			int b = 0, h = 0;
			StringBuilder sb = new StringBuilder ();
			// Copy the basic (ASCII) characters first, in order.
			for (int i = 0; i < s.Length; i++)
				if (s [i] < '\x80')
					sb.Append (s [i]);
			// b = number of basic characters, h = characters handled so far.
			b = h = sb.Length;
			if (b > 0)
				sb.Append (delimiter);

			while (h < s.Length) {
				// m = smallest character value >= n that is still pending.
				int m = int.MaxValue;
				for (int i = 0; i < s.Length; i++)
					if (s [i] >= n && s [i] < m)
						m = s [i];
				checked { delta += (m - n) * (h + 1); }
				n = m;
				for (int i = 0; i < s.Length; i++) {
					char c = s [i];
					if (c < n || c < '\x80')
						checked { delta++; }
					if (c == n) {
						// Emit delta as a variable-length integer.
						int q = delta;
						for (int k = base_num; ;k += base_num) {
							int t =
								k <= bias + tmin ? tmin :
								k >= bias + tmax ? tmax :
								k - bias;
							if (q < t)
								break;
							sb.Append (EncodeDigit (t + (q - t) % (base_num - t)));
							q = (q - t) / (base_num - t);
						}
						sb.Append (EncodeDigit (q));
						bias = Adapt (delta, h + 1, h == b);
						delta = 0;
						h++;
					}
				}
				delta++;
				n++;
			}

			return sb.ToString ();
		}

		// 41..5A (A-Z) = 0-25
		// 61..7A (a-z) = 0-25
		// 30..39 (0-9) = 26-35
		// Encoding always produces the lowercase letter form.
		char EncodeDigit (int d)
		{
			return (char) (d < 26 ? d + 'a' : d - 26 + '0');
		}

		// Returns the digit value of "c" according to the table above,
		// or base_num when "c" is not a digit character. Explicit range
		// tests are used because char subtraction yields a signed int,
		// so a single "c - '0' < 10" test would also accept characters
		// below '0'.
		int DecodeDigit (char c)
		{
			if ('0' <= c && c <= '9')
				return c - 22; // '0' (0x30) maps to 26
			if ('A' <= c && c <= 'Z')
				return c - 'A';
			if ('a' <= c && c <= 'z')
				return c - 'a';
			return base_num;
		}

		// Bias adaptation, called after each encoded or decoded delta.
		int Adapt (int delta, int numPoints, bool firstTime)
		{
			if (firstTime)
				delta = delta / damp;
			else
				delta = delta / 2;
			delta = delta + (delta / numPoints);
			int k = 0;
			while (delta > ((base_num - tmin) * tmax) / 2) {
				delta = delta / (base_num - tmin);
				k += base_num;
			}
			return k + (((base_num - tmin + 1) * delta) / (delta + skew));
		}

		// Decodes "s" (without any ACE prefix) and returns the decoded
		// string. "offset" is the position of the label in the caller's
		// input and is used only in error messages.
		// Throws ArgumentException when the input is malformed: a
		// non-basic character before the last delimiter, a character
		// that is not a digit, input that ends in the middle of a
		// number, arithmetic overflow, or a decoded value that does not
		// fit in a System.Char.
		public string Decode (string s, int offset)
		{
			int n = initial_n;
			int i = 0;
			int bias = initial_bias;
			int b = 0;
			StringBuilder sb = new StringBuilder ();

			// b = index of the last delimiter, or 0 when there is none
			// (or when the only delimiter is the first character).
			for (int j = 0; j < s.Length; j++) {
				if (s [j] == delimiter)
					b = j;
			}

			// Everything before the last delimiter is copied verbatim
			// and must consist of basic characters only.
			for (int j = 0; j < b; j++) {
				if (s [j] >= '\x80')
					throw new ArgumentException (String.Format ("Invalid Bootstring input: non-basic character before the delimiter, at {0}", offset + j));
				sb.Append (s [j]);
			}

			for (int z = b > 0 ? b + 1 : 0; z < s.Length; ) {
				int old_i = i;
				int w = 1;
				// Read one variable-length integer into i.
				for (int k = base_num; ; k += base_num) {
					if (z >= s.Length)
						throw new ArgumentException (String.Format ("Invalid Bootstring input: unexpected end of input, at {0}", offset + z));
					int digit = DecodeDigit (s [z++]);
					if (digit >= base_num)
						throw new ArgumentException (String.Format ("Invalid Bootstring input: not a valid digit, at {0}", offset + z - 1));
					if (digit > (int.MaxValue - i) / w)
						throw new ArgumentException (String.Format ("Invalid Bootstring input: arithmetic overflow, at {0}", offset + z - 1));
					i = i + digit * w;
					int t = k <= bias + tmin ? tmin :
						k >= bias + tmax ? tmax :
						k - bias;
					if (digit < t)
						break;
					if (w > int.MaxValue / (base_num - t))
						throw new ArgumentException (String.Format ("Invalid Bootstring input: arithmetic overflow, at {0}", offset + z - 1));
					w = w * (base_num - t);
				}
				bias = Adapt (i - old_i, sb.Length + 1, old_i == 0);
				// Computed as long so the sum itself cannot overflow
				// before the range check below.
				long next = (long) n + i / (sb.Length + 1);
				i = i % (sb.Length + 1);
				if (next < '\x80' || next > Char.MaxValue)
					throw new ArgumentException (String.Format ("Invalid Bootstring decode result, at {0}", offset + z));
				n = (int) next;
				sb.Insert (i, (char) n);
				i++;
			}

			return sb.ToString ();
		}
	}

	// Bootstring configured with the Punycode parameter set (RFC 3492).
	// The constants are named after the Bootstring constructor parameters
	// they are passed to.
	class Punycode : Bootstring
	{
		const char Delimiter = '-';
		const int Base = 36;
		const int TMin = 1;
		const int TMax = 26;
		const int Skew = 38;
		const int Damp = 700;
		const int InitialBias = 72;
		const int InitialN = 0x80;

		public Punycode ()
			: base (Delimiter, Base, TMin, TMax, Skew, Damp, InitialBias, InitialN)
		{
		}
	}
}
#endif