using System.Text;
using I18N.Common;
+#if DISABLE_UNSAFE
+using MonoEncoder = I18N.Common.MonoSafeEncoder;
+using MonoEncoding = I18N.Common.MonoSafeEncoding;
+#endif
+
namespace I18N.CJK
{
[Serializable]
{
// Constructor.
public GB18030Encoding ()
- : base (54936)
+ : base (54936, 936) // 54936 was the sole argument before this change; NOTE(review): the added 936 is presumably the Windows code page - confirm against the base-class constructor
{
}
get { return "Chinese Simplified (GB18030)"; }
}
+ // Header name reported for this encoding.
+ public override string HeaderName {
+ get {
+ return "GB18030";
+ }
+ }
+
+ // Body name reported for this encoding.
+ public override string BodyName {
+ get {
+ return "GB18030";
+ }
+ }
+
public override string WebName {
get { return "GB18030"; }
}
+ // Always reports true for this encoding.
+ public override bool IsMailNewsDisplay {
+ get {
+ return true;
+ }
+ }
+
+ // Always reports true for this encoding.
+ public override bool IsMailNewsSave {
+ get {
+ return true;
+ }
+ }
+
+ // Always reports true for this encoding.
+ public override bool IsBrowserDisplay {
+ get {
+ return true;
+ }
+ }
+
+ // Always reports true for this encoding.
+ public override bool IsBrowserSave {
+ get {
+ return true;
+ }
+ }
+
public override int GetMaxByteCount (int len)
{
// non-GB2312 characters in \u0080 - \uFFFF
return len;
}
- public override int GetByteCount (char [] chars, int index, int length)
+#if !DISABLE_UNSAFE
+ // Counts the bytes for `count` chars with a throwaway encoder, passing true as its refresh argument.
+ public unsafe override int GetByteCountImpl (char* chars, int count)
{
- return new GB18030Encoder (this).GetByteCount (chars, index, length, true);
+ GB18030Encoder encoder = new GB18030Encoder (this);
+ return encoder.GetByteCountImpl (chars, count, true);
}
public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount)
{
return new GB18030Encoder (this).GetBytesImpl (chars, charCount, bytes, byteCount, true);
}
+#else
+ // DISABLE_UNSAFE counterpart of GetByteCountImpl: counts bytes with a throwaway
+ // encoder, passing true as its refresh argument.
+ public override int GetByteCount (char [] chars, int index, int length)
+ {
+ GB18030Encoder encoder = new GB18030Encoder (this);
+ return encoder.GetByteCount (chars, index, length, true);
+ }
+
+ // DISABLE_UNSAFE counterpart of GetBytesImpl: encodes with a throwaway encoder,
+ // passing true as its refresh argument, and returns that encoder's result.
+ public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+ {
+ GB18030Encoder encoder = new GB18030Encoder (this);
+ return encoder.GetBytes (chars, charIndex, charCount, bytes, byteIndex, true);
+ }
+#endif
public override int GetCharCount (byte [] bytes, int start, int len)
{
{
return new GB18030Decoder ().GetChars (bytes, byteIdx, srclen, chars, charIdx);
}
+
+ // Hands out a new GB18030Encoder constructed with this encoding instance.
+ public override Encoder GetEncoder ()
+ {
+ Encoder encoder = new GB18030Encoder (this);
+ return encoder;
+ }
+
+ // Hands out a new GB18030Decoder; no arguments are passed to it.
+ public override Decoder GetDecoder ()
+ {
+ Decoder decoder = new GB18030Decoder ();
+ return decoder;
+ }
}
- class GB18030Decoder : Decoder
+ class GB18030Decoder : DbcsEncoding.DbcsDecoder
{
static DbcsConvert gb2312 = DbcsConvert.Gb2312;
// for now incomplete block is not supported - should we?
// int incomplete1 = -1, incomplete2 = -1, incomplete3 = -1;
+ public GB18030Decoder ()
+ : base (null) // hands null to the DbcsEncoding.DbcsDecoder constructor; this class looks bytes up in its own static gb2312 field. NOTE(review): the base argument is presumably a conversion table - confirm
+ {
+ }
+
public override int GetCharCount (byte [] bytes, int start, int len)
{
- if (bytes == null)
- throw new ArgumentNullException ("bytes");
- if (start < 0 || start > bytes.Length)
- throw new ArgumentOutOfRangeException ("start");
- if (len < 0 || start + len > bytes.Length)
- throw new ArgumentOutOfRangeException ("len");
+ CheckRange (bytes, start, len);
int end = start + len;
int ret = 0;
public override int GetChars (byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
{
- if (bytes == null)
- throw new ArgumentNullException ("bytes");
- if (chars == null)
- throw new ArgumentNullException ("chars");
- if (byteIndex < 0 || byteIndex > bytes.Length)
- throw new ArgumentOutOfRangeException ("byteIndex");
- if (byteCount < 0 || byteIndex + byteCount > bytes.Length)
- throw new ArgumentOutOfRangeException ("byteCount");
- if (charIndex < 0 || charIndex > chars.Length)
- throw new ArgumentOutOfRangeException ("charIndex");
+ CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
int byteEnd = byteIndex + byteCount;
int charStart = charIndex;
} else {
byte first = bytes [byteIndex];
int ord = ((first - 0x81) * 191 + second - 0x40) * 2;
- char c1 = (char) (gb2312.n2u [ord] + gb2312.n2u [ord + 1] * 256);
+ char c1 = ord < 0 || ord >= gb2312.n2u.Length ?
+ '\0' : (char) (gb2312.n2u [ord] + gb2312.n2u [ord + 1] * 256);
if (c1 == 0)
chars [charIndex++] = '?';
else
}
}
- class GB18030Encoder : MonoEncoding.MonoEncoder
+ class GB18030Encoder : MonoEncoder
{
static DbcsConvert gb2312 = DbcsConvert.Gb2312;
{
}
- char incomplete;
+ char incomplete_byte_count; // despite the name this holds a char, not a count: the lone trailing surrogate seen by the byte-count methods (char.MinValue when none)
+ char incomplete_bytes; // lone trailing surrogate held back by the GetBytes methods for the next call (char.MinValue when none)
- public override int GetByteCount (char [] chars, int start, int len, bool refresh)
+#if !DISABLE_UNSAFE
+ public unsafe override int GetByteCountImpl (char* chars, int count, bool refresh)
{
- if (refresh)
- incomplete = char.MinValue;
-
- if (chars == null)
- throw new ArgumentNullException ("chars");
- if (start < 0 || start > chars.Length)
- throw new ArgumentOutOfRangeException ("index");
- if (len < 0 || start + len > chars.Length)
- throw new ArgumentOutOfRangeException ("count");
-
- int end = start + len;
+ int start = 0;
+ int end = count;
int ret = 0;
while (start < end) {
char ch = chars [start];
continue;
} else if (Char.IsSurrogate (ch)) {
// Surrogate
- if (start + 1 == end)
- break; // incomplete
- ret += 4;
- start += 2;
+ if (start + 1 == end) {
+ incomplete_byte_count = ch;
+ start++;
+ } else {
+ ret += 4;
+ start += 2;
+ }
continue;
}
}
// non-GB2312
- ret += 4;
+ long value = GB18030Source.FromUCS (ch);
+ if (value < 0)
+ ret++; // invalid(?)
+ else
+ ret += 4;
start++;
}
+
+ if (refresh) {
+ if (incomplete_byte_count != char.MinValue)
+ ret++;
+ incomplete_byte_count = char.MinValue;
+ }
return ret;
}
- public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount, bool flush)
+ public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount, bool refresh)
{
int charIndex = 0;
int byteIndex = 0;
-#if NET_2_0
- EncoderFallbackBuffer buffer = null;
-#endif
int charEnd = charIndex + charCount;
int byteStart = byteIndex;
- char ch = incomplete;
+ char ch = incomplete_bytes;
while (charIndex < charEnd) {
- if (incomplete == char.MinValue)
+ if (incomplete_bytes == char.MinValue)
ch = chars [charIndex++];
else
- incomplete = char.MinValue;
+ incomplete_bytes = char.MinValue;
if (ch < 0x80) {
// ASCII
} else if (Char.IsSurrogate (ch)) {
// Surrogate
if (charIndex == charEnd) {
- incomplete = ch;
+ incomplete_bytes = ch;
break; // incomplete
}
char ch2 = chars [charIndex++];
if (!Char.IsSurrogate (ch2)) {
// invalid surrogate
-#if NET_2_0
HandleFallback (
chars, ref charIndex, ref charCount,
- bytes, ref byteIndex, ref byteCount);
-#else
- bytes [byteIndex++] = (byte) '?';
-#endif
+ bytes, ref byteIndex, ref byteCount, null);
continue;
}
int cp = (ch - 0xD800) * 0x400 + ch2 - 0xDC00;
}
long value = GB18030Source.FromUCS (ch);
+ if (value < 0)
+ bytes [byteIndex++] = 0x3F; // invalid(?)
+ else {
+ // non-GB2312
+ GB18030Source.Unlinear (bytes + byteIndex, value);
+ byteIndex += 4;
+ }
+ }
+
+ if (refresh) {
+ if (incomplete_bytes != char.MinValue)
+ bytes [byteIndex++] = 0x3F; // incomplete
+ incomplete_bytes = char.MinValue;
+ }
+
+ return byteIndex - byteStart;
+ }
+#else
+
+ // DISABLE_UNSAFE byte counter for chars[index .. index + count).
+ // Per char: 1 byte for ASCII, 0x80 and 0xFF; 2 bytes when gb2312.u2n has both bytes
+ // nonzero; otherwise 4 bytes, or 1 byte when GB18030Source.FromUCS returns a negative value.
+ // A surrogate followed by another char counts 4 bytes for the two chars; a surrogate in
+ // the last position is stored in incomplete_byte_count and is only counted (1 byte)
+ // when refresh is true, which also clears that field.
+ public override int GetByteCount(char[] chars, int index, int count, bool refresh)
+ {
+ // Scan only the caller's window; starting at 0 would count the wrong chars when index != 0.
+ int start = index;
+ int end = index + count;
+ int ret = 0;
+ while (start < end)
+ {
+ char ch = chars[start];
+ if (ch < 0x80)
+ {
+ // ASCII
+ ret++;
+ start++;
+ continue;
+ }
+ else if (Char.IsSurrogate(ch))
+ {
+ // Surrogate
+ if (start + 1 == end)
+ {
+ // Lone surrogate at the end of the window: remember it, count it on refresh.
+ incomplete_byte_count = ch;
+ start++;
+ }
+ else
+ {
+ ret += 4;
+ start += 2;
+ }
+ continue;
+ }
+
+ if (ch == 0x80 || ch == 0xFF)
+ {
+ // Character maps to itself: GetBytes writes a single byte for 0x80 as well as
+ // for 0xFF, so both must be counted as one byte here.
+ ret++;
+ start++;
+ continue;
+ }
+
+ byte b1 = gb2312.u2n[((int)ch) * 2 + 1];
+ byte b2 = gb2312.u2n[((int)ch) * 2];
+ if (b1 != 0 && b2 != 0)
+ {
+ // GB2312
+ ret += 2;
+ start++;
+ continue;
+ }
+
// non-GB2312
- GB18030Source.Unlinear (bytes + byteIndex, value);
- byteIndex += 4;
+ long value = GB18030Source.FromUCS(ch);
+ if (value < 0)
+ ret++; // negative FromUCS result: GetBytes emits one 0x3F byte for this char
+ else
+ ret += 4;
+ start++;
}
+
+ if (refresh)
+ {
+ // A surrogate still pending at flush time is counted as one byte.
+ if (incomplete_byte_count != char.MinValue)
+ ret++;
+ incomplete_byte_count = char.MinValue;
+ }
+ return ret;
+ }
+
+ public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool refresh)
+ { // Encodes chars[charIndex .. charIndex + charCount) into bytes starting at byteIndex; returns the number of bytes written
+ int byteCount = bytes.Length; // only used as the ref argument handed to HandleFallback below
+ int charEnd = charIndex + charCount;
+ int byteStart = byteIndex;
+ char ch = incomplete_bytes; // surrogate held back by a previous call, or char.MinValue
+
+ while (charIndex < charEnd)
+ {
+ if (incomplete_bytes == char.MinValue)
+ ch = chars[charIndex++]; // nothing pending: consume the next input char
+ else
+ incomplete_bytes = char.MinValue; // the pending surrogate is already in ch; clear the slot
+
+ if (ch < 0x80)
+ {
+ // ASCII: copied through as one byte
+ bytes[byteIndex++] = (byte)ch;
+ continue;
+ }
+ else if (Char.IsSurrogate(ch))
+ {
+ // Surrogate
+ if (charIndex == charEnd)
+ {
+ incomplete_bytes = ch; // keep the lone trailing surrogate for the next call
+ break; // incomplete
+ }
+ char ch2 = chars[charIndex++];
+ if (!Char.IsSurrogate(ch2))
+ {
+ // invalid surrogate: ch2 is not a surrogate, so the pair is handed to the fallback
+ HandleFallback (chars, ref charIndex, ref charCount,
+ bytes, ref byteIndex, ref byteCount, null);
+ continue;
+ }
+ int cp = (ch - 0xD800) * 0x400 + ch2 - 0xDC00; // offset computed from the two surrogates; 0x10000 is not added here
+ GB18030Source.Unlinear(bytes, byteIndex, GB18030Source.FromUCSSurrogate(cp)); // NOTE(review): presumably writes 4 bytes at byteIndex, matching the += 4 below - confirm
+ byteIndex += 4;
+ continue;
+ }
+
+
+ if (ch <= 0x80 || ch == 0xFF)
+ {
+ // Character maps to itself
+ bytes[byteIndex++] = (byte)ch;
+ continue;
+ }
+
+ byte b1 = gb2312.u2n[((int)ch) * 2 + 1]; // u2n holds two entries per char: index 2*ch+1 is emitted first
+ byte b2 = gb2312.u2n[((int)ch) * 2]; // index 2*ch is emitted second
+ if (b1 != 0 && b2 != 0)
+ {
+ bytes[byteIndex++] = b1;
+ bytes[byteIndex++] = b2;
+ continue;
+ }
+
+ long value = GB18030Source.FromUCS(ch);
+ if (value < 0)
+ bytes[byteIndex++] = 0x3F; // FromUCS returned a negative value: emit '?' (0x3F)
+ else
+ {
+ // non-GB2312: value is handed to Unlinear and 4 bytes are accounted for
+ GB18030Source.Unlinear(bytes, byteIndex, value);
+ byteIndex += 4;
+ }
+ }
+
+ if (refresh)
+ {
+ if (incomplete_bytes != char.MinValue)
+ bytes[byteIndex++] = 0x3F; // incomplete: a held-back surrogate never got its partner, emit '?' (0x3F)
+ incomplete_bytes = char.MinValue;
+ }
+
return byteIndex - byteStart;
}
+#endif
}
}