//------------------------------------------------------------------------------
// <copyright file="XmlEncoding.cs" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>
// <owner current="true" primary="true">Microsoft</owner>
//------------------------------------------------------------------------------
using System.Diagnostics;
using System.Text;
11 namespace System.Xml {
// Stateful decoder for UTF-16 byte streams of either byte order.
// A code unit that is split across two calls is completed on the next call:
// the dangling byte is remembered in lastByte.
internal class UTF16Decoder : System.Text.Decoder {
    private bool bigEndian;
    // First byte of a code unit that was cut off at the end of the previous call,
    // or -1 when no byte is pending.
    private int lastByte;
    private const int CharSize = 2;

    // bigEndian: true when the most significant byte of each code unit comes first.
    public UTF16Decoder( bool bigEndian ) {
        lastByte = -1;
        this.bigEndian = bigEndian;
    }

    // Number of chars a non-flushing decode of the given range would produce.
    public override int GetCharCount( byte[] bytes, int index, int count ) {
        return GetCharCount( bytes, index, count, false );
    }

    // Number of chars produced by decoding `count` bytes plus any pending byte.
    // Throws ArgumentException when flush is requested and the total byte count is odd,
    // i.e. the stream would end in the middle of a code unit.
    public override int GetCharCount( byte[] bytes, int index, int count, bool flush ) {
        int byteCount = count + ( ( lastByte >= 0 ) ? 1 : 0 );
        if ( flush && ( byteCount % CharSize != 0 ) ) {
            throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { -1 } ), (string)null );
        }
        return byteCount / CharSize;
    }

    // Decodes byteCount bytes starting at byteIndex into chars starting at charIndex.
    // Returns the number of chars written. An odd trailing byte is kept for the next call.
    public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        int charCount = GetCharCount( bytes, byteIndex, byteCount );

        // first complete the code unit whose first byte arrived in the previous call
        if ( lastByte >= 0 ) {
            if ( byteCount == 0 ) {
                return charCount;
            }
            int nextByte = bytes[byteIndex++];
            byteCount--;

            chars[charIndex++] = bigEndian
                ? (char)( lastByte << 8 | nextByte )
                : (char)( nextByte << 8 | lastByte );
            lastByte = -1;
        }

        // an odd number of remaining bytes: stash the last one and decode the even part
        if ( ( byteCount & 1 ) != 0 ) {
            lastByte = bytes[byteIndex + --byteCount];
        }

        // use the fast BlockCopy if possible
        if ( bigEndian == BitConverter.IsLittleEndian ) {
            // stream byte order differs from the machine's: assemble each char by hand
            int byteEnd = byteIndex + byteCount;
            if ( bigEndian ) {
                while ( byteIndex < byteEnd ) {
                    int hi = bytes[byteIndex++];
                    int lo = bytes[byteIndex++];
                    chars[charIndex++] = (char)( hi << 8 | lo );
                }
            }
            else {
                while ( byteIndex < byteEnd ) {
                    int lo = bytes[byteIndex++];
                    int hi = bytes[byteIndex++];
                    chars[charIndex++] = (char)( hi << 8 | lo );
                }
            }
        }
        else {
            Buffer.BlockCopy( bytes, byteIndex, chars, charIndex * CharSize, byteCount );
        }
        return charCount;
    }

    // Decodes as many bytes as fit into charCount chars.
    // bytesUsed / charsUsed report what was consumed and produced; completed is false
    // when input had to be left unread because the char window was too small.
    // The flush argument is not consulted by this implementation.
    public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
        charsUsed = 0;
        bytesUsed = 0;

        if ( lastByte >= 0 ) {
            if ( byteCount == 0 ) {
                completed = true;
                return;
            }
            if ( charCount == 0 ) {
                // No room for the pending char: consume nothing and keep the state,
                // exactly as the no-pending-byte path does when charCount is 0.
                completed = false;
                return;
            }
            int nextByte = bytes[byteIndex++];
            byteCount--;
            bytesUsed++;

            chars[charIndex++] = bigEndian
                ? (char)( lastByte << 8 | nextByte )
                : (char)( nextByte << 8 | lastByte );
            charCount--;
            charsUsed++;
            lastByte = -1;
        }

        // clamp the input to what the output window can hold
        if ( charCount * CharSize < byteCount ) {
            byteCount = charCount * CharSize;
            completed = false;
        }
        else {
            completed = true;
        }

        if ( bigEndian == BitConverter.IsLittleEndian ) {
            // stream byte order differs from the machine's: assemble each char by hand
            int i = byteIndex;
            int byteEnd = i + ( byteCount & ~0x1 );
            if ( bigEndian ) {
                while ( i < byteEnd ) {
                    int hi = bytes[i++];
                    int lo = bytes[i++];
                    chars[charIndex++] = (char)( hi << 8 | lo );
                }
            }
            else {
                while ( i < byteEnd ) {
                    int lo = bytes[i++];
                    int hi = bytes[i++];
                    chars[charIndex++] = (char)( hi << 8 | lo );
                }
            }
        }
        else {
            Buffer.BlockCopy( bytes, byteIndex, chars, charIndex * CharSize, (int)(byteCount & ~0x1) );
        }
        charsUsed += byteCount / CharSize;
        bytesUsed += byteCount;

        // an odd trailing byte counts as used and is carried over to the next call
        if ( ( byteCount & 1 ) != 0 ) {
            lastByte = bytes[byteIndex + byteCount - 1];
        }
    }
}
// Stateless decoder that maps every input byte to the char with the same numeric value.
// It never rejects input: bytes above 0x7F are widened as-is rather than reported as invalid.
internal class SafeAsciiDecoder : Decoder {

    public SafeAsciiDecoder() {
    }

    // One char per byte.
    public override int GetCharCount( byte[] bytes, int index, int count ) {
        return count;
    }

    // Copies byteCount bytes starting at byteIndex into chars starting at charIndex,
    // widening each byte to a char. Returns the number of chars written.
    public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        int i = byteIndex;
        int j = charIndex;
        while ( i < byteIndex + byteCount ) {
            chars[j++] = (char)bytes[i++];
        }
        return byteCount;
    }

    // Decodes at most charCount bytes. completed is false when the char window was
    // smaller than the input and bytes were left unread. flush is not consulted.
    public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
        if ( charCount < byteCount ) {
            byteCount = charCount;
            completed = false;
        }
        else {
            completed = true;
        }

        int i = byteIndex;
        int j = charIndex;
        int byteEndIndex = byteIndex + byteCount;

        while ( i < byteEndIndex ) {
            chars[j++] = (char)bytes[i++];
        }

        charsUsed = byteCount;
        bytesUsed = byteCount;
    }
}
// Decode-only Encoding for UCS-4 (four bytes per code point).
// The byte order is determined by the Ucs4Decoder a derived class installs in ucs4Decoder.
// Encoding (chars to bytes) is not implemented: the GetBytes overloads, GetMaxByteCount and
// GetEncoder return placeholder values (null / 0).
internal class Ucs4Encoding : Encoding {
    // Decoder performing the byte-order specific work; assigned by derived class constructors.
    internal Ucs4Decoder ucs4Decoder;

    public override string WebName {
        get {
            return this.EncodingName;
        }
    }

    // Returns the shared decoder instance held by this encoding (not a fresh one per call).
    public override Decoder GetDecoder() {
        return ucs4Decoder;
    }

    public override int GetByteCount( char[] chars, int index, int count ) {
        return checked( count * 4 );
    }

    public override int GetByteCount( char[] chars ) {
        // checked, like the overload above, so an oversized array cannot wrap silently
        return checked( chars.Length * 4 );
    }

    // Not implemented: always returns null.
    public override byte[] GetBytes( string s ) {
        return null; //ucs4Decoder.GetByteCount(chars, index, count);
    }

    // Not implemented: writes nothing and returns 0.
    public override int GetBytes( char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex ) {
        return 0;
    }

    // Not implemented: always returns 0.
    public override int GetMaxByteCount( int charCount ) {
        return 0;
    }

    public override int GetCharCount( byte[] bytes, int index, int count ) {
        return ucs4Decoder.GetCharCount( bytes, index, count );
    }

    public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        return ucs4Decoder.GetChars( bytes, byteIndex, byteCount, chars, charIndex );
    }

    // One char per started quadruplet.
    // NOTE(review): code points above 0xFFFF decode to two chars, so this may under-estimate - confirm callers.
    public override int GetMaxCharCount( int byteCount ) {
        return ( byteCount + 3 ) / 4;
    }

    public override int CodePage {
        get {
            return 0;
        }
    }

    public override int GetCharCount( byte[] bytes ) {
        return bytes.Length / 4;
    }

    // Not implemented: always returns null.
    public override Encoder GetEncoder() {
        return null;
    }

    // Each of the factory properties below creates a new encoding instance on every access.
    internal static Encoding UCS4_Littleendian {
        get {
            return new Ucs4Encoding4321();
        }
    }

    internal static Encoding UCS4_Bigendian {
        get {
            return new Ucs4Encoding1234();
        }
    }

    internal static Encoding UCS4_2143 {
        get {
            return new Ucs4Encoding2143();
        }
    }

    internal static Encoding UCS4_3412 {
        get {
            return new Ucs4Encoding3412();
        }
    }
}
// UCS-4 encoding whose bytes arrive most significant first (byte order 1-2-3-4).
internal class Ucs4Encoding1234 : Ucs4Encoding {

    public Ucs4Encoding1234() {
        ucs4Decoder = new Ucs4Decoder1234();
    }

    public override string EncodingName {
        get {
            return "ucs-4 (Bigendian)";
        }
    }

    // Byte order mark for this byte order; a new array is returned on every call.
    public override byte[] GetPreamble() {
        return new byte[4] { 0x00, 0x00, 0xfe, 0xff };
    }
}
// UCS-4 encoding whose bytes arrive least significant first (byte order 4-3-2-1).
internal class Ucs4Encoding4321 : Ucs4Encoding {
    public Ucs4Encoding4321() {
        ucs4Decoder = new Ucs4Decoder4321();
    }

    public override string EncodingName {
        get {
            // NOTE(review): the returned literal was not visible in the reviewed text;
            // "ucs-4" is the assumed value - confirm against the original file.
            return "ucs-4";
        }
    }

    // Byte order mark for this byte order; a new array is returned on every call.
    public override byte[] GetPreamble() {
        return new byte[4] { 0xff, 0xfe, 0x00, 0x00 };
    }
}
// UCS-4 encoding with the unusual byte order 2-1-4-3 (bytes swapped within each 16-bit half).
internal class Ucs4Encoding2143 : Ucs4Encoding {
    public Ucs4Encoding2143() {
        ucs4Decoder = new Ucs4Decoder2143();
    }

    public override string EncodingName {
        get {
            return "ucs-4 (order 2143)";
        }
    }

    // Byte order mark for this byte order; a new array is returned on every call.
    public override byte[] GetPreamble() {
        return new byte[4] { 0x00, 0x00, 0xff, 0xfe };
    }
}
// UCS-4 encoding with the unusual byte order 3-4-1-2 (16-bit halves swapped).
internal class Ucs4Encoding3412 : Ucs4Encoding {
    public Ucs4Encoding3412() {
        ucs4Decoder = new Ucs4Decoder3412();
    }

    public override string EncodingName {
        get {
            return "ucs-4 (order 3412)";
        }
    }

    // Byte order mark for this byte order; a new array is returned on every call.
    public override byte[] GetPreamble() {
        return new byte[4] { 0xfe, 0xff, 0x00, 0x00 };
    }
}
// Base class for the UCS-4 decoders. It owns the state that carries an incomplete
// quadruplet from one call to the next; derived classes supply GetFullChars, which
// decodes whole quadruplets in one particular byte order.
internal abstract class Ucs4Decoder : Decoder {

    // Bytes of a quadruplet that was cut off at the end of the previous call.
    internal byte [] lastBytes = new byte[4];
    // How many entries of lastBytes are valid (0..3 between calls).
    internal int lastBytesCount = 0;

    // Counts one char per complete quadruplet, including cached bytes.
    // NOTE(review): code points above 0xFFFF decode to two chars, so this can under-estimate - confirm callers.
    public override int GetCharCount( byte[] bytes, int index, int count ) {
        return ( count + lastBytesCount ) / 4;
    }

    // Decodes every complete quadruplet in [byteIndex, byteIndex + byteCount) into chars
    // starting at charIndex and returns the number of chars written. Trailing bytes that
    // do not form a quadruplet are ignored by the implementation.
    internal abstract int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex );

    // Decodes the given bytes, first completing any quadruplet cached by an earlier call.
    // Returns the number of chars written; leftover bytes are cached for the next call.
    public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        // finish a character from the bytes that were cached last time
        int i = 0;
        if ( lastBytesCount > 0 ) {
            // copy remaining bytes into the cache
            for ( ; lastBytesCount < 4 && byteCount > 0; lastBytesCount++ ) {
                lastBytes[lastBytesCount] = bytes[byteIndex];
                byteIndex++;
                byteCount--;
            }
            // still not enough bytes -> return
            if ( lastBytesCount < 4 ) {
                return 0;
            }
            // decode 1 character from the byte cache (2 chars when it is a surrogate pair)
            i = GetFullChars( lastBytes, 0 , 4, chars, charIndex );
            Debug.Assert( i == 1 || i == 2 );
            charIndex += i;
            lastBytesCount = 0;
        }

        // decode block of byte quadruplets
        i = GetFullChars( bytes, byteIndex, byteCount, chars, charIndex ) + i;

        // cache remaining bytes that do not make up a character
        int bytesLeft = ( byteCount & 0x3 );
        for ( int j = 0; j < bytesLeft; j++ ) {
            lastBytes[j] = bytes[byteIndex + byteCount - bytesLeft + j];
        }
        lastBytesCount = bytesLeft;
        return i;
    }

    // Decodes as many quadruplets as the char window is assumed to hold.
    // bytesUsed / charsUsed report what was consumed and produced; completed is false when
    // input was left unread. flush is not consulted by this implementation.
    // NOTE(review): the window is sized as one char per quadruplet; a quadruplet above 0xFFFF
    // produces two chars, so the window can be exceeded - confirm callers leave headroom.
    public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
        bytesUsed = 0;
        charsUsed = 0;
        // finish a character from the bytes that were cached last time
        int i = 0;
        int lbc = lastBytesCount;
        if ( lbc > 0 ) {
            // copy remaining bytes into the cache
            for ( ; lbc < 4 && byteCount > 0; lbc++ ) {
                lastBytes[lbc] = bytes[byteIndex];
                byteIndex++;
                byteCount--;
                bytesUsed++;
            }
            // still not enough bytes -> return
            if ( lbc < 4 ) {
                lastBytesCount = lbc;
                completed = true;
                return;
            }
            // decode 1 character from the byte cache (2 chars when it is a surrogate pair)
            i = GetFullChars( lastBytes, 0 , 4, chars, charIndex );
            Debug.Assert( i == 1 || i == 2 );
            charIndex += i;
            charCount -= i;
            charsUsed = i;

            lastBytesCount = 0;

            // if that's all that was requested -> return
            // (<= 0: a surrogate pair may have taken one char more than was requested)
            if ( charCount <= 0 ) {
                completed = ( byteCount == 0 );
                return;
            }
        }

        // modify the byte count for GetFullChars depending on how many characters were requested
        if ( charCount * 4 < byteCount ) {
            byteCount = charCount * 4;
            completed = false;
        }
        else {
            completed = true;
        }
        bytesUsed += byteCount;

        // decode block of byte quadruplets
        charsUsed = GetFullChars( bytes, byteIndex, byteCount, chars, charIndex ) + i;

        // cache remaining bytes that do not make up a character
        int bytesLeft = ( byteCount & 0x3 );
        for ( int j = 0; j < bytesLeft; j++ ) {
            lastBytes[j] = bytes[byteIndex + byteCount - bytesLeft + j];
        }
        lastBytesCount = bytesLeft;
    }

    // Writes the UTF-16 surrogate pair for a code point above 0xFFFF to
    // chars[charIndex] and chars[charIndex + 1].
    internal void Ucs4ToUTF16(uint code, char[] chars, int charIndex) {
        // Standard UTF-16 split: subtract 0x10000, then the top 10 bits select the high
        // surrogate and the bottom 10 bits the low surrogate. (Adding the plane number
        // without shifting it by 6 is only correct for code points below 0x20000.)
        uint offset = code - 0x10000;
        chars[charIndex] = (char)(XmlCharType.SurHighStart + (offset >> 10));
        chars[charIndex + 1] = (char)(XmlCharType.SurLowStart + (offset & 0x3FF));
    }
}
// UCS-4 decoder for byte order 4-3-2-1: the first byte of each quadruplet is the least significant.
internal class Ucs4Decoder4321 : Ucs4Decoder {

    // Decodes every complete quadruplet of the given range and returns the number of chars written.
    // Throws ArgumentException (carrying the quadruplet's byte index) for values above 0x10FFFF and
    // XmlException when a quadruplet encodes a surrogate code unit.
    internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        uint code;
        int i, j;

        // from here on byteCount is the exclusive end index of the range
        byteCount += byteIndex;

        for ( i = byteIndex, j = charIndex; i + 3 < byteCount; ) {
            code = (uint)( ( bytes[i+3] << 24 ) | ( bytes[i+2] << 16 ) | ( bytes[i+1] << 8 ) | bytes[i] );
            if ( code > 0x10FFFF ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { i } ), (string)null );
            }
            else if ( code > 0xFFFF ) {
                // surrogate pair: two chars are written, so advance one extra slot here
                Ucs4ToUTF16(code, chars, j);
                j++;
            }
            else {
                if ( XmlCharType.IsSurrogate( (int)code ) ) {
                    throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                }
                else {
                    chars[j] = (char)code;
                }
            }
            j++;
            i += 4;
        }
        return j - charIndex;
    }
}
// UCS-4 decoder for byte order 1-2-3-4: the first byte of each quadruplet is the most significant.
internal class Ucs4Decoder1234 : Ucs4Decoder {

    // Decodes every complete quadruplet of the given range and returns the number of chars written.
    // Throws ArgumentException (carrying the quadruplet's byte index) for values above 0x10FFFF and
    // XmlException when a quadruplet encodes a surrogate code unit.
    internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        uint code;
        int i, j;

        // from here on byteCount is the exclusive end index of the range
        byteCount += byteIndex;

        for ( i = byteIndex, j = charIndex; i+3 < byteCount; ) {
            code = (uint)( ( bytes[i] << 24 ) | ( bytes[i+1] << 16 ) | ( bytes[i+2] << 8 ) | bytes[i+3] );
            if ( code > 0x10FFFF ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { i } ), (string)null );
            }
            else if ( code > 0xFFFF ) {
                // surrogate pair: two chars are written, so advance one extra slot here
                Ucs4ToUTF16(code, chars, j);
                j++;
            }
            else {
                if ( XmlCharType.IsSurrogate( (int)code ) ) {
                    throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                }
                else {
                    chars[j] = (char)code;
                }
            }
            j++;
            i += 4;
        }
        return j - charIndex;
    }
}
// UCS-4 decoder for byte order 2-1-4-3: bytes are swapped within each 16-bit half.
internal class Ucs4Decoder2143 : Ucs4Decoder {

    // Decodes every complete quadruplet of the given range and returns the number of chars written.
    // Throws ArgumentException (carrying the quadruplet's byte index) for values above 0x10FFFF and
    // XmlException when a quadruplet encodes a surrogate code unit.
    internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        uint code;
        int i, j;

        // from here on byteCount is the exclusive end index of the range
        byteCount += byteIndex;

        for ( i = byteIndex, j = charIndex; i+3 < byteCount; ) {
            code = (uint)( ( bytes[i+1] << 24 ) | ( bytes[i] << 16 ) | ( bytes[i+3] << 8 ) | bytes[i+2] );
            if ( code > 0x10FFFF ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { i } ), (string)null );
            }
            else if ( code > 0xFFFF ) {
                // surrogate pair: two chars are written, so advance one extra slot here
                Ucs4ToUTF16(code, chars, j);
                j++;
            }
            else {
                if ( XmlCharType.IsSurrogate( (int)code ) ) {
                    throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                }
                else {
                    chars[j] = (char)code;
                }
            }
            j++;
            i += 4;
        }
        return j - charIndex;
    }
}
// UCS-4 decoder for byte order 3-4-1-2: the two 16-bit halves of each quadruplet are swapped.
internal class Ucs4Decoder3412 : Ucs4Decoder {

    // Decodes every complete quadruplet of the given range and returns the number of chars written.
    // Throws ArgumentException (carrying the quadruplet's byte index) for values above 0x10FFFF and
    // XmlException when a quadruplet encodes a surrogate code unit.
    internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
        uint code;
        int i, j;

        // from here on byteCount is the exclusive end index of the range
        byteCount += byteIndex;

        for ( i = byteIndex, j = charIndex; i+3 < byteCount; ) {
            code = (uint)( ( bytes[i+2] << 24 ) | ( bytes[i+3] << 16 ) | ( bytes[i] << 8 ) | bytes[i+1] );
            if ( code > 0x10FFFF ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { i } ), (string)null );
            }
            else if ( code > 0xFFFF ) {
                // surrogate pair: two chars are written, so advance one extra slot here
                Ucs4ToUTF16(code, chars, j);
                j++;
            }
            else {
                if ( XmlCharType.IsSurrogate( (int)code ) ) {
                    throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                }
                else {
                    chars[j] = (char)code;
                }
            }
            j++;
            i += 4;
        }
        return j - charIndex;
    }
}