2 * UTF8Encoding.cs - Implementation of the "System.Text.UTF8Encoding" class.
4 * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
5 * Copyright (C) 2004 Novell, Inc (http://www.novell.com)
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
30 using System.Runtime.InteropServices;
33 [MonoLimitation ("Serialization format not compatible with .NET")]
35 public class UTF8Encoding : Encoding
37 // Magic number used by Windows for UTF-8.
38 internal const int UTF8_CODE_PAGE = 65001;
// Set once from the constructor argument encoderShouldEmitUTF8Identifier.
// Read by Equals and passed to UTF8Encoder by GetEncoder.
41 private bool emitIdentifier;
// Default constructor: forwards (false, false) to the two-argument constructor.
44 public UTF8Encoding () : this (false, false) {}
// Forwards the identifier flag and passes false for throwOnInvalidBytes.
45 public UTF8Encoding (bool encoderShouldEmitUTF8Identifier)
46 : this (encoderShouldEmitUTF8Identifier, false) {}
// Main constructor: hands UTF8_CODE_PAGE to the base class, records the
// identifier flag, installs an encoder/decoder fallback pair and fills in
// the descriptive fields (names, browser/mail flags, Windows code page).
48 public UTF8Encoding (bool encoderShouldEmitUTF8Identifier, bool throwOnInvalidBytes)
49 : base (UTF8_CODE_PAGE)
51 emitIdentifier = encoderShouldEmitUTF8Identifier;
// throwOnInvalidBytes selects the exception fallbacks.
52 if (throwOnInvalidBytes)
53 SetFallbackInternal (EncoderFallback.ExceptionFallback, DecoderFallback.ExceptionFallback);
// NOTE(review): presumably the alternative branch (an `else` is not visible
// in this listing) - confirm this call does not run unconditionally.
55 SetFallbackInternal (EncoderFallback.StandardSafeFallback, DecoderFallback.StandardSafeFallback);
57 web_name = body_name = header_name = "utf-8";
58 encoding_name = "Unicode (UTF-8)";
59 is_browser_save = true;
60 is_browser_display = true;
61 is_mail_news_display = true;
62 is_mail_news_save = true;
// Note: this is the UnicodeEncoding constant, not UTF8_CODE_PAGE.
63 windows_code_page = UnicodeEncoding.UNICODE_CODE_PAGE;
66 #region GetByteCount()
68 // Internal version of "GetByteCount" which can handle a rolling
69 // state between multiple calls to this method.
//
// Array front end: checks chars, index and count, then pins the array and
// hands the slice starting at chars [index] to the pointer-based overload.
//   leftOver - surrogate start carried over from an earlier call ('\0' when none).
//   flush    - forwarded to the pointer overload; also gates the handling of
//              a pending leftOver when the slice is empty.
// Throws ArgumentNullException ("chars") and ArgumentOutOfRangeException
// ("index" / "count").
70 private static int InternalGetByteCount (char[] chars, int index, int count, EncoderFallback fallback, ref char leftOver, bool flush)
72 // Validate the parameters.
// NOTE(review): the condition guarding this throw is not visible in this
// listing; presumably a null check on chars.
74 throw new ArgumentNullException ("chars");
76 if (index < 0 || index > chars.Length) {
77 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
79 if (count < 0 || count > (chars.Length - index)) {
80 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
// index at the very end of the array: there are no chars to scan, so only a
// pending leftOver (when flushing) is examined. The statements that produce
// the result for this case are not visible in this listing.
83 if (index == chars.Length) {
84 if (flush && leftOver != '\0') {
85 // Flush the left-over surrogate pair start.
// Pin the array so the pointer overload can walk it directly.
93 fixed (char* cptr = chars) {
94 return InternalGetByteCount (cptr + index, count, fallback, ref leftOver, flush);
// Pointer-based worker behind GetByteCount: scans count chars starting at
// chars and classifies each one by range (below 0x80, below 0x800, other
// non-surrogate, surrogate start, surrogate tail). The 0x800 and surrogate
// boundaries are the same ones InternalGetBytes uses to choose 2- and 3-byte
// output. Unpaired surrogates go through the encoder fallback, and the
// replacement characters are counted by a recursive call.
//   leftOver - pending surrogate start shared across calls.
//   flush    - see the final leftOver handling at the bottom.
// NOTE(review): several statements (the declarations of length, start and
// dummy, and the per-range length updates) are not visible in this listing.
99 private unsafe static int InternalGetByteCount (char* chars, int count, EncoderFallback fallback, ref char leftOver, bool flush)
102 char* end = chars + count;
// Created lazily by GetFallbackChars on the first fallback.
104 EncoderFallbackBuffer buffer = null;
105 while (chars < end) {
107 for (; chars < end; chars++) {
108 if (*chars < '\x80') {
110 } else if (*chars < '\x800') {
112 } else if (*chars < '\uD800' || *chars > '\uDFFF') {
114 } else if (*chars <= '\uDBFF') {
115 // This is a surrogate start char, exit the inner loop only
116 // if we don't find the complete surrogate pair.
// Look ahead one char: a surrogate tail right behind the start forms a pair.
117 if (chars + 1 < end && chars [1] >= '\uDC00' && chars [1] <= '\uDFFF') {
126 // We have a surrogate tail without
127 // leading surrogate. In NET_2_0 it
128 // uses fallback. In NET_1_1 we output
130 char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
131 fixed (char *fb_chars = fallback_chars) {
// Count the replacement characters with a throw-away leftOver and flush on.
133 length += InternalGetByteCount (fb_chars, fallback_chars.Length, fallback, ref dummy, true);
// Pairing step: does the current char complete a pending surrogate start?
140 if (*chars >= '\uDC00' && *chars <= '\uDFFF') {
141 // We have a correct surrogate pair.
145 // We have a surrogate start followed by a
146 // regular character. Technically, this is
147 // invalid, but we have to do something.
148 // We write out the surrogate start and then
149 // re-visit the current character again.
150 char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
151 fixed (char *fb_chars = fallback_chars) {
153 length += InternalGetByteCount (fb_chars, fallback_chars.Length, fallback, ref dummy, true);
160 // Flush the left-over surrogate pair start.
161 if (leftOver != '\0') {
// Runs the encoder fallback for the character at *chars and returns the
// replacement characters it produces.
//   chars    - position of the offending character.
//   start    - beginning of the input; chars - start is reported to the
//              fallback as the character's index.
//   fallback - source of the fallback buffer.
//   buffer   - fallback buffer, passed by reference so it can be kept by the caller.
// Returns a new array holding buffer.Remaining characters drained from the buffer.
169 unsafe static char [] GetFallbackChars (char *chars, char *start, EncoderFallback fallback, ref EncoderFallbackBuffer buffer)
// NOTE(review): presumably only done when buffer is still null (the guard is
// not visible in this listing) - confirm.
172 buffer = fallback.CreateFallbackBuffer ();
174 buffer.Fallback (*chars, (int) (chars - start));
// Drain everything the fallback produced into a plain array.
176 char [] fallback_chars = new char [buffer.Remaining];
177 for (int i = 0; i < fallback_chars.Length; i++)
178 fallback_chars [i] = buffer.GetNextChar ();
182 return fallback_chars;
185 // Get the number of bytes needed to encode a character buffer.
// One-shot call: uses this encoding's EncoderFallback, a local dummy for the
// leftOver state and flush = true. Validation happens in InternalGetByteCount.
186 public override int GetByteCount (char[] chars, int index, int count)
189 return InternalGetByteCount (chars, index, count, EncoderFallback, ref dummy, true);
// Pointer overload of GetByteCount: counts the bytes needed for count chars
// starting at chars, using this encoding's EncoderFallback, a local dummy
// leftOver and flush = true.
// Throws ArgumentNullException ("chars").
193 [CLSCompliant (false)]
195 public unsafe override int GetByteCount (char* chars, int count)
// NOTE(review): the guard for this throw is not visible in this listing;
// presumably a null check on chars.
198 throw new ArgumentNullException ("chars");
202 return InternalGetByteCount (chars, count, EncoderFallback, ref dummy, true);
209 // Internal version of "GetBytes" which can handle a rolling
210 // state between multiple calls to this method.
//
// Array front end: validates the arguments, pins both arrays and forwards to
// the pointer-based overload, giving it the output capacity that remains
// after byteIndex (bytes.Length - byteIndex).
//   buffer   - encoder fallback buffer shared with the pointer overload.
//   leftOver - pending surrogate start carried between calls.
//   flush    - forwarded; also gates the empty-input case below.
// Throws ArgumentNullException ("chars" / "bytes") and
// ArgumentOutOfRangeException ("charIndex" / "charCount" / "byteIndex").
// NOTE(review): the parameter line declaring byteIndex is not visible in
// this listing, although byteIndex is used below.
211 private static int InternalGetBytes (char[] chars, int charIndex,
212 int charCount, byte[] bytes,
214 EncoderFallback fallback, ref EncoderFallbackBuffer buffer,
215 ref char leftOver, bool flush)
217 // Validate the parameters.
219 throw new ArgumentNullException ("chars");
222 throw new ArgumentNullException ("bytes");
224 if (charIndex < 0 || charIndex > chars.Length) {
225 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
227 if (charCount < 0 || charCount > (chars.Length - charIndex)) {
228 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
230 if (byteIndex < 0 || byteIndex > bytes.Length) {
231 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
// charIndex at the very end of chars: nothing to encode except possibly a
// pending leftOver, which per the comments below is currently ignored.
234 if (charIndex == chars.Length) {
235 if (flush && leftOver != '\0') {
236 // FIXME: use EncoderFallback.
238 // By default it is empty, so I do nothing for now.
245 fixed (char* cptr = chars) {
// No room at all in the output: call the worker with a null destination
// and zero capacity instead of pinning bytes.
246 if (bytes.Length == byteIndex)
247 return InternalGetBytes (
248 cptr + charIndex, charCount,
249 null, 0, fallback, ref buffer, ref leftOver, flush);
250 fixed (byte *bptr = bytes) {
251 return InternalGetBytes (
252 cptr + charIndex, charCount,
253 bptr + byteIndex, bytes.Length - byteIndex,
254 fallback, ref buffer,
255 ref leftOver, flush);
// Pointer-based worker behind GetBytes: encodes count chars starting at
// chars into at most bcount bytes starting at bytes.
//   fallback / buffer - encoder fallback and its (lazily created) buffer,
//                       used for unpaired surrogates.
//   leftOver          - pending surrogate start carried between calls.
//   flush             - when a surrogate start is still pending at the end,
//                       see the final leftOver block.
// Returns the number of bytes written (distance from the original bytes
// pointer). Throws ArgumentException ("Insufficient Space", "bytes") when
// the output does not fit.
// NOTE(review): several statements of this method (declarations of ch,
// start and dummy, the one-byte store, the pointer advances and the
// insufficient-space jumps) are not visible in this listing.
261 private unsafe static int InternalGetBytes (char* chars, int count, byte* bytes, int bcount, EncoderFallback fallback, ref EncoderFallbackBuffer buffer, ref char leftOver, bool flush)
263 char* end = chars + count;
265 byte* start_bytes = bytes;
266 byte* end_bytes = bytes + bcount;
267 while (chars < end) {
269 for (; chars < end; chars++) {
// Each branch first checks that the bytes it is about to write fit.
272 if (bytes >= end_bytes)
275 } else if (ch < '\x800') {
276 if (bytes + 1 >= end_bytes)
// Two-byte form: 110xxxxx 10xxxxxx.
278 bytes [0] = (byte) (0xC0 | (ch >> 6));
279 bytes [1] = (byte) (0x80 | (ch & 0x3F));
281 } else if (ch < '\uD800' || ch > '\uDFFF') {
282 if (bytes + 2 >= end_bytes)
// Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
284 bytes [0] = (byte) (0xE0 | (ch >> 12));
285 bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
286 bytes [2] = (byte) (0x80 | (ch & 0x3F));
288 } else if (ch <= '\uDBFF') {
289 // This is a surrogate char, exit the inner loop.
294 // We have a surrogate tail without
295 // leading surrogate. In NET_2_0 it
296 // uses fallback. In NET_1_1 we output
298 char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
// Make sure the encoded replacement characters fit before writing them.
300 if (bytes + InternalGetByteCount (fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
302 fixed (char *fb_chars = fallback_chars) {
// Encode the replacement characters and advance past them.
303 bytes += InternalGetBytes (fb_chars, fallback_chars.Length, bytes, bcount - (int) (bytes - start_bytes), fallback, ref buffer, ref dummy, true);
// Pairing step: does the current char complete the pending surrogate start?
310 if (*chars >= '\uDC00' && *chars <= '\uDFFF') {
311 // We have a correct surrogate pair.
// Combine the pair into the supplementary-plane code point.
312 int ch = 0x10000 + (int) *chars - 0xDC00 + (((int) leftOver - 0xD800) << 10);
313 if (bytes + 3 >= end_bytes)
// Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
315 bytes [0] = (byte) (0xF0 | (ch >> 18));
316 bytes [1] = (byte) (0x80 | ((ch >> 12) & 0x3F));
317 bytes [2] = (byte) (0x80 | ((ch >> 6) & 0x3F));
318 bytes [3] = (byte) (0x80 | (ch & 0x3F));
322 // We have a surrogate start followed by a
323 // regular character. Technically, this is
324 // invalid, but we have to do something.
325 // We write out the surrogate start and then
326 // re-visit the current character again.
327 char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
329 if (bytes + InternalGetByteCount (fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
331 fixed (char *fb_chars = fallback_chars) {
// Advance past the fallback output, exactly as the unpaired-tail path above
// does. Without the advance the replacement bytes are overwritten by the
// next character and the returned length disagrees with
// InternalGetByteCount, which does count them.
332 bytes += InternalGetBytes (fb_chars, fallback_chars.Length, bytes, bcount - (int) (bytes - start_bytes), fallback, ref buffer, ref dummy, true);
341 // Flush the left-over surrogate pair start.
342 if (leftOver != '\0') {
// The lone surrogate start is written in the three-byte form when it fits.
344 if (bytes + 2 < end_bytes) {
345 bytes [0] = (byte) (0xE0 | (ch >> 12));
346 bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
347 bytes [2] = (byte) (0x80 | (ch & 0x3F));
// end_bytes - bcount is the original bytes pointer, so this is the byte count written.
355 return (int)(bytes - (end_bytes - bcount));
357 throw new ArgumentException ("Insufficient Space", "bytes");
360 // Get the bytes that result from encoding a character buffer.
// One-shot call: starts with no pending surrogate and no fallback buffer,
// uses this encoding's EncoderFallback and flush = true. Argument
// validation is performed by InternalGetBytes.
361 public override int GetBytes (char[] chars, int charIndex, int charCount,
362 byte[] bytes, int byteIndex)
364 char leftOver = '\0';
365 EncoderFallbackBuffer buffer = null;
366 return InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, EncoderFallback, ref buffer, ref leftOver, true);
369 // Convenience wrappers for "GetBytes".
// String overload: validates the arguments, pins the string and forwards
// the requested slice to the pointer-based InternalGetBytes with this
// encoding's EncoderFallback and flush = true.
// Throws ArgumentNullException ("s" / "bytes") and
// ArgumentOutOfRangeException ("charIndex" / "charCount" / "byteIndex").
370 public override int GetBytes (String s, int charIndex, int charCount,
371 byte[] bytes, int byteIndex)
373 // Validate the parameters.
// NOTE(review): the guards for the two null throws are not visible in this listing.
375 throw new ArgumentNullException ("s");
378 throw new ArgumentNullException ("bytes");
380 if (charIndex < 0 || charIndex > s.Length) {
381 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
383 if (charCount < 0 || charCount > (s.Length - charIndex)) {
384 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
386 if (byteIndex < 0 || byteIndex > bytes.Length) {
387 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
// Start index at the end of the string: special-cased before pinning (the
// statement executed for this case is not visible in this listing).
390 if (charIndex == s.Length)
394 fixed (char* cptr = s) {
396 EncoderFallbackBuffer buffer = null;
// No room in the output: call the worker with a null destination and zero capacity.
397 if (bytes.Length == byteIndex)
398 return InternalGetBytes (
399 cptr + charIndex, charCount,
400 null, 0, EncoderFallback, ref buffer, ref dummy, true);
401 fixed (byte *bptr = bytes) {
402 return InternalGetBytes (
403 cptr + charIndex, charCount,
404 bptr + byteIndex, bytes.Length - byteIndex,
405 EncoderFallback, ref buffer,
// Pointer overload of GetBytes: encodes charCount chars starting at chars
// into at most byteCount bytes starting at bytes, using this encoding's
// EncoderFallback, a local dummy leftOver and flush = true.
// Throws ArgumentNullException ("chars" / "bytes") and
// IndexOutOfRangeException for the two count arguments.
// NOTE(review): the guard conditions for the four throws are not visible in
// this listing; by parameter order they presumably test chars, charCount,
// bytes and byteCount in turn.
412 [CLSCompliant (false)]
414 public unsafe override int GetBytes (char* chars, int charCount, byte* bytes, int byteCount)
417 throw new ArgumentNullException ("chars");
419 throw new IndexOutOfRangeException ("charCount");
421 throw new ArgumentNullException ("bytes");
// Names byteCount: this throw follows the bytes check, so repeating
// "charCount" here (as before) pointed callers at the wrong argument.
423 throw new IndexOutOfRangeException ("byteCount");
429 EncoderFallbackBuffer buffer = null;
// First call: null destination with zero capacity; second call: the caller's
// buffer. The condition choosing between them is not visible in this listing.
431 return InternalGetBytes (chars, charCount, null, 0, EncoderFallback, ref buffer, ref dummy, true);
433 return InternalGetBytes (chars, charCount, bytes, byteCount, EncoderFallback, ref buffer, ref dummy, true);
438 // Internal version of "GetCharCount" which can handle a rolling
439 // state between multiple calls to this method.
//
// Array front end: validates bytes, index and count, then pins the array
// and forwards the slice starting at bytes [index] to the pointer overload.
// The decoder state (leftOverBits, leftOverCount) is taken by value, so
// counting does not modify the caller's state.
//   provider       - object the fallback buffer is obtained from (see Fallback).
//   fallbackBuffer / bufferArg - scratch objects reused across fallback calls.
// Throws ArgumentNullException ("bytes") and ArgumentOutOfRangeException
// ("index" / "count").
440 private unsafe static int InternalGetCharCount (
441 byte[] bytes, int index, int count, uint leftOverBits,
442 uint leftOverCount, object provider,
443 ref DecoderFallbackBuffer fallbackBuffer, ref byte [] bufferArg, bool flush)
445 // Validate the parameters.
// NOTE(review): the guard for this throw is not visible in this listing;
// presumably a null check on bytes.
447 throw new ArgumentNullException ("bytes");
449 if (index < 0 || index > bytes.Length) {
450 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
452 if (count < 0 || count > (bytes.Length - index)) {
453 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
458 fixed (byte *bptr = bytes)
459 return InternalGetCharCount (bptr + index, count,
460 leftOverBits, leftOverCount, provider, ref fallbackBuffer, ref bufferArg, flush);
// Pointer-based worker behind GetCharCount: walks count bytes and counts
// the UTF-16 chars they decode to, adding the fallback's output length for
// every malformed sequence.
//   leftOverBits  - bits accumulated for a sequence started in an earlier call.
//   leftOverCount - packed state: bits 0-3 hold the number of bytes seen so
//                   far, bits 4-7 the expected sequence size.
//   provider      - object the fallback buffer is obtained from (see Fallback).
//   flush         - when set, an unfinished trailing sequence is passed to
//                   the fallback and counted.
// The state arguments are by value, so counting never changes decoder state.
// NOTE(review): several statements (declarations of index, length and ch,
// the loop header, the leftSoFar/leftSize assignments and the length
// increments) are not visible in this listing.
463 private unsafe static int InternalGetCharCount (
464 byte* bytes, int count, uint leftOverBits,
465 uint leftOverCount, object provider,
466 ref DecoderFallbackBuffer fallbackBuffer, ref byte [] bufferArg, bool flush)
// Fast path: with no sequence in progress, scan the leading ASCII bytes.
472 if (leftOverCount == 0) {
473 int end = index + count;
474 for (; index < end; index++, count--) {
475 if (bytes [index] < 0x80)
482 // Determine the number of characters that we have.
// Unpack the carried-over state.
484 uint leftBits = leftOverBits;
485 uint leftSoFar = (leftOverCount & (uint)0x0F);
486 uint leftSize = ((leftOverCount >> 4) & (uint)0x0F);
// index is advanced here, so it already points past the byte in ch.
488 ch = (uint)(bytes[index++]);
491 // Process a UTF-8 start character.
492 if (ch < (uint)0x0080) {
493 // Single-byte UTF-8 character.
495 } else if ((ch & (uint)0xE0) == (uint)0xC0) {
496 // Double-byte UTF-8 character.
497 leftBits = (ch & (uint)0x1F);
500 } else if ((ch & (uint)0xF0) == (uint)0xE0) {
501 // Three-byte UTF-8 character.
502 leftBits = (ch & (uint)0x0F);
505 } else if ((ch & (uint)0xF8) == (uint)0xF0) {
506 // Four-byte UTF-8 character.
507 leftBits = (ch & (uint)0x07);
510 } else if ((ch & (uint)0xFC) == (uint)0xF8) {
511 // Five-byte UTF-8 character.
512 leftBits = (ch & (uint)0x03);
515 } else if ((ch & (uint)0xFE) == (uint)0xFC) {
516 // Six-byte UTF-8 character.
// The branch condition guarantees bit 1 is clear, so this mask yields 0 or 1.
517 leftBits = (ch & (uint)0x03);
521 // Invalid UTF-8 start character.
// index - 1 is the byte just read (index was post-incremented above).
522 length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - 1, 1);
525 // Process an extra byte in a multi-byte sequence.
// Continuation bytes have the form 10xxxxxx and contribute six bits.
526 if ((ch & (uint)0xC0) == (uint)0x80) {
527 leftBits = ((leftBits << 6) | (ch & (uint)0x3F));
528 if (++leftSoFar >= leftSize) {
529 // We have a complete character now.
530 if (leftBits < (uint)0x10000) {
531 // is it an overlong ?
532 bool overlong = false;
// Each threshold is the largest value a sequence one byte shorter can hold
// (0x7F, 0x7FF, 0xFFFF, 0x1FFFFF, 0x3FFFFFF); the statements selecting one
// of them are not visible in this listing.
535 overlong = (leftBits <= 0x7F);
538 overlong = (leftBits <= 0x07FF);
541 overlong = (leftBits <= 0xFFFF);
544 overlong = (leftBits <= 0x1FFFFF);
547 overlong = (leftBits <= 0x03FFFFFF);
551 length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
// Values in 0xD800-0xDFFF are surrogate code units, not valid in UTF-8.
553 else if ((leftBits & 0xF800) == 0xD800) {
554 // UTF-8 doesn't use surrogate characters
555 length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
// Values from 0x10000 up to but excluding 0x110000; InternalGetChars emits
// two chars (a surrogate pair) for this range.
559 } else if (leftBits < (uint)0x110000) {
562 length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
567 // Invalid UTF-8 sequence: clear and restart.
568 length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
575 if (flush && leftSize != 0) {
576 // We had left-over bytes that didn't make up
577 // a complete UTF-8 character sequence.
578 length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
581 // Return the final length to the caller.
585 // for GetCharCount()
// Feeds size bytes, starting at bytes [index], to the decoder fallback one
// byte at a time and sums buffer.Remaining after each call.
//   provider  - either a DecoderFallback (a new buffer is created from it)
//               or a Decoder (its FallbackBuffer is used).
//   buffer    - fallback buffer, obtained on first use and reused afterwards.
//   bufferArg - one-element scratch array, allocated on first use.
// NOTE(review): the accumulator declaration and the return statement are
// not visible in this listing.
586 static unsafe int Fallback (object provider, ref DecoderFallbackBuffer buffer, ref byte [] bufferArg, byte* bytes, long index, uint size)
588 if (buffer == null) {
589 DecoderFallback fb = provider as DecoderFallback;
591 buffer = fb.CreateFallbackBuffer ();
// Presumably the branch for a provider that is not a DecoderFallback (the
// `else` is not visible in this listing).
593 buffer = ((Decoder) provider).FallbackBuffer;
595 if (bufferArg == null)
596 bufferArg = new byte [1];
598 for (int i = 0; i < size; i++) {
// index is a long; it is narrowed to int before indexing here.
599 bufferArg [0] = bytes [(int) index + i];
// The byte index reported to the fallback is always 0.
600 buffer.Fallback (bufferArg, 0);
601 ret += buffer.Remaining;
// Writing counterpart of the counting Fallback: feeds size bytes, starting
// at bytes [byteIndex], to the decoder fallback one byte at a time and
// appends every replacement character to chars, advancing charIndex.
//   provider  - either a DecoderFallback (a new buffer is created from it)
//               or a Decoder (its FallbackBuffer is used).
//   buffer    - fallback buffer, obtained on first use and reused afterwards.
//   bufferArg - one-element scratch array, allocated on first use.
// NOTE(review): no capacity check on chars is visible in this method -
// confirm the caller guarantees room for the replacement characters.
608 static unsafe void Fallback (object provider, ref DecoderFallbackBuffer buffer, ref byte [] bufferArg, byte* bytes, long byteIndex, uint size,
609 char* chars, ref int charIndex)
611 if (buffer == null) {
612 DecoderFallback fb = provider as DecoderFallback;
614 buffer = fb.CreateFallbackBuffer ();
// Presumably the branch for a provider that is not a DecoderFallback (the
// `else` is not visible in this listing).
616 buffer = ((Decoder) provider).FallbackBuffer;
618 if (bufferArg == null)
619 bufferArg = new byte [1];
620 for (int i = 0; i < size; i++) {
621 bufferArg [0] = bytes [byteIndex + i];
// The byte index reported to the fallback is always 0.
622 buffer.Fallback (bufferArg, 0);
// Drain the replacement characters straight into the output.
623 while (buffer.Remaining > 0)
624 chars [charIndex++] = buffer.GetNextChar ();
629 // Get the number of characters needed to decode a byte buffer.
// One-shot call: starts with empty decoder state (0, 0), uses this
// encoding's DecoderFallback as the provider and flush = true. Argument
// validation is performed by InternalGetCharCount.
630 public override int GetCharCount (byte[] bytes, int index, int count)
632 DecoderFallbackBuffer buf = null;
633 byte [] bufferArg = null;
634 return InternalGetCharCount (bytes, index, count, 0, 0, DecoderFallback, ref buf, ref bufferArg, true);
// Pointer overload of GetCharCount: counts the chars that count bytes
// starting at bytes decode to, with empty decoder state (0, 0), this
// encoding's DecoderFallback as the provider and flush = true.
// No argument checks are visible in this overload.
637 [CLSCompliant (false)]
639 public unsafe override int GetCharCount (byte* bytes, int count)
641 DecoderFallbackBuffer buf = null;
642 byte [] bufferArg = null;
643 return InternalGetCharCount (bytes, count, 0, 0, DecoderFallback, ref buf, ref bufferArg, true);
646 // Get the characters that result from decoding a byte buffer.
// Array front end: validates the arguments, pins both arrays and forwards
// to the pointer-based overload, giving it the output capacity that remains
// after charIndex (chars.Length - charIndex).
//   leftOverBits / leftOverCount - decoder state, passed by reference so the
//                                  worker can update it.
//   fallbackBuffer / bufferArg   - scratch objects reused across fallback calls.
// Throws ArgumentNullException ("bytes" / "chars") and
// ArgumentOutOfRangeException ("byteIndex" / "byteCount" / "charIndex").
// NOTE(review): the parameter line declaring provider is not visible in
// this listing, although provider is used below.
647 private unsafe static int InternalGetChars (
648 byte[] bytes, int byteIndex, int byteCount, char[] chars,
649 int charIndex, ref uint leftOverBits, ref uint leftOverCount,
651 ref DecoderFallbackBuffer fallbackBuffer, ref byte [] bufferArg, bool flush)
653 // Validate the parameters.
655 throw new ArgumentNullException ("bytes");
658 throw new ArgumentNullException ("chars");
660 if (byteIndex < 0 || byteIndex > bytes.Length) {
661 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
663 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
664 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
666 if (charIndex < 0 || charIndex > chars.Length) {
667 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
// No output room at all: special-cased before pinning (the statement
// executed for this case is not visible in this listing).
670 if (charIndex == chars.Length)
673 fixed (char* cptr = chars) {
// Nothing to read: call the worker with a null source so that only the
// carried-over state (and flush) is processed.
674 if (byteCount == 0 || byteIndex == bytes.Length)
675 return InternalGetChars (null, 0, cptr + charIndex, chars.Length - charIndex, ref leftOverBits, ref leftOverCount, provider, ref fallbackBuffer, ref bufferArg, flush);
677 fixed (byte* bptr = bytes)
678 return InternalGetChars (bptr + byteIndex, byteCount, cptr + charIndex, chars.Length - charIndex, ref leftOverBits, ref leftOverCount, provider, ref fallbackBuffer, ref bufferArg, flush);
// Pointer-based worker behind GetChars: decodes byteCount bytes starting at
// bytes into at most charCount chars starting at chars.
//   leftOverBits / leftOverCount - decoder state carried between calls;
//       leftOverCount packs bytes-seen-so-far in bits 0-3 and the expected
//       sequence size in bits 4-7. Both are written back before returning.
//   provider  - object the fallback buffer is obtained from (see Fallback).
//   flush     - when set, an unfinished trailing sequence is passed to the fallback.
// Returns the number of chars written. Throws ArgumentException
// (Arg_InsufficientSpace, "chars") when a decoded character does not fit.
// NOTE(review): several statements (the declaration of ch, the
// leftSoFar/leftSize assignments, the overlong selection and some branch
// headers) are not visible in this listing.
682 private unsafe static int InternalGetChars (
683 byte* bytes, int byteCount, char* chars, int charCount,
684 ref uint leftOverBits, ref uint leftOverCount,
686 ref DecoderFallbackBuffer fallbackBuffer, ref byte [] bufferArg, bool flush)
// Both indices are relative to the pointers received, so they start at 0.
688 int charIndex = 0, byteIndex = 0;
689 int length = charCount;
690 int posn = charIndex;
// Fast path: with no sequence in progress, copy leading ASCII bytes directly.
// NOTE(review): no check of posn against length is visible in this loop -
// confirm chars cannot be overrun when the ASCII prefix exceeds charCount.
692 if (leftOverCount == 0) {
693 int end = byteIndex + byteCount;
694 for (; byteIndex < end; posn++, byteIndex++, byteCount--) {
695 if (bytes [byteIndex] < 0x80)
696 chars [posn] = (char) bytes [byteIndex];
702 // Convert the bytes into the output buffer.
// Unpack the carried-over state.
704 uint leftBits = leftOverBits;
705 uint leftSoFar = (leftOverCount & (uint)0x0F);
706 uint leftSize = ((leftOverCount >> 4) & (uint)0x0F);
708 int byteEnd = byteIndex + byteCount;
// byteIndex is advanced by the loop header, so inside the body it still
// addresses the byte held in ch.
709 for(; byteIndex < byteEnd; byteIndex++) {
710 // Fetch the next character from the byte buffer.
711 ch = (uint)(bytes[byteIndex]);
713 // Process a UTF-8 start character.
714 if (ch < (uint)0x0080) {
715 // Single-byte UTF-8 character.
716 if (posn >= length) {
717 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
719 chars[posn++] = (char)ch;
720 } else if ((ch & (uint)0xE0) == (uint)0xC0) {
721 // Double-byte UTF-8 character.
722 leftBits = (ch & (uint)0x1F);
725 } else if ((ch & (uint)0xF0) == (uint)0xE0) {
726 // Three-byte UTF-8 character.
727 leftBits = (ch & (uint)0x0F);
730 } else if ((ch & (uint)0xF8) == (uint)0xF0) {
731 // Four-byte UTF-8 character.
732 leftBits = (ch & (uint)0x07);
735 } else if ((ch & (uint)0xFC) == (uint)0xF8) {
736 // Five-byte UTF-8 character.
737 leftBits = (ch & (uint)0x03);
740 } else if ((ch & (uint)0xFE) == (uint)0xFC) {
741 // Six-byte UTF-8 character.
// The branch condition guarantees bit 1 is clear, so this mask yields 0 or 1.
742 leftBits = (ch & (uint)0x03);
746 // Invalid UTF-8 start character.
747 Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex, 1, chars, ref posn);
750 // Process an extra byte in a multi-byte sequence.
// Continuation bytes have the form 10xxxxxx and contribute six bits.
751 if ((ch & (uint)0xC0) == (uint)0x80) {
752 leftBits = ((leftBits << 6) | (ch & (uint)0x3F));
753 if (++leftSoFar >= leftSize) {
754 // We have a complete character now.
755 if (leftBits < (uint)0x10000) {
756 // is it an overlong ?
757 bool overlong = false;
// Each threshold is the largest value a sequence one byte shorter can hold
// (0x7F, 0x7FF, 0xFFFF, 0x1FFFFF, 0x3FFFFFF).
760 overlong = (leftBits <= 0x7F);
763 overlong = (leftBits <= 0x07FF);
766 overlong = (leftBits <= 0xFFFF);
769 overlong = (leftBits <= 0x1FFFFF);
772 overlong = (leftBits <= 0x03FFFFFF);
// NOTE(review): byteIndex has not been advanced past the current byte here,
// whereas InternalGetCharCount has already post-incremented its index when
// it makes the matching call with index - leftSoFar. Confirm that
// byteIndex - leftSoFar (rather than byteIndex + 1 - leftSoFar) is the
// intended start of the sequence; for a sequence starting at offset 0 it
// evaluates to -1.
776 Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
// Values in 0xD800-0xDFFF are surrogate code units, not valid in UTF-8.
778 else if ((leftBits & 0xF800) == 0xD800) {
779 // UTF-8 doesn't use surrogate characters
780 Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
783 if (posn >= length) {
784 throw new ArgumentException
785 (_("Arg_InsufficientSpace"), "chars");
787 chars[posn++] = (char)leftBits;
// Supplementary-plane code point: needs two chars (a surrogate pair).
789 } else if (leftBits < (uint)0x110000) {
790 if ((posn + 2) > length) {
791 throw new ArgumentException
792 (_("Arg_InsufficientSpace"), "chars");
// Split the 20 remaining bits into a high and a low surrogate.
794 leftBits -= (uint)0x10000;
795 chars[posn++] = (char)((leftBits >> 10) +
798 (char)((leftBits & (uint)0x3FF) + (uint)0xDC00);
800 Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
805 // Invalid UTF-8 sequence: clear and restart.
806 Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
812 if (flush && leftSize != 0) {
813 // We had left-over bytes that didn't make up
814 // a complete UTF-8 character sequence.
815 Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
// Write the decoder state back, re-packing count and size into one value.
817 leftOverBits = leftBits;
818 leftOverCount = (leftSoFar | (leftSize << 4));
820 // Return the final length to the caller.
// charIndex is 0 here, so this is the number of chars written.
821 return posn - charIndex;
824 // Get the characters that result from decoding a byte buffer.
// One-shot call: starts with empty decoder state, uses this encoding's
// DecoderFallback as the provider and flush = true. Argument validation is
// performed by InternalGetChars; the state locals are discarded afterwards.
825 public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
826 char[] chars, int charIndex)
828 uint leftOverBits = 0;
829 uint leftOverCount = 0;
830 DecoderFallbackBuffer buf = null;
831 byte [] bufferArg = null;
832 return InternalGetChars (bytes, byteIndex, byteCount, chars,
833 charIndex, ref leftOverBits, ref leftOverCount, DecoderFallback, ref buf, ref bufferArg, true);
// Pointer overload of GetChars: decodes byteCount bytes starting at bytes
// into at most charCount chars starting at chars, with empty decoder state,
// this encoding's DecoderFallback as the provider and flush = true.
// No argument checks are visible in this overload.
836 [CLSCompliant (false)]
838 public unsafe override int GetChars (byte* bytes, int byteCount, char* chars, int charCount)
840 DecoderFallbackBuffer buf = null;
841 byte [] bufferArg = null;
842 uint leftOverBits = 0;
843 uint leftOverCount = 0;
844 return InternalGetChars (bytes, byteCount, chars,
845 charCount, ref leftOverBits, ref leftOverCount, DecoderFallback, ref buf, ref bufferArg, true);
848 // Get the maximum number of bytes needed to encode a
849 // specified number of characters.
// Returns charCount * 4. Throws ArgumentOutOfRangeException ("charCount");
// the guard is not visible in this listing, but the message resource
// (ArgRange_NonNegative) indicates it rejects negative values.
// NOTE(review): the multiplication is not visibly protected against int overflow.
850 public override int GetMaxByteCount (int charCount)
853 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
855 return charCount * 4;
858 // Get the maximum number of characters needed to decode a
859 // specified number of bytes.
// Throws ArgumentOutOfRangeException ("byteCount"); the guard is not visible
// in this listing, but the message resource (ArgRange_NonNegative)
// indicates it rejects negative values. The return statement is likewise
// not visible here.
860 public override int GetMaxCharCount (int byteCount)
863 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
868 // Get a UTF8-specific decoder that is attached to this instance.
// Each call creates a new UTF8Decoder, seeded with this encoding's DecoderFallback.
869 public override Decoder GetDecoder ()
871 return new UTF8Decoder (DecoderFallback);
874 // Get a UTF8-specific encoder that is attached to this instance.
// Each call creates a new UTF8Encoder, seeded with this encoding's
// EncoderFallback and the emitIdentifier flag.
875 public override Encoder GetEncoder ()
877 return new UTF8Encoder (EncoderFallback, emitIdentifier);
880 // Get the UTF8 preamble.
// Returns a freshly allocated array holding EF BB BF, the UTF-8 encoding of
// U+FEFF (byte order mark).
// NOTE(review): any condition in front of this return (for example a test
// of emitIdentifier) is not visible in this listing.
881 public override byte[] GetPreamble ()
884 return new byte [] { 0xEF, 0xBB, 0xBF };
889 // Determine if this object is equal to another.
// Two instances compare equal when the code page, the emitIdentifier flag
// and both fallback objects are equal.
890 public override bool Equals (Object value)
// `as` yields null for anything that is not a UTF8Encoding.
// NOTE(review): the null test that should precede the dereferences below is
// not visible in this listing - confirm it exists.
892 UTF8Encoding enc = (value as UTF8Encoding);
894 return (codePage == enc.codePage &&
895 emitIdentifier == enc.emitIdentifier &&
896 DecoderFallback.Equals (enc.DecoderFallback) &&
897 EncoderFallback.Equals (enc.EncoderFallback));
903 // Get the hash code for this object.
// Delegates to the base implementation. emitIdentifier, which Equals
// compares, is not mixed in here; that only affects hash distribution.
904 public override int GetHashCode ()
906 return base.GetHashCode ();
// String overload of GetByteCount: forwards unchanged to the base class implementation.
909 public override int GetByteCount (string chars)
911 // hmm, does this override make any sense?
912 return base.GetByteCount (chars);
// Decodes a byte range to a string: forwards unchanged to the base class implementation.
916 public override string GetString (byte [] bytes, int index, int count)
918 // hmm, does this override make any sense?
919 return base.GetString (bytes, index, count);
922 // UTF-8 decoder implementation.
// Stateful decoder returned by GetDecoder. It remembers a partially read
// multi-byte sequence between calls and delegates the real work to the
// static InternalGetCharCount / InternalGetChars helpers of the enclosing class.
924 private class UTF8Decoder : Decoder
// Bits accumulated so far for an unfinished sequence.
926 private uint leftOverBits;
// Packed progress of the unfinished sequence (bytes seen / expected size,
// as unpacked by InternalGetChars).
927 private uint leftOverCount;
// NOTE(review): the constructor body is not visible in this listing.
930 public UTF8Decoder (DecoderFallback fallback)
937 // Override inherited methods.
// Counting passes the state by value, so it does not disturb the decoder;
// the decoder itself is the fallback provider and flush is false.
938 public override int GetCharCount (byte[] bytes, int index, int count)
940 DecoderFallbackBuffer buf = null;
941 byte [] bufferArg = null;
942 return InternalGetCharCount (bytes, index, count,
943 leftOverBits, leftOverCount, this, ref buf, ref bufferArg, false);
// Decoding passes the state by reference so it is updated for the next call;
// flush is false, so a trailing partial sequence is kept rather than reported.
945 public override int GetChars (byte[] bytes, int byteIndex,
946 int byteCount, char[] chars, int charIndex)
948 DecoderFallbackBuffer buf = null;
949 byte [] bufferArg = null;
950 return InternalGetChars (bytes, byteIndex, byteCount,
951 chars, charIndex, ref leftOverBits, ref leftOverCount, this, ref buf, ref bufferArg, false);
954 } // class UTF8Decoder
956 // UTF-8 encoder implementation.
// Stateful encoder returned by GetEncoder. It remembers a pending surrogate
// start between calls and delegates the real work to the static
// InternalGetByteCount / InternalGetBytes helpers of the enclosing class.
// Counting and converting keep separate pending-surrogate fields.
958 private class UTF8Encoder : Encoder
960 // private bool emitIdentifier;
// Pending surrogate start as seen by the GetByteCount overloads.
961 private char leftOverForCount;
// Pending surrogate start as seen by the GetBytes overloads.
962 private char leftOverForConv;
// The emitIdentifier argument is currently unused (its assignment is
// commented out below).
965 public UTF8Encoder (EncoderFallback fallback, bool emitIdentifier)
968 // this.emitIdentifier = emitIdentifier;
969 leftOverForCount = '\0';
970 leftOverForConv = '\0';
973 // Override inherited methods.
// Counts bytes for a slice of chars; updates leftOverForCount.
974 public override int GetByteCount (char[] chars, int index,
975 int count, bool flush)
977 return InternalGetByteCount (chars, index, count, Fallback, ref leftOverForCount, flush);
// Encodes a slice of chars into bytes; updates leftOverForConv. A fresh
// fallback buffer variable is used on every call.
// NOTE(review): the declaration and return of result are not visible in this listing.
979 public override int GetBytes (char[] chars, int charIndex,
980 int charCount, byte[] bytes, int byteIndex, bool flush)
983 EncoderFallbackBuffer buffer = null;
984 result = InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, Fallback, ref buffer, ref leftOverForConv, flush);
985 // emitIdentifier = false;
// Pointer overload of GetByteCount; updates leftOverForCount.
989 public unsafe override int GetByteCount (char* chars, int count, bool flush)
991 return InternalGetByteCount (chars, count, Fallback, ref leftOverForCount, flush);
// Pointer overload of GetBytes; updates leftOverForConv.
994 public unsafe override int GetBytes (char* chars, int charCount,
995 byte* bytes, int byteCount, bool flush)
998 EncoderFallbackBuffer buffer = null;
999 result = InternalGetBytes (chars, charCount, bytes, byteCount, Fallback, ref buffer, ref leftOverForConv, flush);
1000 // emitIdentifier = false;
1003 } // class UTF8Encoder
1005 }; // class UTF8Encoding
1007 }; // namespace System.Text