2 * UTF7Encoding.cs - Implementation of the
3 * "System.Text.UTF7Encoding" class.
5 * Copyright (c) 2002 Southern Storm Software, Pty Ltd
6 * Copyright (c) 2003, 2004, Novell, Inc.
8 * Permission is hereby granted, free of charge, to any person obtaining
9 * a copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
31 using System.Runtime.InteropServices;
37 [MonoTODO ("Serialization format not compatible with .NET")]
43 class UTF7Encoding : Encoding
45 // Magic number used by Windows for UTF-7.
// Passed to the base Encoding constructor by UTF7Encoding (bool).
46 internal const int UTF7_CODE_PAGE = 65000;
// Set once by the UTF7Encoding (bool) constructor, then forwarded to
// InternalGetByteCount / InternalGetBytes and to UTF7Encoder. It takes part
// in GetHashCode and Equals.
49 private bool allowOptionals;
51 // Encoding rule table for 0x00-0x7F.
52 // 0 - full encode, 1 - direct, 2 - optional, 3 - encode plus.
// Laid out as 8 rows of 16 entries; the trailing comment on each row is the
// character code of that row's first entry. InternalGetByteCount and
// InternalGetBytes each take a local reference to this table.
53 private static readonly byte[] encodingRules = {
54 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 00
55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
56 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 3, 1, 1, 1, 1, // 20
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, // 30
59 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 2, 2, 2, // 50
61 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 0, 0, // 70
65 // Characters to use to encode 6-bit values in base64.
// The character at position n represents the 6-bit value n; InternalGetBytes
// indexes this string with the pending bits to produce each output byte.
66 private const String base64Chars =
67 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
70 // Map bytes in base64 to 6-bit values.
// One entry for every byte value 0x00-0xFF (16 rows of 16; the trailing
// comment on each row is the value of its first index). -1 marks a byte that
// is not a base64 digit; the decoding helpers test entries against -1.
71 private static readonly sbyte[] base64Values = {
72 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00
73 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10
74 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, 63, // 20
75 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30
77 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40
78 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, // 50
79 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60
80 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, // 70
82 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 80
83 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 90
84 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // A0
85 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // B0
87 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // C0
88 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // D0
89 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // E0
90 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // F0
// Parameterless constructor. Its initializer/body is not visible in this
// excerpt, so the default it chooses for allowOptionals cannot be stated here.
93 public UTF7Encoding ()
// Creates a UTF-7 encoding registered with the base class under
// UTF7_CODE_PAGE and remembers the allowOptionals choice for the encoder.
98 public UTF7Encoding (bool allowOptionals)
99 : base (UTF7_CODE_PAGE)
101 this.allowOptionals = allowOptionals;
// Descriptive metadata. These members are not declared in this class;
// presumably they are inherited from Encoding — confirm.
104 encoding_name = "Unicode (UTF-7)";
105 header_name = "utf-7";
106 is_mail_news_display = true;
107 is_mail_news_save = true;
// NOTE(review): this assigns UnicodeEncoding's code page constant rather
// than UTF7_CODE_PAGE — presumably deliberate; confirm.
109 windows_code_page = UnicodeEncoding.UNICODE_CODE_PAGE;
// Returns the base class hash code, negated when allowOptionals is true, so
// the two variants of an otherwise identical encoding usually hash apart.
113 public override int GetHashCode ()
115 int basis = base.GetHashCode ();
116 return allowOptionals ? -basis : basis;
// Compares with another UTF7Encoding: the visible return yields true when
// allowOptionals and both the encoder and decoder fallbacks match.
120 public override bool Equals (object other)
// "as" produces null when other is not a UTF7Encoding; the handling of that
// case is on lines not visible in this excerpt.
122 UTF7Encoding e = other as UTF7Encoding;
125 return allowOptionals == e.allowOptionals &&
126 EncoderFallback.Equals (e.EncoderFallback) &&
127 DecoderFallback.Equals (e.DecoderFallback);
131 // Internal version of "GetByteCount" that can handle
132 // a rolling state between calls.
// chars/index/count select the characters to measure. leftOver is the
// encoder's packed pending state (the pending bit count sits above bit 8)
// and isInShifted says whether a base64 run is already open. Both are taken
// by value, so counting never alters the caller's state.
// When flush is true and a shifted run is still open at the end, the bytes
// needed to close it are included in the count.
// Throws ArgumentNullException ("chars") and ArgumentOutOfRangeException
// ("index" / "count") for invalid arguments.
133 private static int InternalGetByteCount
134 (char[] chars, int index, int count, bool flush,
135 int leftOver, bool isInShifted, bool allowOptionals)
137 // Validate the parameters.
139 throw new ArgumentNullException ("chars");
141 if (index < 0 || index > chars.Length) {
142 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
144 if (count < 0 || count > (chars.Length - index)) {
145 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
148 // Determine the length of the output.
// Only the number of pending bits matters for counting, not their values.
150 int leftOverSize = (leftOver >> 8);
151 byte[] rules = encodingRules;
154 ch = (int)(chars[index++]);
163 // Handle characters that must be fully encoded.
164 if ( !isInShifted ) {
// Every complete group of 6 pending bits becomes one output byte.
170 while (leftOverSize >= 6) {
176 // The character is encoded as itself.
178 if (leftOverSize != 0) {
179 // Flush the previous encoded sequence.
183 // Count the "-" (sequence terminator)
190 // The character may need to be encoded.
191 if (allowOptionals) {
198 // Encode the plus sign as "+-".
200 if (leftOverSize != 0) {
201 // Flush the previous encoded sequence.
205 // Count the "-" (sequence terminator)
213 if (isInShifted && flush) {
214 if (leftOverSize != 0)
216 // Flush the previous encoded sequence.
219 // Count the "-" (sequence terminator)
223 // Return the length to the caller.
227 // Get the number of bytes needed to encode a character buffer.
// Counts as a one-shot conversion: flush is true and the state starts empty
// (leftOver 0, not inside a shifted sequence).
228 public override int GetByteCount (char[] chars, int index, int count)
230 return InternalGetByteCount (chars, index, count, true, 0, false, allowOptionals);
233 // Internal version of "GetBytes" that can handle a
234 // rolling state between calls.
// Encodes charCount characters from chars[charIndex..] into bytes starting
// at byteIndex and returns the number of bytes written.
// leftOver (ref) carries pending bits between calls: the bit count is stored
// above bit 8 and the bit values in the low 8 bits. isInShifted (ref) tracks
// whether a base64 run is open. With flush set, an open run is closed at the
// end of the call.
// Throws ArgumentNullException ("chars" / "bytes"), ArgumentOutOfRangeException
// ("charIndex" / "charCount" / "byteIndex"), and ArgumentException ("bytes")
// when the output buffer is too small.
235 private static int InternalGetBytes
236 (char[] chars, int charIndex, int charCount,
237 byte[] bytes, int byteIndex, bool flush,
238 ref int leftOver, ref bool isInShifted, bool allowOptionals)
240 // Validate the parameters.
242 throw new ArgumentNullException ("chars");
245 throw new ArgumentNullException ("bytes");
247 if (charIndex < 0 || charIndex > chars.Length) {
248 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
250 if (charCount < 0 || charCount > (chars.Length - charIndex)) {
251 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
253 if (byteIndex < 0 || byteIndex > bytes.Length) {
254 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
257 // Convert the characters.
258 int posn = byteIndex;
259 int byteLength = bytes.Length;
// Unpack the rolling state: pending bit count and pending bit values.
260 int leftOverSize = (leftOver >> 8);
261 int leftOverBits = (leftOver & 0xFF);
262 byte[] rules = encodingRules;
263 String base64 = base64Chars;
265 while (charCount > 0) {
266 ch = (int)(chars[charIndex++]);
275 // Handle characters that must be fully encoded.
277 if (posn >= byteLength) {
278 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
280 // Start the sequence
281 bytes[posn++] = (byte)'+';
// Append the 16 bits of this character below the bits still pending.
285 leftOverBits = ((leftOverBits << 16) | ch);
287 while (leftOverSize >= 6) {
288 if (posn >= byteLength) {
289 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
// Emit the top 6 pending bits as one base64 digit, then keep only the
// bits that have not been written yet.
292 bytes[posn++] = (byte)(base64 [leftOverBits >> leftOverSize]);
293 leftOverBits &= ((1 << leftOverSize) - 1);
297 // The character is encoded as itself.
299 if (leftOverSize != 0) {
300 // Flush the previous encoded sequence.
301 if ((posn + 1) > byteLength) {
302 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
// Fewer than 6 bits remain: shift them left so they become the high bits
// of a final digit (the low bits are zero padding).
304 bytes[posn++] = (byte)(base64 [leftOverBits << (6 - leftOverSize)]);
306 if ((posn + 1) > byteLength) {
307 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
309 // Terminate the sequence
310 bytes[posn++] = (byte)'-';
315 if (posn >= byteLength) {
316 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
318 bytes[posn++] = (byte)ch;
321 // The character may need to be encoded.
322 if (allowOptionals) {
329 // Encode the plus sign as "+-".
331 if (leftOverSize != 0) {
332 // Flush the previous encoded sequence.
333 if ((posn + 1) > byteLength) {
334 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
336 bytes[posn++] = (byte)(base64 [leftOverBits << (6 - leftOverSize)]);
338 if ((posn + 1) > byteLength) {
339 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
341 // Terminate the sequence
342 bytes[posn++] = (byte)'-';
// The two-byte escape for a literal plus needs room for both bytes.
347 if ((posn + 2) > byteLength) {
348 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
350 bytes[posn++] = (byte)'+';
351 bytes[posn++] = (byte)'-';
355 if (isInShifted && flush) {
356 // Flush the previous encoded sequence.
357 if (leftOverSize != 0) {
358 if ((posn + 1) > byteLength) {
359 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
361 bytes[posn++] = (byte)(base64 [leftOverBits << (6 - leftOverSize)]);
363 // Terminate the sequence
// NOTE(review): no capacity check is visible before this write, unlike the
// other terminator writes above; a full buffer would fail on the array
// access instead of with Arg_InsufficientSpace — confirm.
364 bytes[posn++] = (byte)'-';
// Repack the rolling state for the next call.
369 leftOver = ((leftOverSize << 8) | leftOverBits);
371 // Return the length to the caller.
372 return posn - byteIndex;
375 // Get the bytes that result from encoding a character buffer.
// One-shot encode: flush is true and the conversion starts outside a
// shifted sequence. The state variables passed by ref are locals, so
// nothing carries over to a later call.
376 public override int GetBytes (char[] chars, int charIndex, int charCount,
377 byte[] bytes, int byteIndex)
380 bool isInShifted = false;
381 return InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, true,
382 ref leftOver, ref isInShifted, allowOptionals);
385 // Internal version of "GetCharCount" that can handle
386 // a rolling state between calls.
// bytes/index/count select the input to measure. leftOver is the decoder's
// packed state, taken by value so counting does not change it:
//   0x01000000 set  -> currently inside a base64 sequence (normal == false)
//   0x02000000 set  -> the previous byte was '+'
//   bits 16-23      -> number of pending bits
// Throws ArgumentNullException ("bytes") and ArgumentOutOfRangeException
// ("index" / "count") for invalid arguments.
387 private static int InternalGetCharCount
388 (byte[] bytes, int index, int count, int leftOver)
390 // Validate the parameters.
392 throw new ArgumentNullException ("bytes");
394 if (index < 0 || index > bytes.Length) {
395 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
397 if (count < 0 || count > (bytes.Length - index)) {
398 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
401 // Determine the length of the result.
404 bool normal = ((leftOver & 0x01000000) == 0);
405 bool prevIsPlus = ((leftOver & 0x02000000) != 0);
406 int leftOverSize = ((leftOver >> 16) & 0xFF);
407 sbyte[] base64 = base64Values;
409 byteval = (int)(bytes[index++]);
412 if (byteval != '+') {
413 // Directly-encoded character.
416 // Start of a base64-encoded character.
421 // Process the next byte in a base64 sequence.
422 if (byteval == (int)'-') {
423 // End of a base64 sequence.
429 } else if (base64 [byteval] != -1) {
430 // Extra character in a base64 sequence.
// 16 pending bits make up one complete UTF-16 code unit.
432 if (leftOverSize >= 16) {
445 // Return the final length to the caller.
449 // Get the number of characters needed to decode a byte buffer.
// Counts from an empty decoder state (leftOver == 0).
450 public override int GetCharCount (byte[] bytes, int index, int count)
452 return InternalGetCharCount (bytes, index, count, 0);
455 // Internal version of "GetChars" that can handle a
456 // rolling state between calls.
// Decodes byteCount bytes from bytes[byteIndex..] into chars starting at
// charIndex and returns the number of characters written.
// leftOver (ref) is the packed decoder state, read on entry and rewritten
// on exit:
//   bits 0-15       -> pending bit values
//   bits 16-23      -> number of pending bits
//   0x01000000 set  -> currently inside a base64 sequence
//   0x02000000 set  -> the previous byte was '+'
//   0x04000000 set  -> the last decoded unit was a high surrogate
// Throws ArgumentNullException ("bytes" / "chars"), ArgumentOutOfRangeException
// ("byteIndex" / "byteCount" / "charIndex"), and ArgumentException ("chars")
// for insufficient space or invalid UTF-7 (broken surrogate pairing).
457 private static int InternalGetChars (byte[] bytes, int byteIndex, int byteCount,
458 char[] chars, int charIndex, ref int leftOver)
460 // Validate the parameters.
462 throw new ArgumentNullException ("bytes");
465 throw new ArgumentNullException ("chars");
467 if (byteIndex < 0 || byteIndex > bytes.Length) {
468 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
470 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
471 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
473 if (charIndex < 0 || charIndex > chars.Length) {
474 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
477 // Convert the bytes into characters.
478 int posn = charIndex;
479 int charLength = chars.Length;
480 int byteval, b64value;
// Unpack the rolling state.
481 bool normal = ((leftOver & 0x01000000) == 0);
482 bool prevIsPlus = ((leftOver & 0x02000000) != 0);
483 bool afterHighSurrogate = ((leftOver & 0x04000000) != 0);
484 int leftOverSize = ((leftOver >> 16) & 0xFF);
485 int leftOverBits = (leftOver & 0xFFFF);
486 sbyte[] base64 = base64Values;
487 while (byteCount > 0) {
488 byteval = (int)(bytes[byteIndex++]);
491 if (byteval != '+') {
492 // Directly-encoded character.
493 if (posn >= charLength) {
494 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
// A direct character may not follow an unpaired high surrogate.
496 if (afterHighSurrogate) {
497 throw new ArgumentException (_("Arg_InvalidUTF7"), "chars");
499 chars[posn++] = (char)byteval;
501 // Start of a base64-encoded character.
506 // Process the next byte in a base64 sequence.
507 if (byteval == (int)'-') {
508 // End of a base64 sequence.
510 if (posn >= charLength) {
511 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
513 if (afterHighSurrogate) {
514 throw new ArgumentException (_("Arg_InvalidUTF7"), "chars");
519 // When decoding, any bits at the end of the Modified Base64 sequence that
520 // do not constitute a complete 16-bit Unicode character are discarded.
521 // If such discarded bits are non-zero the sequence is ill-formed.
526 else if ((b64value = base64[byteval]) != -1)
528 // Extra character in a base64 sequence.
529 leftOverBits = (leftOverBits << 6) | b64value;
531 if (leftOverSize >= 16) {
532 if (posn >= charLength) {
533 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
// Shift away the bits that belong to the following unit; what is left is
// the next UTF-16 code unit.
536 char nextChar = (char)(leftOverBits >> leftOverSize);
// 0xD800-0xDBFF is a high surrogate: remember that a low one must follow.
537 if ((nextChar & 0xFC00) == 0xD800) {
538 afterHighSurrogate = true;
// 0xDC00-0xDFFF is a low surrogate: only valid right after a high one.
540 else if ((nextChar & 0xFC00) == 0xDC00) {
541 if (!afterHighSurrogate) {
542 throw new ArgumentException (_("Arg_InvalidUTF7"), "chars");
544 afterHighSurrogate = false;
546 chars[posn++] = nextChar;
// Keep only the bits that have not been consumed yet.
547 leftOverBits &= ((1 << leftOverSize) - 1);
550 if (posn >= charLength) {
551 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
553 if (afterHighSurrogate) {
554 throw new ArgumentException (_("Arg_InvalidUTF7"), "chars");
556 chars[posn++] = (char)byteval;
// Repack the rolling state for the next call.
564 leftOver = (leftOverBits | (leftOverSize << 16) |
565 (normal ? 0 : 0x01000000) |
566 (prevIsPlus ? 0x02000000 : 0) |
567 (afterHighSurrogate ? 0x04000000 : 0));
569 // Return the final length to the caller.
570 return posn - charIndex;
573 // Get the characters that result from decoding a byte buffer.
// One-shot decode through InternalGetChars. Afterwards the state's
// 0x04000000 flag is checked: if the input ended right after a high
// surrogate, the data is rejected with ArgumentException.
574 public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
575 char[] chars, int charIndex)
578 int amount = InternalGetChars (bytes, byteIndex, byteCount, chars, charIndex, ref leftOver);
579 if ((leftOver & 0x04000000) != 0) {
580 throw new ArgumentException (_("Arg_InvalidUTF7"), "chars");
585 // Get the maximum number of bytes needed to encode a
586 // specified number of characters.
// Throws ArgumentOutOfRangeException ("charCount"); the guarding condition
// is not visible here — presumably charCount < 0, per the message key.
587 public override int GetMaxByteCount (int charCount)
590 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
// Each char is 16 bits and each base64 digit carries 6, so a full group of
// 3 chars (48 bits) needs 8 digits; the remaining 0-2 chars are budgeted
// 3 bytes each. The extra 2 presumably covers the opening '+' and the
// closing '-' — confirm.
594 return 8 * (int) (charCount / 3) + (charCount % 3) * 3 + 2;
597 // Get the maximum number of characters needed to decode a
598 // specified number of bytes.
// Throws ArgumentOutOfRangeException ("byteCount"); the guarding condition
// and the returned bound are on lines not visible in this excerpt.
599 public override int GetMaxCharCount (int byteCount)
602 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
607 // Get a UTF7-specific decoder that is attached to this instance.
// Each call returns a new UTF7Decoder; no state from this encoding object
// is passed to it.
608 public override Decoder GetDecoder ()
610 return new UTF7Decoder ();
613 // Get a UTF7-specific encoder that is attached to this instance.
// Each call returns a new UTF7Encoder that receives this encoding's
// allowOptionals setting.
614 public override Encoder GetEncoder ()
616 return new UTF7Encoder (allowOptionals);
619 // UTF-7 decoder implementation.
// Stateful wrapper around the static InternalGetCharCount / InternalGetChars
// helpers, so input can be decoded across several calls.
620 private sealed class UTF7Decoder : Decoder
// Packed decoder state carried from one GetChars call to the next.
623 private int leftOver;
626 public UTF7Decoder ()
631 // Override inherited methods.
// Counts using the current state; leftOver is passed by value and so is
// left unchanged.
632 public override int GetCharCount (byte[] bytes, int index, int count)
634 return InternalGetCharCount (bytes, index, count, leftOver);
// Decodes and updates leftOver (passed by ref) for the next call.
636 public override int GetChars (byte[] bytes, int byteIndex,
637 int byteCount, char[] chars,
640 return InternalGetChars (bytes, byteIndex, byteCount, chars, charIndex, ref leftOver);
643 } // class UTF7Decoder
645 // UTF-7 encoder implementation.
// Stateful wrapper around the static InternalGetByteCount / InternalGetBytes
// helpers, so input can be encoded across several calls.
646 private sealed class UTF7Encoder : Encoder
// Setting supplied by the creator and forwarded to the helpers.
648 private bool allowOptionals;
// Packed pending bits carried between GetBytes calls.
649 private int leftOver = 0;
// True while a base64 (shifted) sequence is open between calls.
650 private bool isInShifted = false;
653 public UTF7Encoder (bool allowOptionals)
655 this.allowOptionals = allowOptionals;
658 // Override inherited methods.
// Counts using the current state; the state is passed by value and so is
// left unchanged.
659 public override int GetByteCount (char[] chars, int index,
660 int count, bool flush)
662 return InternalGetByteCount
663 (chars, index, count, flush, leftOver, isInShifted, allowOptionals);
// Encodes and updates leftOver / isInShifted (passed by ref) for the next call.
665 public override int GetBytes (char[] chars, int charIndex,
666 int charCount, byte[] bytes,
667 int byteIndex, bool flush)
669 return InternalGetBytes (chars, charIndex, charCount,
670 bytes, byteIndex, flush,
671 ref leftOver, ref isInShifted, allowOptionals);
674 } // class UTF7Encoder
677 // a bunch of practically missing implementations (but should just work)
// Pointer overload: forwards unchanged to the base class implementation.
679 [CLSCompliantAttribute (false)]
681 public override unsafe int GetByteCount (char *chars, int count)
683 return base.GetByteCount (chars, count);
// String overload: forwards unchanged to the base class implementation.
687 public override int GetByteCount (string s)
689 return base.GetByteCount (s);
// Pointer overload: forwards unchanged to the base class implementation.
693 [CLSCompliantAttribute (false)]
694 public override unsafe int GetBytes (char *chars, int charCount, byte* bytes, int byteCount)
696 return base.GetBytes (chars, charCount, bytes, byteCount);
// String overload: forwards unchanged to the base class implementation.
700 public override int GetBytes (string s, int charIndex, int charCount, byte [] bytes, int byteIndex)
702 return base.GetBytes (s, charIndex, charCount, bytes, byteIndex);
// Pointer overload: forwards unchanged to the base class implementation.
706 [CLSCompliantAttribute (false)]
707 public override unsafe int GetCharCount (byte *bytes, int count)
709 return base.GetCharCount (bytes, count);
// Pointer overload: forwards unchanged to the base class implementation.
713 [CLSCompliantAttribute (false)]
714 public override unsafe int GetChars (byte* bytes, int byteCount, char* chars, int charCount)
716 return base.GetChars (bytes, byteCount, chars, charCount);
// Forwards unchanged to the base class implementation.
720 public override string GetString (byte [] bytes, int index, int count)
722 return base.GetString (bytes, index, count);
727 }; // class UTF7Encoding
729 }; // namespace System.Text