2 * UTF7Encoding.cs - Implementation of the
3 * "System.Text.UTF7Encoding" class.
5 * Copyright (c) 2002 Southern Storm Software, Pty Ltd
6 * Copyright (c) 2003, 2004, Novell, Inc.
8 * Permission is hereby granted, free of charge, to any person obtaining
9 * a copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
33 [MonoTODO ("Fix serialization compatibility with MS.NET")]
39 class UTF7Encoding : Encoding
41 // Magic number used by Windows for UTF-7.
42 internal const int UTF7_CODE_PAGE = 65000;
45 private bool allowOptionals;
47 // Encoding rule table for 0x00-0x7F.
48 // 0 - full encode, 1 - direct, 2 - optional, 3 - encode plus.
49 private static readonly byte[] encodingRules = {
50 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 00
51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
52 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 3, 1, 1, 1, 1, // 20
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, // 30
55 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 2, 2, 2, // 50
57 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 0, 0, // 70
61 // Characters to use to encode 6-bit values in base64.
62 private const String base64Chars =
63 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
65 // Map bytes in base64 to 6-bit values.
66 private static readonly sbyte[] base64Values = {
67 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00
68 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10
69 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, 63, // 20
70 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30
72 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40
73 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, // 50
74 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60
75 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, // 70
77 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 80
78 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 90
79 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // A0
80 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // B0
82 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // C0
83 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // D0
84 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // E0
85 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // F0
89 public UTF7Encoding ()
94 public UTF7Encoding (bool allowOptionals)
95 : base (UTF7_CODE_PAGE)
97 this.allowOptionals = allowOptionals;
100 encoding_name = "Unicode (UTF-7)";
101 header_name = "utf-7";
102 is_mail_news_display = true;
103 is_mail_news_save = true;
105 windows_code_page = UnicodeEncoding.UNICODE_CODE_PAGE;
108 // Internal version of "GetByteCount" that can handle
109 // a rolling state between calls.
110 private static int InternalGetByteCount
111 (char[] chars, int index, int count, bool flush,
112 int leftOver, bool isInShifted, bool allowOptionals)
114 // Validate the parameters.
116 throw new ArgumentNullException ("chars");
118 if (index < 0 || index > chars.Length) {
119 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
121 if (count < 0 || count > (chars.Length - index)) {
122 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
125 // Determine the length of the output.
127 int leftOverSize = (leftOver >> 8);
128 byte[] rules = encodingRules;
131 ch = (int)(chars[index++]);
140 // Handle characters that must be fully encoded.
141 if ( !isInShifted ) {
147 while (leftOverSize >= 6) {
153 // The character is encoded as itself.
155 if (leftOverSize != 0) {
156 // Flush the previous encoded sequence.
160 // Count the "-" (sequence terminator)
167 // The character may need to be encoded.
168 if (allowOptionals) {
175 // Encode the plus sign as "+-".
177 if (leftOverSize != 0) {
178 // Flush the previous encoded sequence.
182 // Count the "-" (sequence terminator)
190 if (isInShifted && flush) {
191 if (leftOverSize != 0)
193 // Flush the previous encoded sequence.
196 // Count the "-" (sequence terminator)
200 // Return the length to the caller.
204 // Get the number of bytes needed to encode a character buffer.
205 public override int GetByteCount (char[] chars, int index, int count)
207 return InternalGetByteCount (chars, index, count, true, 0, false, allowOptionals);
210 // Internal version of "GetBytes" that can handle a
211 // rolling state between calls.
212 private static int InternalGetBytes
213 (char[] chars, int charIndex, int charCount,
214 byte[] bytes, int byteIndex, bool flush,
215 ref int leftOver, ref bool isInShifted, bool allowOptionals)
217 // Validate the parameters.
219 throw new ArgumentNullException ("chars");
222 throw new ArgumentNullException ("bytes");
224 if (charIndex < 0 || charIndex > chars.Length) {
225 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
227 if (charCount < 0 || charCount > (chars.Length - charIndex)) {
228 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
230 if (byteIndex < 0 || byteIndex > bytes.Length) {
231 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
234 // Convert the characters.
235 int posn = byteIndex;
236 int byteLength = bytes.Length;
237 int leftOverSize = (leftOver >> 8);
238 int leftOverBits = (leftOver & 0xFF);
239 byte[] rules = encodingRules;
240 String base64 = base64Chars;
242 while (charCount > 0) {
243 ch = (int)(chars[charIndex++]);
252 // Handle characters that must be fully encoded.
254 if (posn >= byteLength) {
255 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
257 // Start the sequence
258 bytes[posn++] = (byte)'+';
262 leftOverBits = ((leftOverBits << 16) | ch);
264 while (leftOverSize >= 6) {
265 if (posn >= byteLength) {
266 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
269 bytes[posn++] = (byte)(base64 [leftOverBits >> leftOverSize]);
270 leftOverBits &= ((1 << leftOverSize) - 1);
274 // The character is encoded as itself.
276 if (leftOverSize != 0) {
277 // Flush the previous encoded sequence.
278 if ((posn + 1) > byteLength) {
279 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
281 bytes[posn++] = (byte)(base64 [leftOverBits << (6 - leftOverSize)]);
283 if ((posn + 1) > byteLength) {
284 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
286 // Terminate the sequence
287 bytes[posn++] = (byte)'-';
292 if (posn >= byteLength) {
293 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
295 bytes[posn++] = (byte)ch;
298 // The character may need to be encoded.
299 if (allowOptionals) {
306 // Encode the plus sign as "+-".
308 if (leftOverSize != 0) {
309 // Flush the previous encoded sequence.
310 if ((posn + 1) > byteLength) {
311 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
313 bytes[posn++] = (byte)(base64 [leftOverBits << (6 - leftOverSize)]);
315 if ((posn + 1) > byteLength) {
316 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
318 // Terminate the sequence
319 bytes[posn++] = (byte)'-';
324 if ((posn + 2) > byteLength) {
325 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
327 bytes[posn++] = (byte)'+';
328 bytes[posn++] = (byte)'-';
332 if (isInShifted && flush) {
333 // Flush the previous encoded sequence.
334 if (leftOverSize != 0) {
335 if ((posn + 1) > byteLength) {
336 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
338 bytes[posn++] = (byte)(base64 [leftOverBits << (6 - leftOverSize)]);
340 // Terminate the sequence
341 bytes[posn++] = (byte)'-';
346 leftOver = ((leftOverSize << 8) | leftOverBits);
348 // Return the length to the caller.
349 return posn - byteIndex;
352 // Get the bytes that result from encoding a character buffer.
353 public override int GetBytes (char[] chars, int charIndex, int charCount,
354 byte[] bytes, int byteIndex)
357 bool isInShifted = false;
358 return InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, true,
359 ref leftOver, ref isInShifted, allowOptionals);
362 // Internal version of "GetCharCount" that can handle
363 // a rolling state between calls.
364 private static int InternalGetCharCount
365 (byte[] bytes, int index, int count, int leftOver)
367 // Validate the parameters.
369 throw new ArgumentNullException ("bytes");
371 if (index < 0 || index > bytes.Length) {
372 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
374 if (count < 0 || count > (bytes.Length - index)) {
375 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
378 // Determine the length of the result.
381 bool normal = ((leftOver & 0x01000000) == 0);
382 bool prevIsPlus = ((leftOver & 0x02000000) != 0);
383 int leftOverSize = ((leftOver >> 16) & 0xFF);
384 sbyte[] base64 = base64Values;
386 byteval = (int)(bytes[index++]);
389 if (byteval != '+') {
390 // Directly-encoded character.
393 // Start of a base64-encoded character.
398 // Process the next byte in a base64 sequence.
399 if (byteval == (int)'-') {
400 // End of a base64 sequence.
406 } else if (base64 [byteval] != -1) {
407 // Extra character in a base64 sequence.
409 if (leftOverSize >= 16) {
422 // Return the final length to the caller.
426 // Get the number of characters needed to decode a byte buffer.
427 public override int GetCharCount (byte[] bytes, int index, int count)
429 return InternalGetCharCount (bytes, index, count, 0);
432 // Internal version of "GetChars" that can handle a
433 // rolling state between calls.
434 private static int InternalGetChars (byte[] bytes, int byteIndex, int byteCount,
435 char[] chars, int charIndex, ref int leftOver)
437 // Validate the parameters.
439 throw new ArgumentNullException ("bytes");
442 throw new ArgumentNullException ("chars");
444 if (byteIndex < 0 || byteIndex > bytes.Length) {
445 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
447 if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
448 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
450 if (charIndex < 0 || charIndex > chars.Length) {
451 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
454 // Convert the bytes into characters.
455 int posn = charIndex;
456 int charLength = chars.Length;
457 int byteval, b64value;
458 bool normal = ((leftOver & 0x01000000) == 0);
459 bool prevIsPlus = ((leftOver & 0x02000000) != 0);
460 int leftOverSize = ((leftOver >> 16) & 0xFF);
461 int leftOverBits = (leftOver & 0xFFFF);
462 sbyte[] base64 = base64Values;
463 while (byteCount > 0) {
464 byteval = (int)(bytes[byteIndex++]);
467 if (byteval != '+') {
468 // Directly-encoded character.
469 if (posn >= charLength) {
470 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
472 chars[posn++] = (char)byteval;
474 // Start of a base64-encoded character.
479 // Process the next byte in a base64 sequence.
480 if (byteval == (int)'-') {
481 // End of a base64 sequence.
483 if (posn >= charLength) {
484 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
489 // When decoding, any bits at the end of the Modified Base64 sequence that
490 // do not constitute a complete 16-bit Unicode character are discarded.
491 // If such discarded bits are non-zero the sequence is ill-formed.
496 else if ((b64value = base64[byteval]) != -1)
498 // Extra character in a base64 sequence.
499 leftOverBits = (leftOverBits << 6) | b64value;
501 if (leftOverSize >= 16) {
502 if (posn >= charLength) {
503 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
506 chars[posn++] = (char)(leftOverBits >> leftOverSize);
507 leftOverBits &= ((1 << leftOverSize) - 1);
510 if (posn >= charLength) {
511 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
513 chars[posn++] = (char)byteval;
521 leftOver = (leftOverBits | (leftOverSize << 16) |
522 (normal ? 0 : 0x01000000) |
523 (prevIsPlus ? 0x02000000 : 0));
525 // Return the final length to the caller.
526 return posn - charIndex;
529 // Get the characters that result from decoding a byte buffer.
530 public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
531 char[] chars, int charIndex)
534 return InternalGetChars (bytes, byteIndex, byteCount, chars, charIndex, ref leftOver);
537 // Get the maximum number of bytes needed to encode a
538 // specified number of characters.
539 public override int GetMaxByteCount (int charCount)
542 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
546 return 8 * (int) (charCount / 3) + (charCount % 3) * 3 + 2;
549 // Get the maximum number of characters needed to decode a
550 // specified number of bytes.
551 public override int GetMaxCharCount (int byteCount)
554 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
559 // Get a UTF7-specific decoder that is attached to this instance.
560 public override Decoder GetDecoder ()
562 return new UTF7Decoder ();
565 // Get a UTF7-specific encoder that is attached to this instance.
566 public override Encoder GetEncoder ()
568 return new UTF7Encoder (allowOptionals);
571 // UTF-7 decoder implementation.
572 private sealed class UTF7Decoder : Decoder
575 private int leftOver;
578 public UTF7Decoder ()
583 // Override inherited methods.
584 public override int GetCharCount (byte[] bytes, int index, int count)
586 return InternalGetCharCount (bytes, index, count, leftOver);
588 public override int GetChars (byte[] bytes, int byteIndex,
589 int byteCount, char[] chars,
592 return InternalGetChars (bytes, byteIndex, byteCount, chars, charIndex, ref leftOver);
595 } // class UTF7Decoder
597 // UTF-7 encoder implementation.
598 private sealed class UTF7Encoder : Encoder
600 private bool allowOptionals;
601 private int leftOver = 0;
602 private bool isInShifted = false;
605 public UTF7Encoder (bool allowOptionals)
607 this.allowOptionals = allowOptionals;
610 // Override inherited methods.
611 public override int GetByteCount (char[] chars, int index,
612 int count, bool flush)
614 return InternalGetByteCount
615 (chars, index, count, flush, leftOver, isInShifted, allowOptionals);
617 public override int GetBytes (char[] chars, int charIndex,
618 int charCount, byte[] bytes,
619 int byteIndex, bool flush)
621 return InternalGetBytes (chars, charIndex, charCount,
622 bytes, byteIndex, flush,
623 ref leftOver, ref isInShifted, allowOptionals);
626 } // class UTF7Encoder
628 }; // class UTF7Encoding
630 }; // namespace System.Text