2004-07-07 Sebastien Pouliot <sebastien@ximian.com>
[mono.git] / mcs / class / corlib / System.Text / UTF7Encoding.cs
1 /*
2  * UTF7Encoding.cs - Implementation of the
3  *              "System.Text.UTF7Encoding" class.
4  *
5  * Copyright (c) 2002  Southern Storm Software, Pty Ltd
6  * Copyright (c) 2003, 2004, Novell, Inc.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining
9  * a copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included
16  * in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  */
26
27 namespace System.Text
28 {
29
30 using System;
31
[Serializable]
[MonoTODO ("Fix serialization compatibility with MS.NET")]
#if ECMA_COMPAT
internal
#else
public
#endif
class UTF7Encoding : Encoding
{
	// Magic number used by Windows for UTF-7.
	internal const int UTF7_CODE_PAGE = 65000;

	// Values stored in "encodingRules".
	private const int RULE_ENCODE   = 0;	// Must be base64-encoded.
	private const int RULE_DIRECT   = 1;	// Written as itself.
	private const int RULE_OPTIONAL = 2;	// Direct only if "allowOptionals".
	private const int RULE_PLUS     = 3;	// The plus sign, written as "+-".

	// Layout of the encoder's rolling state word ("leftOver"):
	//   bits 0-7   pending bits not yet written as a base64 character
	//   bits 8-15  number of pending bits (always less than 6)
	//   bit  16    set while a "+..." base64 sequence is open
	// The "open" flag is tracked separately from the bit count because
	// a sequence can be open with zero pending bits (after every third
	// encoded character), and it still needs a "-" terminator then.
	private const int ENCODER_SHIFTED = 0x00010000;

	// Layout of the decoder's rolling state word ("leftOver"):
	//   bits 0-15  pending bits not yet assembled into a character
	//   bits 16-23 number of pending bits (always less than 16)
	//   bit  24    set while inside a base64 sequence
	//   bit  25    set if the previous byte was the introducing '+'
	private const int DECODER_IN_BASE64 = 0x01000000;
	private const int DECODER_PREV_PLUS = 0x02000000;

	// Internal state.
	private bool allowOptionals;

	// Encoding rule table for 0x00-0x7F.
	// 0 - full encode, 1 - direct, 2 - optional, 3 - encode plus.
	private static readonly byte[] encodingRules = {
		0, 0, 0, 0, 0, 0, 0, 0,   0, 1, 1, 0, 0, 1, 0, 0,	// 00
		0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,	// 10
		1, 2, 2, 2, 2, 2, 2, 1,   1, 1, 2, 3, 1, 1, 1, 1,	// 20
		1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 2, 2, 2, 2, 1,	// 30

		2, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,	// 40
		1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 2, 0, 2, 2, 2,	// 50
		2, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,	// 60
		1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 2, 2, 2, 0, 0,	// 70
	};

	// Characters to use to encode 6-bit values in base64.
	private const String base64Chars =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

	// Map bytes in base64 to 6-bit values (-1 for non-base64 bytes).
	private static readonly sbyte[] base64Values = {
		-1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1, // 00
		-1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1, // 10
		-1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, 62, -1, -1, 63, // 20
		52, 53, 54, 55, 56, 57, 58, 59,   60, 61, -1, -1, -1, -1, -1, -1, // 30

		-1,  0,  1,  2,  3,  4,  5,  6,    7,  8,  9, 10, 11, 12, 13, 14, // 40
		15, 16, 17, 18, 19, 20, 21, 22,   23, 24, 25, -1, -1, -1, -1, -1, // 50
		-1, 26, 27, 28, 29, 30, 31, 32,   33, 34, 35, 36, 37, 38, 39, 40, // 60
		41, 42, 43, 44, 45, 46, 47, 48,   49, 50, 51, -1, -1, -1, -1, -1, // 70

		-1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1, // 80
		-1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1, // 90
		-1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1, // A0
		-1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1, // B0

		-1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1, // C0
		-1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1, // D0
		-1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1, // E0
		-1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1, // F0
	};

	// Constructors.

	// Create an encoding that base64-encodes the "optional direct"
	// characters (rule 2 in the table above).
	public UTF7Encoding ()
	: this (false)
	{
	}

	// Create an encoding; "allowOptionals" selects whether the
	// "optional direct" characters are written as themselves.
	public UTF7Encoding (bool allowOptionals)
	: base (UTF7_CODE_PAGE)
	{
		this.allowOptionals = allowOptionals;

		body_name = "utf-7";
		encoding_name = "Unicode (UTF-7)";
		header_name = "utf-7";
		is_mail_news_display = true;
		is_mail_news_save = true;
		web_name = "utf-7";
		windows_code_page = UnicodeEncoding.UNICODE_CODE_PAGE;
	}

	// Look up how a UTF-16 code unit must be written.  Everything at
	// or above 0x80 is always base64-encoded; an "optional" character
	// is resolved to direct or encoded according to "allowOptionals",
	// so the result is never RULE_OPTIONAL.
	private static int GetEncodingRule (int ch, bool allowOptionals)
	{
		int rule = (ch < 0x0080) ? encodingRules[ch] : RULE_ENCODE;
		if (rule == RULE_OPTIONAL) {
			return (allowOptionals ? RULE_DIRECT : RULE_ENCODE);
		}
		return rule;
	}

	// Write the end of an open base64 sequence at "posn": the pending
	// bits padded with zeros to a full base64 character (only if there
	// are any), followed by the '-' terminator.  Returns the new output
	// position.  Throws ArgumentException if "bytes" is too small.
	private static int EndShiftedSequence
				(byte[] bytes, int posn, int leftOverBits, int leftOverSize)
	{
		int needed = (leftOverSize != 0) ? 2 : 1;
		if ((bytes.Length - posn) < needed) {
			throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
		}
		if (leftOverSize != 0) {
			bytes[posn++] = (byte)(base64Chars [leftOverBits << (6 - leftOverSize)]);
		}
		bytes[posn++] = (byte)'-';
		return posn;
	}

	// Internal version of "GetByteCount" that can handle
	// a rolling state between calls.  "leftOver" is the encoder state
	// word described above; it is only read here.  When "flush" is true
	// the bytes needed to close an open base64 sequence are included.
	// This must stay in step with "InternalGetBytes".
	private static int InternalGetByteCount
				(char[] chars, int index, int count, bool flush,
				 int leftOver, bool allowOptionals)
	{
		// Validate the parameters.
		if (chars == null) {
			throw new ArgumentNullException ("chars");
		}
		if (index < 0 || index > chars.Length) {
			throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
		}
		if (count < 0 || count > (chars.Length - index)) {
			throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
		}

		// Determine the length of the output.
		int length = 0;
		int leftOverSize = ((leftOver >> 8) & 0xFF);
		bool isInShifted = ((leftOver & ENCODER_SHIFTED) != 0);
		int ch, rule;
		while (count > 0) {
			ch = (int)(chars[index++]);
			--count;
			rule = GetEncodingRule (ch, allowOptionals);
			if (rule == RULE_ENCODE) {
				// Character that must be fully encoded.
				if (!isInShifted) {
					// The '+' that opens the sequence.
					++length;
					isInShifted = true;
				}
				leftOverSize += 16;
				while (leftOverSize >= 6) {
					++length;
					leftOverSize -= 6;
				}
			} else {
				// Direct character or plus sign: close any open
				// sequence first (padded partial character, then '-').
				if (isInShifted) {
					if (leftOverSize != 0) {
						++length;
						leftOverSize = 0;
					}
					++length;
					isInShifted = false;
				}
				// A plus sign is written as "+-".
				length += ((rule == RULE_PLUS) ? 2 : 1);
			}
		}
		if (isInShifted && flush) {
			if (leftOverSize != 0) {
				++length;
			}
			++length;
		}

		// Return the length to the caller.
		return length;
	}

	// Get the number of bytes needed to encode a character buffer.
	public override int GetByteCount (char[] chars, int index, int count)
	{
		return InternalGetByteCount (chars, index, count, true, 0, allowOptionals);
	}

	// Internal version of "GetBytes" that can handle a
	// rolling state between calls.  "leftOver" is the encoder state
	// word described above and is updated on successful return.  When
	// "flush" is true any open base64 sequence is closed at the end.
	// Returns the number of bytes written.  Throws ArgumentException
	// if "bytes" is too small; bytes written before that point stay in
	// the buffer and "leftOver" is left unchanged.
	private static int InternalGetBytes
				(char[] chars, int charIndex, int charCount,
				 byte[] bytes, int byteIndex, bool flush,
				 ref int leftOver, bool allowOptionals)
	{
		// Validate the parameters.
		if (chars == null) {
			throw new ArgumentNullException ("chars");
		}
		if (bytes == null) {
			throw new ArgumentNullException ("bytes");
		}
		if (charIndex < 0 || charIndex > chars.Length) {
			throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
		}
		if (charCount < 0 || charCount > (chars.Length - charIndex)) {
			throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
		}
		if (byteIndex < 0 || byteIndex > bytes.Length) {
			throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
		}

		// Convert the characters.
		int posn = byteIndex;
		int byteLength = bytes.Length;
		int leftOverSize = ((leftOver >> 8) & 0xFF);
		int leftOverBits = (leftOver & 0xFF);
		bool isInShifted = ((leftOver & ENCODER_SHIFTED) != 0);
		String base64 = base64Chars;
		int ch;
		while (charCount > 0) {
			ch = (int)(chars[charIndex++]);
			--charCount;
			switch (GetEncodingRule (ch, allowOptionals)) {
			case RULE_ENCODE:
				// Handle characters that must be fully encoded.
				if (!isInShifted) {
					if (posn >= byteLength) {
						throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
					}
					bytes[posn++] = (byte)'+';
					isInShifted = true;
				}
				// Append the 16 new bits and write out every
				// complete 6-bit group, most significant first.
				leftOverBits = ((leftOverBits << 16) | ch);
				leftOverSize += 16;
				while (leftOverSize >= 6) {
					if (posn >= byteLength) {
						throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
					}
					leftOverSize -= 6;
					bytes[posn++] = (byte)(base64 [leftOverBits >> leftOverSize]);
					leftOverBits &= ((1 << leftOverSize) - 1);
				}
				break;

			case RULE_DIRECT:
				// The character is encoded as itself.
				if (isInShifted) {
					// Always terminate with '-': the character may
					// itself be a base64 letter or '-', which the
					// decoder would otherwise absorb into the sequence.
					posn = EndShiftedSequence (bytes, posn, leftOverBits, leftOverSize);
					leftOverSize = 0;
					leftOverBits = 0;
					isInShifted = false;
				}
				if (posn >= byteLength) {
					throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
				}
				bytes[posn++] = (byte)ch;
				break;

			case RULE_PLUS:
				// Encode the plus sign as "+-".
				if (isInShifted) {
					posn = EndShiftedSequence (bytes, posn, leftOverBits, leftOverSize);
					leftOverSize = 0;
					leftOverBits = 0;
					isInShifted = false;
				}
				if ((byteLength - posn) < 2) {
					throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
				}
				bytes[posn++] = (byte)'+';
				bytes[posn++] = (byte)'-';
				break;
			}
		}
		if (isInShifted && flush) {
			posn = EndShiftedSequence (bytes, posn, leftOverBits, leftOverSize);
			leftOverSize = 0;
			leftOverBits = 0;
			isInShifted = false;
		}
		leftOver = ((leftOverSize << 8) | leftOverBits |
				    (isInShifted ? ENCODER_SHIFTED : 0));

		// Return the length to the caller.
		return posn - byteIndex;
	}

	// Get the bytes that result from encoding a character buffer.
	public override int GetBytes (char[] chars, int charIndex, int charCount,
								 byte[] bytes, int byteIndex)
	{
		int leftOver = 0;
		return InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, true,
								ref leftOver, allowOptionals);
	}

	// Internal version of "GetCharCount" that can handle
	// a rolling state between calls.  "leftOver" is the decoder state
	// word described above; it is only read here.  This must stay in
	// step with "InternalGetChars", except that it does not track the
	// pending bit values and therefore never throws FormatException.
	private static int InternalGetCharCount
					(byte[] bytes, int index, int count, int leftOver)
	{
		// Validate the parameters.
		if (bytes == null) {
			throw new ArgumentNullException ("bytes");
		}
		if (index < 0 || index > bytes.Length) {
			throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
		}
		if (count < 0 || count > (bytes.Length - index)) {
			throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
		}

		// Determine the length of the result.
		int length = 0;
		int byteval;
		bool normal = ((leftOver & DECODER_IN_BASE64) == 0);
		bool prevIsPlus = ((leftOver & DECODER_PREV_PLUS) != 0);
		int leftOverSize = ((leftOver >> 16) & 0xFF);
		sbyte[] base64 = base64Values;
		while (count > 0) {
			byteval = (int)(bytes[index++]);
			--count;
			if (normal) {
				if (byteval != '+') {
					// Directly-encoded character.
					++length;
				} else {
					// Start of a base64-encoded character.
					normal = false;
					prevIsPlus = true;
				}
			} else {
				// Process the next byte in a base64 sequence.
				if (byteval == (int)'-') {
					// End of a base64 sequence; "+-" is a literal '+'.
					if (prevIsPlus) {
						++length;
					}
					// Padding bits never carry over into the
					// next sequence.
					leftOverSize = 0;
					normal = true;
				} else if (base64[byteval] != -1) {
					// Extra character in a base64 sequence.
					leftOverSize += 6;
					if (leftOverSize >= 16) {
						++length;
						leftOverSize -= 16;
					}
				} else {
					// Normal character terminating a base64 sequence.
					// Incomplete trailing bits are discarded, so only
					// the terminating character itself is counted.
					leftOverSize = 0;
					++length;
					normal = true;
				}
				prevIsPlus = false;
			}
		}

		// Return the final length to the caller.
		return length;
	}

	// Get the number of characters needed to decode a byte buffer.
	public override int GetCharCount (byte[] bytes, int index, int count)
	{
		return InternalGetCharCount (bytes, index, count, 0);
	}

	// Internal version of "GetChars" that can handle a
	// rolling state between calls.  "leftOver" is the decoder state
	// word described above and is updated on successful return.
	// Returns the number of characters written.  Throws
	// ArgumentException if "chars" is too small, and FormatException
	// if a sequence closed by '-' has non-zero padding bits.
	private static int InternalGetChars (byte[] bytes, int byteIndex, int byteCount,
				 char[] chars, int charIndex, ref int leftOver)
	{
		// Validate the parameters.
		if (bytes == null) {
			throw new ArgumentNullException ("bytes");
		}
		if (chars == null) {
			throw new ArgumentNullException ("chars");
		}
		if (byteIndex < 0 || byteIndex > bytes.Length) {
			throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
		}
		if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
			throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
		}
		if (charIndex < 0 || charIndex > chars.Length) {
			throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
		}

		// Convert the bytes into characters.
		int posn = charIndex;
		int charLength = chars.Length;
		int byteval, b64value;
		bool normal = ((leftOver & DECODER_IN_BASE64) == 0);
		bool prevIsPlus = ((leftOver & DECODER_PREV_PLUS) != 0);
		int leftOverSize = ((leftOver >> 16) & 0xFF);
		int leftOverBits = (leftOver & 0xFFFF);
		sbyte[] base64 = base64Values;
		while (byteCount > 0) {
			byteval = (int)(bytes[byteIndex++]);
			--byteCount;
			if (normal) {
				if (byteval != '+') {
					// Directly-encoded character.
					if (posn >= charLength) {
						throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
					}
					chars[posn++] = (char)byteval;
				} else {
					// Start of a base64-encoded character.
					normal = false;
					prevIsPlus = true;
				}
			} else {
				// Process the next byte in a base64 sequence.
				if (byteval == (int)'-') {
					// End of a base64 sequence; "+-" is a literal '+'.
					if (prevIsPlus) {
						if (posn >= charLength) {
							throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
						}
						chars[posn++] = '+';
					}
					// RFC1642 Rule #2
					// When decoding, any bits at the end of the Modified Base64 sequence that
					// do not constitute a complete 16-bit Unicode character are discarded.
					// If such discarded bits are non-zero the sequence is ill-formed.
					if (leftOverBits != 0)
						throw new FormatException ("unused bits not zero");
					// Forget the padding bit count as well, otherwise it
					// would shift every character of the next sequence.
					leftOverSize = 0;
					normal = true;
				} else if ((b64value = base64[byteval]) != -1) {
					// Extra character in a base64 sequence.
					leftOverBits = (leftOverBits << 6) | b64value;
					leftOverSize += 6;
					if (leftOverSize >= 16) {
						if (posn >= charLength) {
							throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
						}
						leftOverSize -= 16;
						chars[posn++] = (char)(leftOverBits >> leftOverSize);
						leftOverBits &= ((1 << leftOverSize) - 1);
					}
				} else {
					// Normal character terminating a base64 sequence.
					// Trailing bits that do not make up a whole 16-bit
					// character are discarded, not turned into an
					// extra character.  This path has never rejected
					// input, so non-zero padding is tolerated here.
					leftOverSize = 0;
					leftOverBits = 0;
					if (posn >= charLength) {
						throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
					}
					chars[posn++] = (char)byteval;
					normal = true;
				}
				prevIsPlus = false;
			}
		}
		leftOver = (leftOverBits | (leftOverSize << 16) |
				    (normal ? 0 : DECODER_IN_BASE64) |
				    (prevIsPlus ? DECODER_PREV_PLUS : 0));

		// Return the final length to the caller.
		return posn - charIndex;
	}

	// Get the characters that result from decoding a byte buffer.
	public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
								 char[] chars, int charIndex)
	{
		int leftOver = 0;
		return InternalGetChars (bytes, byteIndex, byteCount, chars, charIndex, ref leftOver);
	}

	// Get the maximum number of bytes needed to encode a
	// specified number of characters.
	//
	// A run of k encoded characters costs 2 + ceil(8k/3) bytes ('+',
	// the base64 characters, '-'), a direct character costs 1 and a
	// plus sign costs 2.  The worst case alternates single encoded
	// characters (5 bytes) with plus signs (2 bytes), which gives at
	// most floor((7n + 3) / 2) bytes for n characters.
	public override int GetMaxByteCount (int charCount)
	{
		if (charCount < 0) {
			throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
		}
		if (charCount == 0)
			return 0;
		// Compute in 64 bits so that a huge count is rejected
		// instead of silently wrapping around.
		long max = (7L * charCount + 3) / 2;
		if (max > Int32.MaxValue) {
			throw new ArgumentOutOfRangeException ("charCount");
		}
		return (int) max;
	}

	// Get the maximum number of characters needed to decode a
	// specified number of bytes.  No byte ever yields more than
	// one character.
	public override int GetMaxCharCount (int byteCount)
	{
		if (byteCount < 0) {
			throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
		}
		return byteCount;
	}

	// Get a UTF7-specific decoder that is attached to this instance.
	public override Decoder GetDecoder ()
	{
		return new UTF7Decoder ();
	}

	// Get a UTF7-specific encoder that is attached to this instance.
	public override Encoder GetEncoder ()
	{
		return new UTF7Encoder (allowOptionals);
	}

	// UTF-7 decoder implementation.  Keeps the decoder state word
	// between calls so that input may be split at any byte.
	private sealed class UTF7Decoder : Decoder
	{
		// Internal state.
		private int leftOver;

		// Constructor.
		public UTF7Decoder ()
		{
			leftOver = 0;
		}

		// Override inherited methods.
		public override int GetCharCount (byte[] bytes, int index, int count)
		{
			return InternalGetCharCount (bytes, index, count, leftOver);
		}
		public override int GetChars (byte[] bytes, int byteIndex,
									 int byteCount, char[] chars,
									 int charIndex)
		{
			return InternalGetChars (bytes, byteIndex, byteCount, chars, charIndex, ref leftOver);
		}

	} // class UTF7Decoder

	// UTF-7 encoder implementation.  Keeps the encoder state word
	// between calls so that input may be split at any character.
	private sealed class UTF7Encoder : Encoder
	{
		private bool allowOptionals;
		private int leftOver;

		// Constructor.
		public UTF7Encoder (bool allowOptionals)
		{
			this.allowOptionals = allowOptionals;
			this.leftOver = 0;
		}

		// Override inherited methods.
		public override int GetByteCount (char[] chars, int index,
										 int count, bool flush)
		{
			return InternalGetByteCount
				(chars, index, count, flush, leftOver, allowOptionals);
		}
		public override int GetBytes (char[] chars, int charIndex,
									 int charCount, byte[] bytes,
									 int byteIndex, bool flush)
		{
			return InternalGetBytes (chars, charIndex, charCount,
									bytes, byteIndex, flush,
									ref leftOver, allowOptionals);
		}

	} // class UTF7Encoder

} // class UTF7Encoding
597
598 }; // namespace System.Text