mcs/class/referencesource/mscorlib/system/text/iso2022encoding.cs

   1 // ==++==
   2 //
   3 //   Copyright (c) Microsoft Corporation.  All rights reserved.
   4 //
   5 // ==--==
   6
   7 //  ISO2022Encoding.cs
   8 //
   9 //  Ported to managed code from c_is2022.c and related iso 2022 dll files from mlang
  10 //
  11 //  Abstract:
  12 //
  13 //      Managed implementation of ISO 2022 code pages, ported from the implementation in c_is2022.dll
  14 //      This code should be kept in sync with the other implementations
  15 //      This encoding wraps the basic encodings in code that adds the shift in/out wrapper methods
  16 //
  17 //  Notes:
  18 //
  19 // IsAlwaysNormalized ???
  20 // Regarding Normalization for ISO-2022-JP (50220, 50221, 50222), its the same rules as EUCJP
  21 //  Forms KC & KD are precluded because of things like halfwidth Katakana that has compatibility mappings
  22 //  Form D is precluded because of 0x00a8, which changes to space + diaeresis.
  23 //
  24 // Note: I think that IsAlwaysNormalized should probably return true for form C for Japanese 20932 based CPs.
  25 //
  26 // For ISO-2022-KR
  27 //  Never normalized, C & D (& therefore KC & KD) are precluded because of Hangul syllables and combined characters.
  28 //
  29 // IsAlwaysNormalized ???
  30 // Regarding Normalization for ISO-2022-CN (50227, 50229) & HZ-GB2312 (52936) I think is similar to the Japanese case.
  31 //  Forms KC & KD are precluded because of things like halfwidth Katakana that has compatibility mappings
  32 //  Form D is precluded because of 0x00a8, which changes to space + diaeresis.
  33 //
  34 // Note: I think that IsAlwaysNormalized should probably return true for form C for Chinese 20936 based CPs.
  35 //
  36 #if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncooding
  37 namespace System.Text
  38 {
  39     using System.Globalization;
  40     using System.Diagnostics.Contracts;
  41     using System.Text;
  42     using System.Runtime.InteropServices;
  43     using System;
  44     using System.Security;
  45     using System.Runtime.CompilerServices;
  46     using System.Runtime.Serialization;
  47
  48
  49     /*=================================ISO2022Encoding============================
  50     **
  51     ** This is used to support ISO 2022 encodings that use shift/escape sequences.
  52     **
  53     ==============================================================================*/
  54
  55     [Serializable]
  56     internal class ISO2022Encoding : DBCSCodePageEncoding
  57     {
  58         const byte SHIFT_OUT            = (byte)0x0E;
  59         const byte SHIFT_IN             = (byte)0x0F;
  60         const byte ESCAPE               = 0x1B;
  61         const byte LEADBYTE_HALFWIDTH   = 0x10;
  62
  63         // We have to load the 936 code page tables, so impersonate 936 as our base
  64         // This pretends to be other code pages as far as memory sections are concerned.
  65         [System.Security.SecurityCritical]  // auto-generated
  66         internal ISO2022Encoding(int codePage) : base(codePage, tableBaseCodePages[codePage % 10])
  67         {
  68             this.m_bUseMlangTypeForSerialization = true;
  69         }
  70
  71         // Constructor called by serialization.
  72         // Note:  We use the base GetObjectData however
  73         [System.Security.SecurityCritical]  // auto-generated
  74         internal ISO2022Encoding(SerializationInfo info, StreamingContext context) : base(info, context)
  75         {
  76             // Actually this can't ever get called, CodePageEncoding is our proxy
  77             Contract.Assert(false, "Didn't expect to make it to DBCSCodePageEncoding serialization constructor");
  78             throw new ArgumentException(Environment.GetResourceString("Arg_ExecutionEngineException"));
  79         }
  80
  81         static int[] tableBaseCodePages =
  82         {
  83             932,    // 50220  ISO-2022-JP, No halfwidth Katakana, convert to full width
  84             932,    // 50221  ISO-2022-JP, Use escape sequence for half width Katakana
  85             932,    // 50222  ISO-2022-JP, Use shift-in/shift-out for half width Katakana
  86             0,
  87             0,
  88             949,    // 50225  ISO-2022-KR, Korean
  89             936,    // 52936  HZ-GB2312, 936 might be better source
  90             0, //20936,    // 50227  ISO-2022-CN, Note: This is just the same as CP 936 in Everett.
  91             0,
  92             // 50229 is currently unsupported, CP 20000 is currently not built in .nlp file
  93             0, //20000,    // 50229  ISO-2022-CN, ModeCNS11643_1
  94             0, //20000,    // 50229  ISO-2022-CN, ModeCNS11643_2
  95             0         //                     ModeASCII
  96         };
  97
  98         internal enum ISO2022Modes
  99         {
 100             ModeHalfwidthKatakana   = 0,
 101             ModeJIS0208             = 1,
 102             ModeKR                  = 5,
 103             ModeHZ                  = 6,
 104             ModeGB2312              = 7,
 105             ModeCNS11643_1          = 9,
 106             ModeCNS11643_2          = 10,
 107             ModeASCII               = 11,
 108
 109             ModeIncompleteEscape    = -1,
 110             ModeInvalidEscape       = -2,
 111             ModeNOOP                = -3
 112         }
 113
 114         [System.Security.SecurityCritical]  // auto-generated
 115         protected unsafe override String GetMemorySectionName()
 116         {
 117             int iUseCodePage = this.bFlagDataTable ? dataTableCodePage : CodePage;
 118
 119             String strFormat;
 120
 121             switch (this.CodePage)
 122             {
 123                 case 50220:
 124                 case 50221:
 125                 case 50222:
 126                     strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_ISO2022JP";
 127                     break;
 128                 case 50225:
 129                     strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_ISO2022KR";
 130                     break;
 131                 case 52936:
 132                     strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_HZ";
 133                     break;
 134                 default:
 135                     Contract.Assert(false, "[ISO2022Encoding.GetMemorySectionName] Don't expect to get here for code page " + this.CodePage);
 136                     strFormat = "CodePage_{0}_{1}_{2}_{3}_{4}";
 137                     break;
 138             }
 139
 140             String strName = String.Format(CultureInfo.InvariantCulture, strFormat,
 141                 iUseCodePage, this.pCodePage->VersionMajor, this.pCodePage->VersionMinor,
 142                 this.pCodePage->VersionRevision, this.pCodePage->VersionBuild);
 143
 144             return strName;
 145         }
 146
 147         // Clean up characters for ISO2022 code pages, etc.
 148         // ISO2022 (50220, 50221, 50222)
 149         // GB-HZ (52936)
 150         protected override bool CleanUpBytes(ref int bytes)
 151         {
 152             switch (this.CodePage)
 153             {
 154                 // 932 based code pages
 155                 case 50220:
 156                 case 50221:
 157                 case 50222:
 158                 {
 159                     if (bytes >= 0x100)
 160                     {
 161                         // map extended char (0xfa40-0xfc4b) to a special range
 162                         // (ported from mlang)
 163                         if (bytes >= 0xfa40 && bytes <= 0xfc4b)
 164                         {
 165                             if ( bytes >= 0xfa40 && bytes <= 0xfa5b )
 166                             {
 167                                 if ( bytes <= 0xfa49 )
 168                                     bytes = bytes - 0x0b51 ;
 169                                 else if ( bytes >= 0xfa4a && bytes <= 0xfa53 )
 170                                     bytes = bytes - 0x072f6 ;
 171                                 else if ( bytes >= 0xfa54 && bytes <= 0xfa57 )
 172                                     bytes = bytes - 0x0b5b ;
 173                                 else if ( bytes == 0xfa58 )
 174                                     bytes = 0x878a ;
 175                                 else if ( bytes == 0xfa59 )
 176                                     bytes = 0x8782 ;
 177                                 else if ( bytes == 0xfa5a )
 178                                     bytes = 0x8784 ;
 179                                 else if ( bytes == 0xfa5b )
 180                                     bytes = 0x879a ;
 181                             }
 182                             else if ( bytes >= 0xfa5c && bytes <= 0xfc4b )
 183                             {
 184                                 byte tc = unchecked((byte)bytes);
 185                                 if ( tc < 0x5c )
 186                                     bytes = bytes - 0x0d5f;
 187                                 else if ( tc >= 0x80 && tc <= 0x9B )
 188                                     bytes = bytes - 0x0d1d;
 189                                 else
 190                                     bytes = bytes - 0x0d1c;
 191                             }
 192                         }
 193
 194                         // Convert 932 code page to 20932 like code page range
 195                         // (also ported from mlang)
 196                         byte bLead = unchecked((byte)(bytes >> 8));
 197                         byte bTrail = unchecked((byte)bytes);
 198
 199                         bLead -= ((bLead > (byte)0x9f) ? (byte)0xb1 : (byte)0x71);
 200                         bLead = (byte)((bLead << 1) + 1);
 201                         if (bTrail > (byte)0x9e)
 202                         {
 203                             bTrail -= (byte)0x7e;
 204                             bLead++;
 205                         }
 206                         else
 207                         {
 208                             if (bTrail > (byte)0x7e)
 209                                 bTrail--;
 210                             bTrail -= (byte)0x1f;
 211                         }
 212
 213                         bytes = ((int)bLead) << 8 | (int)bTrail;
 214
 215                         // Don't step out of our allocated lead byte area.
 216                         // All DBCS lead and trail bytes should be >= 0x21 and <= 0x7e
 217                         // This is commented out because Everett/Mlang had illegal PUA
 218                         // mappings to ISO2022 code pages that we're maintaining.
 219 //                        if ((bytes & 0xFF00) < 0x2100 || (bytes & 0xFF00) > 0x7e00 ||
 220   //                          (bytes & 0xFF) < 0x21 || (bytes & 0xFF) > 0x7e)
 221     //                        return false;
 222                     }
 223                     else
 224                     {
 225                         // Adjust 1/2 Katakana
 226                         if (bytes >= 0xa1 && bytes <= 0xdf)
 227                             bytes += (LEADBYTE_HALFWIDTH << 8) - 0x80;
 228
 229                         // 0x81-0x9f and 0xe0-0xfc CP 932
 230                         // 0x8e and 0xa1-0xfe      CP 20932 (we don't use 8e though)
 231                         // b0-df is 1/2 Katakana
 232                         if (bytes >= 0x81 &&
 233                             (bytes <= 0x9f ||
 234                              (bytes >= 0xe0 && bytes <= 0xfc)))
 235                         {
 236                             // Don't do lead bytes, we use escape sequences instead.
 237                             return false;
 238                         }
 239                     }
 240                     break;
 241                 }
 242                 case 50225:
 243                 {
 244                     // For 50225 since we don't rely on lead byte marks, return false and don't add them,
 245                     // esp. since we're only a 7 bit code page.
 246                     if (bytes >= 0x80 && bytes <= 0xff)
 247                         return false;
 248
 249                     // Ignore characters out of range (a1-7f)
 250                     if (bytes >= 0x100 &&
 251                         ((bytes & 0xff) < 0xa1 || (bytes & 0xff) == 0xff ||
 252                          (bytes & 0xff00) < 0xa100 || (bytes & 0xff00) == 0xff00))
 253                          return false;
 254
 255                     // May as well get them into our 7 bit range
 256                     bytes &= 0x7f7f;
 257
 258                     break;
 259                 }
 260                 case 52936:
 261                 {
 262                     // Since we don't rely on lead byte marks for 52936, get rid of them so we
 263                     // don't end up with extra wierd fffe mappings.
 264                     if (bytes >= 0x81 && bytes <= 0xfe)
 265                         return false;
 266
 267                     break;
 268                 }
 269             }
 270
 271             return true;
 272         }
 273
 274         // GetByteCount
 275         [System.Security.SecurityCritical]  // auto-generated
 276         internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
 277         {
 278             // Just need to ASSERT, this is called by something else internal that checked parameters already
 279             Contract.Assert(count >= 0, "[ISO2022Encoding.GetByteCount]count is negative");
 280             Contract.Assert(chars != null, "[ISO2022Encoding.GetByteCount]chars is null");
 281
 282             // Just call GetBytes with null byte* to get count
 283             return GetBytes(chars, count, null, 0, baseEncoder);
 284         }
 285
 286         [System.Security.SecurityCritical]  // auto-generated
 287         internal override unsafe int GetBytes(char* chars, int charCount,
 288                                                 byte* bytes, int byteCount, EncoderNLS baseEncoder)
 289         {
 290             // Just need to ASSERT, this is called by something else internal that checked parameters already
 291             Contract.Assert(chars != null, "[ISO2022Encoding.GetBytes]chars is null");
 292             Contract.Assert(byteCount >= 0, "[ISO2022Encoding.GetBytes]byteCount is negative");
 293             Contract.Assert(charCount >= 0, "[ISO2022Encoding.GetBytes]charCount is negative");
 294
 295             // Assert because we shouldn't be able to have a null encoder.
 296             Contract.Assert(encoderFallback != null, "[ISO2022Encoding.GetBytes]Attempting to use null encoder fallback");
 297
 298             // Fix our encoder
 299             ISO2022Encoder encoder = (ISO2022Encoder)baseEncoder;
 300
 301             // Our return value
 302             int iCount = 0;
 303
 304             switch(CodePage)
 305             {
 306                 case 50220:
 307                 case 50221:
 308                 case 50222:
 309                     iCount = GetBytesCP5022xJP( chars, charCount, bytes, byteCount, encoder );
 310                     break;
 311                 case 50225:
 312                     iCount = GetBytesCP50225KR( chars, charCount, bytes, byteCount, encoder );
 313                     break;
 314 // Everett had 50227 the same as 936
 315 /*              case 50227:
 316                     iCount = GetBytesCP50227CN( chars, charCount, bytes, byteCount, encoder );
 317                     break;
 318 */
 319                 case 52936:
 320                     iCount = GetBytesCP52936( chars, charCount, bytes, byteCount, encoder );
 321                     break;
 322             }
 323
 324             return iCount;
 325         }
 326
 327         // This is internal and called by something else,
 328         [System.Security.SecurityCritical]  // auto-generated
 329         internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
 330         {
 331             // Just assert, we're called internally so these should be safe, checked already
 332             Contract.Assert(bytes != null, "[ISO2022Encoding.GetCharCount]bytes is null");
 333             Contract.Assert(count >= 0, "[ISO2022Encoding.GetCharCount]byteCount is negative");
 334
 335             // Just call getChars with null char* to get count
 336             return GetChars(bytes, count, null, 0, baseDecoder);
 337         }
 338
 339         [System.Security.SecurityCritical]  // auto-generated
 340         internal override unsafe int GetChars(byte* bytes, int byteCount,
 341                                                 char* chars, int charCount, DecoderNLS baseDecoder)
 342         {
 343             // Just need to ASSERT, this is called by something else internal that checked parameters already
 344             Contract.Assert(bytes != null, "[ISO2022Encoding.GetChars]bytes is null");
 345             Contract.Assert(byteCount >= 0, "[ISO2022Encoding.GetChars]byteCount is negative");
 346             Contract.Assert(charCount >= 0, "[ISO2022Encoding.GetChars]charCount is negative");
 347
 348             // Fix our decoder
 349             ISO2022Decoder decoder = (ISO2022Decoder)baseDecoder;
 350             int iCount = 0;
 351
 352             switch (CodePage)
 353             {
 354                 case 50220:
 355                 case 50221:
 356                 case 50222:
 357                     iCount = GetCharsCP5022xJP( bytes, byteCount, chars, charCount, decoder);
 358                     break;
 359                 case 50225:
 360                     iCount = GetCharsCP50225KR( bytes, byteCount, chars, charCount, decoder);
 361                     break;
 362                     // Currently 50227 is the same as 936
 363 //                case 50227:
 364   //                  iCount = GetCharsCP50227CN( bytes, byteCount, chars, charCount, decoder);
 365     //                break;
 366                 case 52936:
 367                     iCount = GetCharsCP52936( bytes, byteCount, chars, charCount, decoder);
 368                     break;
 369                 default:
 370                     Contract.Assert(false, "[ISO2022Encoding.GetChars] had unexpected code page");
 371                     break;
 372             }
 373
 374             return iCount;
 375         }
 376
 377         // ISO 2022 Code pages for JP.
 378         //  50220 - No halfwidth Katakana, convert to full width
 379         //  50221 - Use escape sequence for half width Katakana
 380         //  50222 - Use shift-in/shift-out for half width Katakana
 381         //
 382         // These are the JIS code pages, superset of ISO-2022 / ISO-2022-JP-1
 383         //  0E          Shift Out (following bytes are Katakana)
 384         //  0F          Shift In  (back to "normal" behavior)
 385         //  21-7E       Byte ranges (1 or 2 bytes)
 386         //  <ESC> $ @   To Double Byte 0208 Mode (actually older code page, but subset of 0208)
 387         //  <ESC> $ B   To Double Byte 0208 Mode (duplicate)
 388         //  <ESC> $ ( D To Double Byte 0212 Mode (previously we misinterpreted this)
 389         //  <ESC> ( I   To half width Katakana
 390         //  <ESC> ( J   To JIS-Roman
 391         //  <ESC> ( H   To JIS-Roman (swedish character set)
 392         //  <ESC> ( B   To ASCII
 393         //  <ESC> & @   Alternate lead in to <ESC> $ B so just ignore it.
 394         //
 395         // So in Katakana mode we add 0x8e as a lead byte and use CP 20932 to convert it
 396         // In ASCII mode we just spit out the single byte.
 397         // In Roman mode we should change 0x5c (\) -> Yen sign and 0x7e (~) to Overline, however
 398         //      we didn't in mLang, otherwise roman is like ASCII.
 399         // In 0208 double byte mode we have to |= with 0x8080 and use CP 20932 to convert it.
 400         // In 0212 double byte mode we have to |= with 0x8000 and use CP 20932 to convert it.
 401         //
 402         // Note that JIS Shift In/Shift Out is different than the other ISO2022 encodings.  For JIS
 403         // Shift out always shifts to half-width Katakana.  Chinese encodings use designator sequences
 404         // instead of escape sequences and shift out to the designated sequence or back in to ASCII.
 405         //
 406         // When decoding JIS 0208, MLang used a '*' (0x2a) character in JIS 0208 mode to map the trailing byte
 407         // to halfwidth katakana.  I found no description of that behavior, however that block of 0208 is
 408         // undefined, so we maintain that behavior when decoding.  We will never generate characters using
 409         // that technique, but the decoder will process them.
 410         //
 411         [System.Security.SecurityCritical]  // auto-generated
             // Encodes charCount chars into bytes for code pages 50220, 50221 and 50222.
             //
             //   chars / charCount   input characters
             //   bytes / byteCount   output buffer; GetByteCount passes null / 0 to get a count only
             //   encoder             optional; supplies the starting mode, the shift-in mode and a
             //                       possible left over high surrogate, and receives the final
             //                       state when bytes is not null
             //
             // Returns buffer.Count of the EncodingByteBuffer helper.
             //
             // For every char the table value from mapUnicodeToBytes decides what is written:
             //   - halfwidth Katakana: converted to fullwidth through HalfToFullWidthKanaTable (50220),
             //     written after SHIFT_OUT (50222) or after ESC ( I (50221)
             //   - double byte: written after ESC $ B
             //   - single byte: written after ESC ( B
             //   - anything else goes to buffer.Fallback
             // A shift or escape is only written when the current mode has to change.  At the end the
             // output is switched back to ASCII when there is no encoder or the encoder must flush.
 412         private unsafe int GetBytesCP5022xJP(char* chars, int charCount,
 413                                                   byte* bytes, int byteCount, ISO2022Encoder encoder)
 414         {
 415             // prepare our helpers
 416             Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
 417                 this, encoder, bytes, byteCount, chars, charCount);
 418
 419             // Get our mode
 420             ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Mode
 421             ISO2022Modes shiftInMode = ISO2022Modes.ModeASCII;      // Mode that shift in will go back to (only used by CP 50222)
 422
 423             // Check our encoder
 424             if (encoder != null)
 425             {
 426                 char charLeftOver = encoder.charLeftOver;
 427
 428                 currentMode = encoder.currentMode;
 429                 shiftInMode = encoder.shiftInOutMode;
 430
 431                 // We may have a left over character from last time, try and process it.
 432                 if (charLeftOver > 0)
 433                 {
 434                     Contract.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP5022xJP]leftover character should be high surrogate");
 435
 436                     // It has to be a high surrogate, which we don't support, so it has to be a fallback
 437                     buffer.Fallback(charLeftOver);
 438                 }
 439             }
 440
 441             while (buffer.MoreData)
 442             {
 443                 // Get our char
 444                 char ch = buffer.GetNextChar();
 445
 446                 // Get our bytes
                     // The high byte picks the branch below: LEADBYTE_HALFWIDTH is halfwidth Katakana,
                     // any other non-zero value is a double byte character, zero is a single byte
                     // character (or unmapped when iBytes is 0 and ch is not NUL).
 447                 ushort iBytes = mapUnicodeToBytes[ch];
 448
 449                 StartConvert:
 450                 // Check for halfwidth bytes
 451                 byte bLeadByte = (byte)(iBytes >> 8);
 452                 byte bTrailByte = (byte)(iBytes & 0xff);
 453
 454                 if (bLeadByte == LEADBYTE_HALFWIDTH)
 455                 {
 456                     // Its Halfwidth Katakana
 457                     if (CodePage == 50220)
 458                     {
 459                         // CodePage 50220 doesn't use halfwidth Katakana, convert to fullwidth
 460                         // See if its out of range, fallback if so, throws if recursive fallback
 461                         if (bTrailByte < 0x21 || bTrailByte >= 0x21 + HalfToFullWidthKanaTable.Length)
 462                         {
 463                             buffer.Fallback(ch);
 464                             continue;
 465                         }
 466
 467                         // Get the full width katakana char to use.
 468                         iBytes = unchecked((ushort)(HalfToFullWidthKanaTable[bTrailByte - 0x21] & 0x7F7F));
 469
 470                         // May have to do all sorts of fun stuff for mode, go back to start convert
 471                         goto StartConvert;
 472                     }
 473
 474                     // Can use halfwidth Katakana, make sure we're in right mode
 475
 476                     // Make sure we're in right mode
 477                     if (currentMode != ISO2022Modes.ModeHalfwidthKatakana)
 478                     {
 479                         // 50222 or 50221, either shift in/out or escape to get to Katakana mode
 480                         if (CodePage == 50222)
 481                         {
 482                             // Shift Out
 483                             if (!buffer.AddByte(SHIFT_OUT))
 484                                 break;  // convert out of space, stop
 485
 486                             // Don't change modes until after AddByte in case it fails for convert
 487                             // We get to shift out to Katakana, make sure we'll go back to the right mode
 488                             // (This ends up always being ASCII)
 489                             shiftInMode = currentMode;
 490                             currentMode = ISO2022Modes.ModeHalfwidthKatakana;
 491                         }
 492                         else
 493                         {
 494                             // 50221 does halfwidth katakana by escape sequence
 495                             Contract.Assert(CodePage == 50221, "[ISO2022Encoding.GetBytesCP5022xJP]Expected Code Page 50221");
 496
 497                             // Add our escape sequence
 498                             if (!buffer.AddByte(ESCAPE, unchecked((byte)'('), unchecked((byte)'I')))
 499                                 break;  // convert out of space, stop
 500
 501                             currentMode = ISO2022Modes.ModeHalfwidthKatakana;
 502                         }
 503                     }
 504
 505                     // We know we're in Katakana mode now, so add it.
 506                     // Go ahead and add the Katakana byte.  Our table tail bytes are 0x80 too big.
 507                     if (!buffer.AddByte(unchecked((byte)(bTrailByte & 0x7F))))
 508                         break;  // convert out of space, stop
 509
 510                     // Done with this one
 511                     continue;
 512                 }
 513                 else if (bLeadByte != 0)
 514                 {
 515                     //
 516                     //  It's a double byte character.
 517                     //
 518
 519                     // If we're CP 50222 we may have to shift in from Katakana mode first
 520                     if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana)
 521                     {
 522                         // Shift In
 523                         if (!buffer.AddByte(SHIFT_IN))
 524                             break;    // convert out of space, stop
 525
 526                         // Need to shift in from katakana.  (Still might not be right, but won't be shifted out anyway)
 527                         currentMode = shiftInMode;
 528                     }
 529
 530                     // Make sure we're in the right mode (JIS 0208 or JIS 0212)
 531                     // Note: Right now we don't use JIS 0212.  Also this table'd be wrong
 532
 533                     // Its JIS extension 0208
 534                     if (currentMode != ISO2022Modes.ModeJIS0208)
 535                     {
 536                         // Escape sequence, we can fail after this, mode will be correct for convert
 537                         if (!buffer.AddByte(ESCAPE, unchecked((byte)'$'), unchecked((byte)'B')))
 538                             break;  // Convert out of space, stop
 539
 540                         currentMode = ISO2022Modes.ModeJIS0208;
 541                     }
 542
 543                     // Add our double bytes
 544                     if (!buffer.AddByte(unchecked((byte)(bLeadByte)), unchecked((byte)(bTrailByte))))
 545                         break; // Convert out of space, stop
 546                     continue;
 547                 }
 548                 else if (iBytes != 0 || ch == 0)
 549                 {
 550                     // Single byte Char
 551                     // If we're CP 50222 we may have to shift in from Katakana mode first
 552                     if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana)
 553                     {
 554                         // Shift IN
 555                         if (!buffer.AddByte(SHIFT_IN))
 556                             break; // convert ran out of room
 557
 558                         // Need to shift in from katakana.  (Still might not be right, but won't be shifted out anyway)
 559                         currentMode = shiftInMode;
 560                     }
 561
 562                     // Its a single byte character, switch to ASCII if we have to
 563                     if (currentMode != ISO2022Modes.ModeASCII)
 564                     {
 565                         if (!buffer.AddByte(ESCAPE,unchecked((byte)'('), unchecked((byte)'B')))
 566                             break; // convert ran out of room
 567
 568                         currentMode = ISO2022Modes.ModeASCII;
 569                     }
 570
 571                     // Add the ASCII char
 572                     if (!buffer.AddByte(bTrailByte))
 573                         break; // convert had no room left
 574                     continue;
 575                 }
 576
 577                 // Its unknown, do fallback, throws if recursive (knows because we called InternalGetNextChar)
 578                 buffer.Fallback(ch);
 579             }
 580
 581             // Switch back to ASCII if MustFlush or no encoder
 582             if (currentMode != ISO2022Modes.ModeASCII &&
 583                 (encoder == null || encoder.MustFlush))
 584             {
 585                 // If we're CP 50222 we may have to shift in from Katakana mode first
 586                 if (CodePage == 50222 && currentMode == ISO2022Modes.ModeHalfwidthKatakana)
 587                 {
 588                     // Shift IN, only shift mode if necessary.
 589                     if (buffer.AddByte(SHIFT_IN))
 590                         // Need to shift in from katakana.  (Still might not be right, but won't be shifted out anyway)
 591                         currentMode = shiftInMode;
 592                     else
 593                         // If not successful, convert will maintain state for next time, also
 594                         // AddByte will have decremented our char count, however we need it to remain the same
 595                         buffer.GetNextChar();
 596                 }
 597
 598                 // switch back to ASCII to finish neatly
 599                 if (currentMode != ISO2022Modes.ModeASCII &&
 600                     (CodePage != 50222 || currentMode != ISO2022Modes.ModeHalfwidthKatakana))
 601                 {
 602                     // only shift if it was successful
 603                     if (buffer.AddByte(ESCAPE, unchecked((byte)'('), unchecked((byte)'B')))
 604                         currentMode = ISO2022Modes.ModeASCII;
 605                     else
 606                         // If not successful, convert will maintain state for next time, also
 607                         // AddByte will have decremented our char count, however we need it to remain the same
 608                         buffer.GetNextChar();
 609                 }
 610             }
 611
 612             // Remember our encoder state
                 // Only done when real output was requested (bytes != null); a count-only call
                 // must leave the encoder untouched.
 613             if (bytes != null && encoder != null)
 614             {
 615                 // This is ASCII if we had to flush
 616                 encoder.currentMode = currentMode;
 617                 encoder.shiftInOutMode = shiftInMode;
 618
 619                 if (!buffer.fallbackBuffer.bUsedEncoder)
 620                 {
 621                     encoder.charLeftOver = (char)0;
 622                 }
 623
 624                 encoder.m_charsUsed = buffer.CharsUsed;
 625             }
 626
 627             // Return our length
 628             return buffer.Count;
 629         }
 630
 631         // ISO 2022 Code pages for Korean - CP 50225
 632         //
 633         // CP 50225 has Shift In/Shift Out codes, and a single designator sequence that is supposed
 634         // to appear once in the file, at the beginning of a line, before any multibyte code points.
 635         // So we stick the designator at the beginning of the output.
 636         //
 637         // These are the KR code page codes for ISO-2022-KR
 638         //  0E          Shift Out (following bytes are double byte)
 639         //  0F          Shift In  (back to ASCII behavior)
 640         //  21-7E       Byte ranges (1 or 2 bytes)
 641         //  <ESC> $)C   Double byte ISO-2022-KR designator
 642         //
 643         // Note that this encoding is a little different than other encodings.  The <esc>$)C sequence
 644         // should only appear once per file.  (Actually I saw another spec/rfc that said at the beginning
 645         // of each line, but it shouldn't really matter.)
 646         //
 647         // During decoding Mlang accepted ' ', '\t', and '\n' as their respective characters, even if
 648         // it was in double byte mode.  We maintain that behavior, although I couldn't find a reference or
 649         // reason for that behavior.  We never generate data using that shortcut.
 650         //
 651         // Also Mlang always assumed KR mode, even if the designator wasn't found yet, so we do that as
 652         // well.  So basically we just ignore <ESC>$)C when decoding.
 653         //
 654         [System.Security.SecurityCritical]  // auto-generated
             // Encodes chars into ISO-2022-KR (CP 50225) bytes.
             //   chars/charCount - input characters
             //   bytes/byteCount - output buffer; encoder state is only written back when bytes != null
             //                     (presumably bytes == null is the count-only pass - confirm against callers)
             //   encoder         - optional; carries charLeftOver, currentMode and shiftInOutMode between calls
             // Returns buffer.Count as reported by the EncodingByteBuffer helper.
 655         private unsafe int GetBytesCP50225KR(char* chars, int charCount,
 656                                                     byte* bytes, int byteCount, ISO2022Encoder encoder)
 657         {
 658             // prepare our helpers
 659             Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
 660                 this, encoder, bytes, byteCount, chars, charCount);
 661
 662             // Get our mode
 663             ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Mode
 664             ISO2022Modes shiftOutMode = ISO2022Modes.ModeASCII;     // ModeKR if already stamped lead bytes
 665
 666             // Check our encoder
 667             if (encoder != null)
 668             {
 669                 // May have leftover stuff
 670                 char charLeftOver = encoder.charLeftOver;
 671                 currentMode = encoder.currentMode;
 672                 shiftOutMode = encoder.shiftInOutMode;
 673
 674                 // We may have a left over character from last time, try and process it.
 675                 if (charLeftOver > 0)
 676                 {
 677                     Contract.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP50225KR]leftover character should be high surrogate");
 678
 679                     // It has to be a high surrogate, which we don't support, so it has to be a fallback
 680                     buffer.Fallback(charLeftOver);
 681                 }
 682             }
 683
 684             while (buffer.MoreData)
 685             {
 686                 // Get our data
 687                 char ch = buffer.GetNextChar();
 688
 689                 // Get our bytes
 690                 ushort iBytes = mapUnicodeToBytes[ch];
 691
 692                 // Check for double byte bytes (high 8 bits are the lead byte, low 8 bits the trail byte)
 693                 byte bLeadByte = (byte)(iBytes >> 8);
 694                 byte bTrailByte = (byte)(iBytes & 0xff);
 695
 696                 if (bLeadByte != 0)
 697                 {
 698                     //
 699                     //  It's a double byte character.
 700                     //
 701
 702                     // If we haven't done our Korean designator, then do so, if we have any input
 703                     // (shiftOutMode == ModeKR is used as the "designator already written" flag)
 704                     if (shiftOutMode != ISO2022Modes.ModeKR)
 705                     {
 706                         // Add our code page designator sequence: <ESC> $ ) C
 707                         if (!buffer.AddByte(ESCAPE, unchecked((byte)'$'), unchecked((byte)')'), unchecked((byte)'C')))
 708                             break; // No room during convert.
 709
 710                         shiftOutMode = ISO2022Modes.ModeKR;
 711                     }
 712
 713                     // May have to switch to ModeKR first
 714                     if (currentMode != ISO2022Modes.ModeKR)
 715                     {
 716                         if (!buffer.AddByte(SHIFT_OUT))
 717                             break; // No convert room
 718
 719                         currentMode = ISO2022Modes.ModeKR;
 720                     }
 721
 722                     // Add the bytes
 723                     if (!buffer.AddByte(bLeadByte, bTrailByte))
 724                         break; // no convert room
 725                     continue;
 726                 }
 727                 else if (iBytes != 0 || ch == 0)
 728                 {
 729                     // It's a single byte character (a zero mapping is only accepted for ch == 0),
 730                     // switch to ASCII if we have to
 731                     if (currentMode != ISO2022Modes.ModeASCII)
 732                     {
 733                         if (!buffer.AddByte(SHIFT_IN))
 734                             break;
 735
 736                         currentMode = ISO2022Modes.ModeASCII;
 737                     }
 738
 739                     // Add the ASCII char
 740                     if (!buffer.AddByte(bTrailByte))
 741                         break;
 742                     continue;
 743                 }
 744
 745                 // It's unknown, do fallback, throws if recursive (knows because we called InternalGetNextChar)
 746                 buffer.Fallback(ch);
 747             }
 748
 749             // Switch back to ASCII if MustFlush or no encoder
 750             if (currentMode != ISO2022Modes.ModeASCII &&
 751                 (encoder == null || encoder.MustFlush))
 752             {
 753                 // Get back to ASCII to be safe.  Only change the mode if the SHIFT_IN was actually added.
 754                 if (buffer.AddByte(SHIFT_IN))
 755                     currentMode = ISO2022Modes.ModeASCII;
 756                 else
 757                     // If not successful, convert will maintain state for next time, also
 758                     // AddByte will have decremented our char count, however we need it to remain the same
 759                     buffer.GetNextChar();
 760             }
 761
 762             // Remember our encoder state
 763             if (bytes != null && encoder != null)
 764             {
 765                 // If we didn't use the encoder, then there's no chars left over
 766                 if (!buffer.fallbackBuffer.bUsedEncoder)
 767                 {
 768                     encoder.charLeftOver = (char)0;
 769                 }
 770
 771                 // This is ASCII if we had to flush
 772                 encoder.currentMode = currentMode;
 773
 774                 // We don't use shift out mode, but if we've flushed we need to reset it so it doesn't
 775                 // get output again.
 776                 // (shiftInOutMode carries the "designator already written" flag; it is kept while not
 777                 // flushing or while a char is still left over, and reset to ModeASCII otherwise.)
 778                 if (!encoder.MustFlush || encoder.charLeftOver != (char)0)
 779                 {
 780                     // We should be not flushing or converting
 781                     Contract.Assert(!encoder.MustFlush || !encoder.m_throwOnOverflow,
 782                         "[ISO2022Encoding.GetBytesCP50225KR]Expected no left over data or not flushing or not converting");
 783                     encoder.shiftInOutMode = shiftOutMode;
 784                 }
 785                 else
 786                     encoder.shiftInOutMode = ISO2022Modes.ModeASCII;
 787
 788                 encoder.m_charsUsed = buffer.CharsUsed;
 789             }
 790
 791             // Return our length
 792             return buffer.Count;
 793         }
 790
 791         // CP52936 is HZ Encoding
 792         // HZ Encoding has 4 shift sequences:
 793         // ~~       '~' (\u7e)
 794         // ~}       shift into 1 byte mode,
 795         // ~{       shift into 2 byte GB 2312-80
 796         // ~<NL>    Maintain 2 byte mode across new lines (ignore both ~ and <NL> characters)
 797         //          (This is for mailers that restrict to 70 or 80 or whatever character lines)
 798         //
 799         // According to comment in mlang, lead & trail byte ranges are described in RFC 1843
 800         // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e
 801         // Our 936 code points are or'd with 0x8080, so lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe
 802         //
 803         // This encoding is designed for transmission by e-mail and news.  No bytes should have high bit set.
 804         // (all bytes <= 0x7f)
 805         [System.Security.SecurityCritical]  // auto-generated
             // Encodes chars into HZ (CP 52936) bytes.
             //   chars/charCount - input characters
             //   bytes/byteCount - output buffer; encoder state is only written back when bytes != null
             //                     (presumably bytes == null is the count-only pass - confirm against callers)
             //   encoder         - optional; carries charLeftOver and currentMode between calls
             // Returns buffer.Count as reported by the EncodingByteBuffer helper.
 806         private unsafe int GetBytesCP52936(char* chars, int charCount,
 807                                            byte* bytes, int byteCount, ISO2022Encoder encoder)
 808         {
 809             // prepare our helpers
 810             Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
 811                 this, encoder, bytes, byteCount, chars, charCount);
 812
 813             // Mode
 814             ISO2022Modes currentMode = ISO2022Modes.ModeASCII;
 815
 816             // Check our encoder
 817             if (encoder != null)
 818             {
 819                 char charLeftOver = encoder.charLeftOver;
 820                 currentMode = encoder.currentMode;
 821
 822                 // We may have a left over character from last time, try and process it.
 823                 if (charLeftOver > 0)
 824                 {
 825                     Contract.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP52936]leftover character should be high surrogate");
 826
 827                     // It has to be a high surrogate, which we don't support, so it has to be a fallback
 828                     buffer.Fallback(charLeftOver);
 829                 }
 830             }
 831
 832             while (buffer.MoreData)
 833             {
 834                 // Get our char
 835                 char ch = buffer.GetNextChar();
 836
 837                 // Get our bytes (a zero mapping is only accepted for ch == 0)
 838                 ushort sChar = mapUnicodeToBytes[ch];
 839                 if (sChar == 0 && ch != 0)
 840                 {
 841                     // Wasn't a legal byte sequence, it's a surrogate or fallback
 842                     // Throws if recursive (knows because we called InternalGetNextChar)
 843                     buffer.Fallback(ch);
 844
 845                     // Done with our char, now process fallback
 846                     continue;
 847                 }
 848
 849                 // Split the mapping into its lead (high 8 bits) and trail (low 8 bits) bytes
 850                 byte bLeadByte = (byte)(sChar >> 8);
 851                 byte bTrailByte = (byte)(sChar & 0xff);
 852
 853                 // If it's a double byte, it has to fit in the lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe range
 854                 // (including the 0x8080 that our codepage or's to the value).
 855                 // Single bytes above 0x80 (other than 0xff) are rejected as well.
 856                 if ((bLeadByte != 0 &&
 857                      (bLeadByte < 0xa1 || bLeadByte > 0xf7 || bTrailByte < 0xa1 || bTrailByte > 0xfe)) ||
 858                     (bLeadByte == 0 && bTrailByte > 0x80 && bTrailByte != 0xff))
 859                 {
 860                     // Illegal character, in 936 code page, but not in HZ subset, get fallback for it
 861                     buffer.Fallback(ch);
 862                     continue;
 863                 }
 864
 865                 // sChar is now either ASCII or has an 0x8080 mask
 866                 if (bLeadByte != 0)
 867                 {
 868                     // It's a double byte character
 869                     if (currentMode != ISO2022Modes.ModeHZ)
 870                     {
 871                         // Need to add the double byte mode marker "~{"; 2 more bytes will follow it
 872                         if (!buffer.AddByte((byte)'~', (byte)'{', 2))
 873                             break;                                      // Stop if no buffer space in convert
 874
 875                         currentMode = ISO2022Modes.ModeHZ;
 876                     }
 877
 878                     // Go ahead and add the 2 bytes, with the high bit masked off each one
 879                     if (!buffer.AddByte(unchecked((byte)(bLeadByte & 0x7f)), unchecked((byte)(bTrailByte & 0x7f))))
 880                         break;                                      // Stop if no buffer space in convert
 881                 }
 882                 else
 883                 {
 884                     // It's supposed to be ASCII
 885                     if (currentMode != ISO2022Modes.ModeASCII)
 886                     {
 887                         // Need to add the ASCII mode marker "~}"
 888                         // Will have 1 more byte (or 2 if ~)
 889                         if (!buffer.AddByte((byte)'~', (byte)'}', bTrailByte == '~' ? 2:1))
 890                             break;
 891
 892                         currentMode = ISO2022Modes.ModeASCII;
 893                     }
 894
 895                     // If it's a '~' we'll need an extra one ('~' is written as "~~")
 896                     if (bTrailByte == '~')
 897                     {
 898                         // Need to add the extra ~; 1 more byte will follow it
 899                         if (!buffer.AddByte((byte)'~', 1))
 900                             break;
 901                     }
 902
 903                     // Need to add the character
 904                     if (!buffer.AddByte(bTrailByte))
 905                         break;
 906                 }
 907             }
 908
 909             // Shift back to ASCII ("~}") if we're at the end of the encoder's data (flushing) or have no encoder
 910             if (currentMode != ISO2022Modes.ModeASCII &&
 911                 (encoder == null || encoder.MustFlush))
 912             {
 913                 // Need to add the ASCII mode marker
 914                 // Only turn off other mode if this works
 915                 if (buffer.AddByte((byte)'~',(byte)'}'))
 916                     currentMode = ISO2022Modes.ModeASCII;
 917                 else
 918                     // If not successful, convert will maintain state for next time, also
 919                     // AddByte will have decremented our char count, however we need it to remain the same
 920                     buffer.GetNextChar();
 921             }
 922
 923             // Need to remember our mode
 924             if (encoder != null && bytes != null)
 925             {
 926                 // This is ASCII if we had to flush
 927                 encoder.currentMode = currentMode;
 928
 929                 // If the fallback buffer didn't use the encoder, then there's no char left over
 930                 if (!buffer.fallbackBuffer.bUsedEncoder)
 931                 {
 932                     encoder.charLeftOver = (char)0;
 933                 }
 934
 935                 encoder.m_charsUsed = buffer.CharsUsed;
 936             }
 937
 938             // Return our length
 939             return buffer.Count;
 940         }
 939
 940         [System.Security.SecurityCritical]  // auto-generated
             // Decodes ISO-2022-JP style bytes (the CP 5022x family) into chars.
             //   bytes/byteCount - input bytes
             //   chars/charCount - output buffer; chars == null means we are only counting, in which
             //                     case the decoder state is left untouched
             //   decoder         - optional; carries currentMode, shiftInOutMode and up to 4 left over
             //                     bytes (a partial escape sequence or a lone lead byte) between calls
             // Returns buffer.Count as reported by the EncodingCharBuffer helper.
             //
             // escapeBytes/escapeCount hold bytes that were read but not yet converted.  They are
             // either a pending escape sequence (escapeBytes[0] == ESCAPE) or ordinary bytes that
             // are replayed one at a time through DecrementEscapeBytes.
 941         private unsafe int GetCharsCP5022xJP(byte* bytes, int byteCount,
 942                                                   char* chars, int charCount, ISO2022Decoder decoder)
 943         {
 944             // Get our info.
 945             Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
 946                 this, decoder, chars, charCount, bytes, byteCount);
 947
 948             // No mode information yet
 949             ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Our current Mode
 950             ISO2022Modes shiftInMode = ISO2022Modes.ModeASCII;      // Mode that we'll shift in to
 951             byte[] escapeBytes = new byte[4];
 952             int escapeCount = 0;
 953
 954             if (decoder != null)
 955             {
 956                 currentMode = decoder.currentMode;
 957                 shiftInMode = decoder.shiftInOutMode;
 958
 959                 // See if we have leftover decoder buffer to use
 960                 // Load our bytesLeftOver
 961                 escapeCount = decoder.bytesLeftOverCount;
 962
 963                 // Work on a local copy: we don't want to mess up the decoder if we're counting
 964                 // or throw an exception
 965                 for (int i = 0; i < escapeCount; i++)
 966                     escapeBytes[i] = decoder.bytesLeftOver[i];
 967             }
 968
 969             // Do this until the end
 970             while (buffer.MoreData || escapeCount > 0)
 971             {
 972                 byte ch;
 973
 974                 if (escapeCount > 0)
 975                 {
 976                     // Get more escape sequences if necessary
 977                     if (escapeBytes[0] == ESCAPE)
 978                     {
 979                         // Stop if no more input
 980                         if (!buffer.MoreData)
 981                         {
 982                             if (decoder != null && !decoder.MustFlush)
 983                                 break;
 984                         }
 985                         else
 986                         {
 987                             // Add it to the sequence we can check
 988                             escapeBytes[escapeCount++] = buffer.GetNextByte();
 989
 990                             // We have an escape sequence
 991                             ISO2022Modes modeReturn =
 992                                 CheckEscapeSequenceJP(escapeBytes, escapeCount);
 993
 994                             if (modeReturn != ISO2022Modes.ModeInvalidEscape)
 995                             {
 996                                 if (modeReturn != ISO2022Modes.ModeIncompleteEscape)
 997                                 {
 998                                     // Processed escape correctly
 999                                     escapeCount = 0;
 1000
 1001                                     // We're now this mode.  ModeNOOP (<esc>&@) is consumed but must not
                                          // replace the active mode, otherwise the sequence is not really ignored.
 1002                                     if (modeReturn != ISO2022Modes.ModeNOOP)
                                              currentMode = shiftInMode = modeReturn;
 1003                                 }
 1004
 1005                                 // Either way, continue to get next escape or real byte
 1006                                 continue;
 1007                             }
 1008                         }
 1009
 1010                         // If ModeInvalidEscape, or no input & must flush, then fall through to add escape.
 1011                     }
 1012
 1013                     // Read next escape byte and move them down one.
 1014                     ch = DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
 1015                 }
 1016                 else
 1017                 {
 1018                     // Get our next byte
 1019                     ch = buffer.GetNextByte();
 1020
 1021                     if (ch == ESCAPE)
 1022                     {
 1023                         // We'll have an escape sequence, use it if we don't have one buffered already
 1024                         // (escapeCount is 0 whenever this branch runs, so the check below always succeeds)
 1025                         if (escapeCount == 0)
 1026                         {
 1027                             // Start this new escape sequence
 1028                             escapeBytes[0] = ch;
 1029                             escapeCount = 1;
 1030                             continue;
 1031                         }
 1032
 1033                         // Flush the previous escape sequence, then reuse this escape byte
 1034                         buffer.AdjustBytes(-1);
 1035                     }
 1036                 }
 1037
 1038                 if (ch == SHIFT_OUT)
 1039                 {
 1040                    // Remember the mode to come back to, then switch to halfwidth katakana
 1041                    shiftInMode = currentMode;
 1042                    currentMode = ISO2022Modes.ModeHalfwidthKatakana;
 1043                    continue;
 1044                 }
 1045                 else if (ch == SHIFT_IN)
 1046                 {
 1047                    currentMode = shiftInMode;
 1048                    continue;
 1049                 }
 1050
 1051                 // Get our full character
 1052                 ushort iBytes = ch;
 1053                 bool b2Bytes = false;
 1054
 1055                 if (currentMode == ISO2022Modes.ModeJIS0208)
 1056                 {
 1057                     //
 1058                     //  To handle errors, we need to check:
 1059                     //    1. if trailbyte is there
 1060                     //    2. if code is valid
 1061                     //
 1062                     if (escapeCount > 0)
 1063                     {
 1064                         // Let another escape fall through
 1065                         if (escapeBytes[0] != ESCAPE)
 1066                         {
 1067                             // Move them down one & get the next data
 1068                             iBytes <<= 8;
 1069                             iBytes |= DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
 1070                             b2Bytes = true;
 1071                         }
 1072                     }
 1073                     else if (buffer.MoreData)
 1074                     {
 1075                         iBytes <<= 8;
 1076                         iBytes |= buffer.GetNextByte();
 1077                         b2Bytes = true;
 1078                     }
 1079                     else
 1080                     {
 1081                         // Not enough input, use decoder if possible
 1082                         if (decoder == null || decoder.MustFlush)
 1083                         {
 1084                             // No decoder (or flushing), do fallback for this byte
 1085                             buffer.Fallback(ch);
 1086                             break;
 1087                         }
 1088
 1089                         // Stick it in the decoder if we're not counting
 1090                         if (chars != null)
 1091                         {
 1092                             escapeBytes[0] = ch;
 1093                             escapeCount = 1;
 1094                         }
 1095                         break;
 1096                     }
 1097
 1098                     // MLang treated JIS 0208 '*' lead byte like a single halfwidth katakana
 1099                     // escape, so use LEADBYTE_HALFWIDTH as katakana lead byte and keep same trail byte.
 1100                     // 0x2a lead byte range is normally unused in JIS 0208, so shouldn't have
 1101                     // any weird compatibility issues.
 1102                     if ((b2Bytes == true) && ((iBytes & 0xff00) == 0x2a00))
 1103                     {
 1104                         iBytes = (ushort)(iBytes & 0xff);
 1105                         iBytes |= (LEADBYTE_HALFWIDTH << 8);   // Put us in the halfwidth katakana range
 1106                     }
 1107                 }
 1108                 else if (iBytes >= 0xA1 && iBytes <= 0xDF)
 1109                 {
 1110                     // Everett accidentally mapped Katakana like shift-jis (932),
 1111                     // even though this is a 7 bit code page.  We keep that mapping
 1112                     iBytes |= (LEADBYTE_HALFWIDTH << 8);    // Map to halfwidth katakana range
 1113                     iBytes &= 0xff7f;                       // remove extra 0x80
 1114                 }
 1115                 else if (currentMode == ISO2022Modes.ModeHalfwidthKatakana )
 1116                 {
 1117                     // Add the LEADBYTE_HALFWIDTH lead byte that our encoding expects for Katakana:
 1118                     iBytes |= (LEADBYTE_HALFWIDTH << 8);
 1119                 }
 1120
 1121                 // We have an iBytes to try to convert.
 1122                 char c = mapBytesToUnicode[iBytes];
 1123
 1124                 // See if it was unknown (a zero byte value is never treated as unknown)
 1125                 if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
 1126                 {
 1127                     // Have to do fallback
 1128                     if (b2Bytes)
 1129                     {
 1130                         if (!buffer.Fallback((byte)(iBytes >> 8), (byte)iBytes))
 1131                             break;
 1132                     }
 1133                     else
 1134                     {
 1135                         if (!buffer.Fallback(ch))
 1136                             break;
 1137                     }
 1138                 }
 1139                 else
 1140                 {
 1141                     // If we were JIS 0208, then we consumed an extra byte
 1142                     if (!buffer.AddChar(c, b2Bytes ? 2:1))
 1143                         break;
 1144                 }
 1145             }
 1146
 1147             // Make sure our decoder state matches our mode, if not counting
 1148             if (chars != null && decoder != null)
 1149             {
 1150                 // Remember it if we don't flush
 1151                 if (!decoder.MustFlush || escapeCount != 0)
 1152                 {
 1153                     // Either not flushing or had state (from convert)
 1154                     Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
 1155                         "[ISO2022Encoding.GetCharsCP5022xJP]Expected no state or not converting or not flushing");
 1156
 1157                     decoder.currentMode = currentMode;
 1158                     decoder.shiftInOutMode = shiftInMode;
 1159
 1160                     // Remember escape buffer
 1161                     decoder.bytesLeftOverCount = escapeCount;
 1162                     decoder.bytesLeftOver = escapeBytes;
 1163                 }
 1164                 else
 1165                 {
 1166                     // We flush, clear buffer
 1167                     decoder.currentMode = ISO2022Modes.ModeASCII;
 1168                     decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
 1169                     decoder.bytesLeftOverCount = 0;
 1170                     // Slightly different if counting/not counting
 1171                 }
 1172
 1173                 decoder.m_bytesUsed = buffer.BytesUsed;
 1174             }
 1175
 1176             // Return # of characters we found
 1177             return buffer.Count;
 1178         }
1176
 1177         // We know we have an escape sequence, so check it starting with the byte after the escape
              //
              // Classifies the first escapeCount bytes of "bytes" as a Japanese escape sequence:
              //   ModeInvalidEscape    - bytes[0] is not ESCAPE, or the sequence is not one listed below
              //   ModeIncompleteEscape - fewer than 3 bytes so far, or <esc>$ followed by something other
              //                          than '@'/'B' with fewer than 4 bytes so far
              //   ModeASCII            - <esc>(B, <esc>(H, <esc>(J
              //   ModeHalfwidthKatakana- <esc>(I
              //   ModeJIS0208          - <esc>$@, <esc>$B, <esc>$(D
              //   ModeNOOP             - <esc>&@
 1178         private ISO2022Modes CheckEscapeSequenceJP( byte[] bytes, int escapeCount )
 1179         {
 1180             // Have an escape sequence
 1181             if (bytes[0] != ESCAPE)
 1182                 return ISO2022Modes.ModeInvalidEscape;
 1183
 1184             // Every sequence we recognize is at least 3 bytes long
 1185             if (escapeCount < 3)
 1186                 return ISO2022Modes.ModeIncompleteEscape;
 1187
 1188             if (bytes[1] == '(')
 1189             {
 1190                 if (bytes[2] == 'B')       // <esc>(B
 1191                 {
 1192                     return ISO2022Modes.ModeASCII;
 1193                 }
 1194                 else if (bytes[2] == 'H')  // <esc>(H
 1195                 {
 1196                     // Actually this is supposed to be Swedish
 1197                     // We treat it like ASCII though.
 1198                     return ISO2022Modes.ModeASCII;
 1199                 }
 1200                 else if (bytes[2] == 'J')  // <esc>(J
 1201                 {
 1202                     // Actually this is supposed to be Roman
 1203                     // 2 characters are different, but historically we treat it as ascii
 1204                     return ISO2022Modes.ModeASCII;
 1205                 }
 1206                 else if (bytes[2] == 'I')  // <esc>(I
 1207                 {
 1208                     return ISO2022Modes.ModeHalfwidthKatakana;
 1209                 }
 1210             }
 1211             else if (bytes[1] == '$')
 1212             {
 1213                 if (bytes[2] == '@' ||   // <esc>$@
 1214                     bytes[2] == 'B')     // <esc>$B
 1215                 {
 1216                     return ISO2022Modes.ModeJIS0208;
 1217                 }
 1218                 else
 1219                 {
 1220                     // Looking for <esc>$(D, which needs a 4th byte
 1221                     if (escapeCount < 4)
 1222                         return ISO2022Modes.ModeIncompleteEscape;
 1223
 1224                     if (bytes[2] == '(' && bytes[3] == 'D') // <esc>$(D
 1225                     {
 1226                         // Mlang treated 0208 like 0212 even though that's wrong
 1227                         // NOTE(review): <esc>$(D normally designates JIS X 0212 (see RFC 2237), so the
 1228                         // wording above is probably reversed (0212 is treated like 0208) - confirm.
 1229                         return ISO2022Modes.ModeJIS0208;
 1230                     }
 1231                 }
 1232             }
 1233             else if (bytes[1] == '&')
 1234             {
 1235                 if (bytes[2] == '@')            // <esc>&@
 1236                 {
 1237                     // Ignore ESC & @ (prefix to <esc>$B)
 1238                     return ISO2022Modes.ModeNOOP;
 1239                 }
 1240             }
 1241
 1242             // If we get here we fell through and have an invalid/unknown escape sequence
 1243             return ISO2022Modes.ModeInvalidEscape;
 1244         }
1242
              // Pops the first byte off the escape buffer.
              //   bytes - buffer to pop from; passed by ref but never reassigned here, only its
              //           elements are modified
              //   count - number of valid bytes in the buffer; must be > 0, decremented by one on return
              // Returns the byte that was at bytes[0].  The remaining bytes are moved down one slot
              // and the slot that became free is set to 0.
 1243         private byte DecrementEscapeBytes(ref byte[] bytes, ref int count)
 1244         {
 1245             Contract.Assert(count > 0, "[ISO2022Encoding.DecrementEscapeBytes]count > 0");
 1246
 1247             // Decrement our count
 1248             count--;
 1249
 1250             // Remember the first one
 1251             byte returnValue = bytes[0];
 1252
 1253             // Move them down one.  (count is already the new length, so bytes[i+1] reads at most
 1254             // the old last valid byte.)
 1255             for (int i = 0; i < count; i++)
 1256             {
 1257                 bytes[i] = bytes[i+1];
 1258             }
 1259
 1260             // Clear out the last byte
 1261             bytes[count] = 0;
 1262
 1263             // Return the old 1st byte
 1264             return returnValue;
 1265         }
1265
1266         // Note that in DBCS mode mlang passed through ' ', '\t' and '\n' as SBCS characters
1267         // probably to allow mailer formatting without too much extra work.
1268         [System.Security.SecurityCritical]  // auto-generated
1269         private unsafe int GetCharsCP50225KR(byte* bytes, int byteCount,
1270                                                    char* chars, int charCount, ISO2022Decoder decoder)
1271         {
1272             // Get our info.
1273             Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
1274                 this, decoder, chars, charCount, bytes, byteCount);
1275
1276             // No mode information yet
1277             ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Our current Mode
1278
1279             byte[] escapeBytes = new byte[4];
1280             int escapeCount = 0;
1281
1282             if (decoder != null)
1283             {
1284                 currentMode = decoder.currentMode;
1285
1286                 // See if we have leftover decoder buffer to use
1287                 // Load our bytesLeftOver
1288                 escapeCount = decoder.bytesLeftOverCount;
1289
1290                 // Don't want to mess up decoder if we're counting or throw an exception
1291                 for (int i = 0; i < escapeCount; i++)
1292                     escapeBytes[i] = decoder.bytesLeftOver[i];
1293             }
1294
1295             // Do this until the end, just do '?' replacement because we don't have fallbacks for decodings.
1296             while (buffer.MoreData || escapeCount > 0)
1297             {
1298                 byte ch;
1299
1300                 if (escapeCount > 0)
1301                 {
1302                     // Get more escape sequences if necessary
1303                     if (escapeBytes[0] == ESCAPE)
1304                     {
1305                         // Stop if no more input
1306                         if (!buffer.MoreData)
1307                         {
1308                             if (decoder != null && !decoder.MustFlush)
1309                                 break;
1310                         }
1311                         else
1312                         {
1313                             // Add it to the sequence we can check
1314                             escapeBytes[escapeCount++] = buffer.GetNextByte();
1315
1316                             // We have an escape sequence
1317                             ISO2022Modes modeReturn =
1318                                 CheckEscapeSequenceKR(escapeBytes, escapeCount);
1319
1320                             if (modeReturn != ISO2022Modes.ModeInvalidEscape)
1321                             {
1322                                 if (modeReturn != ISO2022Modes.ModeIncompleteEscape)
1323                                 {
1324                                     // Processed escape correctly, no effect (we know about KR mode)
1325                                     escapeCount = 0;
1326                                 }
1327
1328                                 // Either way, continue to get next escape or real byte
1329                                 continue;
1330                             }
1331                         }
1332
1333                         // If ModeInvalidEscape, or no input & must flush, then fall through to add escape.
1334                     }
1335
1336                     // Still have something left over in escape buffer
1337                     // Get it and move them down one
1338                     ch = DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
1339                 }
1340                 else
1341                 {
1342                     // Get our next byte
1343                     ch = buffer.GetNextByte();
1344
1345                     if (ch == ESCAPE)
1346                     {
1347                         // We'll have an escape sequence, use it if we don't have one buffered already
1348                         if (escapeCount == 0)
1349                         {
1350                             // Start this new escape sequence
1351                             escapeBytes[0] = ch;
1352                             escapeCount = 1;
1353                             continue;
1354                         }
1355
1356                         // Flush previous escape sequence, then reuse this escape byte
1357                         buffer.AdjustBytes(-1);
1358                     }
1359                 }
1360
1361                 if (ch == SHIFT_OUT)
1362                 {
1363                    currentMode = ISO2022Modes.ModeKR;
1364                    continue;
1365                 }
1366                 else if (ch == SHIFT_IN)
1367                 {
1368                    currentMode = ISO2022Modes.ModeASCII;
1369                    continue;
1370                 }
1371
1372                 // Get our full character
1373                 ushort iBytes = ch;
1374                 bool b2Bytes = false;
1375
1376                 // MLANG was passing through ' ', '\t' and '\n', so we do so as well, but I don't see that in the RFC.
1377                 if (currentMode == ISO2022Modes.ModeKR && ch != ' ' && ch != '\t' && ch != '\n')
1378                 {
1379                     //
1380                     //  To handle errors, we need to check:
1381                     //    1. if trailbyte is there
1382                     //    2. if code is valid
1383                     //
1384                     if (escapeCount > 0)
1385                     {
1386                         // Let another escape fall through
1387                         if (escapeBytes[0] != ESCAPE)
1388                         {
1389                             // Move them down one & get the next data
1390                             iBytes <<= 8;
1391                             iBytes |= DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
1392                             b2Bytes = true;
1393                         }
1394                     }
1395                     else if (buffer.MoreData)
1396                     {
1397                         iBytes <<= 8;
1398                         iBytes |= buffer.GetNextByte();
1399                         b2Bytes = true;
1400                     }
1401                     else
1402                     {
1403                         // Not enough input, use decoder if possible
1404                         if (decoder == null || decoder.MustFlush)
1405                         {
1406                             // No decoder, do fallback for lonely 1st byte
1407                             buffer.Fallback(ch);
1408                             break;
1409                         }
1410
1411                         // Stick it in the decoder if we're not counting
1412                         if (chars != null)
1413                         {
1414                             escapeBytes[0] = ch;
1415                             escapeCount = 1;
1416                         }
1417                         break;
1418                     }
1419                 }
1420
1421                 // We have a iBytes to try to convert.
1422                 char c = mapBytesToUnicode[iBytes];
1423
1424                 // See if it was unknown
1425                 if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
1426                 {
1427                     // Have to do fallback
1428                     if (b2Bytes)
1429                     {
1430                         if (!buffer.Fallback((byte)(iBytes >> 8), (byte)iBytes))
1431                             break;
1432                     }
1433                     else
1434                     {
1435                         if (!buffer.Fallback(ch))
1436                             break;
1437                     }
1438                 }
1439                 else
1440                 {
1441                     if (!buffer.AddChar(c, b2Bytes ? 2:1))
1442                         break;
1443                 }
1444             }
1445
1446             // Make sure our decoder state matches our mode, if not counting
1447             if (chars != null && decoder != null)
1448             {
1449                 // Remember it if we don't flush
1450                 if (!decoder.MustFlush || escapeCount != 0)
1451                 {
1452                     // Either not flushing or had state (from convert)
1453                     Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
1454                         "[ISO2022Encoding.GetCharsCP50225KR]Expected no state or not converting or not flushing");
1455
1456                     decoder.currentMode = currentMode;
1457
1458                     // Remember escape buffer
1459                     decoder.bytesLeftOverCount = escapeCount;
1460                     decoder.bytesLeftOver = escapeBytes;
1461                 }
1462                 else
1463                 {
1464                     // We flush, clear buffer
1465                     decoder.currentMode = ISO2022Modes.ModeASCII;
1466                     decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
1467                     decoder.bytesLeftOverCount = 0;
1468                 }
1469
1470                 decoder.m_bytesUsed = buffer.BytesUsed;
1471             }
1472
1473             // Return # of characters we found
1474             return buffer.Count;
1475         }
1476
1477         // We know we have an escape sequence, so check it starting with the byte after the escape
1478         private ISO2022Modes CheckEscapeSequenceKR( byte[] bytes, int escapeCount )
1479         {
1480             // Have an escape sequence
1481             if (bytes[0] != ESCAPE)
1482                 return ISO2022Modes.ModeInvalidEscape;
1483
1484             if (escapeCount < 4)
1485                 return ISO2022Modes.ModeIncompleteEscape;
1486
1487             if (bytes[1] == '$' && bytes[2] == ')' && bytes[3] == 'C') // <esc>$)C
1488                 return ISO2022Modes.ModeKR;
1489
1490             // If we get here we fell through and have an invalid/unknown escape sequence
1491             return ISO2022Modes.ModeInvalidEscape;
1492         }
1493
1494         // CP52936 is HZ Encoding
1495         // HZ Encoding has 4 shift sequences:
1496         // ~~       '~' (\u7e)
1497         // ~}       shift into 1 byte mode,
1498         // ~{       shift into 2 byte GB 2312-80
1499         // ~<NL>    Maintain 2 byte mode across new lines (ignore both ~ and <NL> characters)
1500         //          (This is for mailers that restrict to 70 or 80 or whatever character lines)
1501         //
1502         // According to comment in mlang, lead & trail byte ranges are described in RFC 1843
1503         // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e
1504         // Our 936 code points are or'd with 0x8080, so lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe
1505         //
1506         // This encoding is designed for transmission by e-mail and news.  No bytes should have high bit set.
1507         // (all bytes <= 0x7f)
        // Decodes HZ (code page 52936) bytes into chars, or just counts them.
        //
        //  bytes/byteCount - input bytes; asserted to be non-null and non-negative
        //  chars/charCount - output buffer; chars == null is treated as "counting only", in which case
        //                    nothing is written back to the decoder
        //  decoder         - optional streaming state (current mode plus at most one left over byte), may be null
        //
        // Returns buffer.Count, the number of chars found.
        //
        // Bytes that can't be decoded are handed to buffer.Fallback().  A '~' or a lead byte that ends the
        // input is remembered in the decoder when there is one and it isn't flushing; otherwise it is
        // handed to the fallback as well.
        [System.Security.SecurityCritical]  // auto-generated
        private unsafe int GetCharsCP52936(byte* bytes, int byteCount,
                                                char* chars, int charCount, ISO2022Decoder decoder)
        {
            Contract.Assert(byteCount >=0, "[ISO2022Encoding.GetCharsCP52936]count >=0");
            Contract.Assert(bytes!=null, "[ISO2022Encoding.GetCharsCP52936]bytes!=null");

            // Get our info.  The buffer wraps both the input bytes and the output chars.
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
                this, decoder, chars, charCount, bytes, byteCount);

            // No mode information yet, assume ASCII unless the decoder says otherwise
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;
            int byteLeftOver = -1;          // -1 means "no byte carried over from the previous call"
            bool bUsedDecoder = false;      // set once we store a left over byte in the decoder during this call

            if (decoder != null)
            {
                currentMode = decoder.currentMode;
                // See if we have leftover decoder buffer to use
                // Don't want to mess up decoder if we're counting or throw an exception,
                // so only copy the byte into a local here; the decoder itself is updated at the very end.
                if (decoder.bytesLeftOverCount != 0 )
                {
                    // Load our bytesLeftOver (only the first one is ever used by this code page)
                    byteLeftOver = decoder.bytesLeftOver[0];
                }
            }

            // Keep going while there is input left or a byte carried over from the previous call.
            // Anything we can't decode goes through buffer.Fallback().
            while (buffer.MoreData || byteLeftOver >= 0)
            {
                byte ch;

                // May have a left over byte, it gets used before any new input
                if (byteLeftOver >= 0)
                {
                    ch = (byte)byteLeftOver;
                    byteLeftOver = -1;
                }
                else
                {
                    ch = buffer.GetNextByte();
                }

                // '~' introduces an escape (shift) sequence
                if (ch == '~')
                {
                    // Next char is type of switch
                    if (!buffer.MoreData)
                    {
                        // We don't have anything left, it'll be in decoder or a ?
                        // don't fail if we are allowing overflows
                        if (decoder == null || decoder.MustFlush)
                        {
                            // We'll be a '?'
                            buffer.Fallback(ch);
                            // Break whether the fallback failed or not (because !MoreData)
                            break;
                        }

                        // Stick it in decoder
                        // (decoder is known to be non-null and not flushing once we get past the check above)
                        if (decoder != null)
                            decoder.ClearMustFlush();

                        // Only touch the decoder's left over byte if we're converting, not counting
                        if (chars != null)
                        {
                            decoder.bytesLeftOverCount = 1;
                            decoder.bytesLeftOver[0] = (byte)'~';
                            bUsedDecoder = true;
                        }
                        break;
                    }

                    // What type is it?, get 2nd byte
                    ch = buffer.GetNextByte();

                    if (ch == '~' && currentMode == ISO2022Modes.ModeASCII)
                    {
                        // It's just a ~~ replacement for ~, add it (the one char used up 2 bytes)
                        if (!buffer.AddChar((char)ch, 2))
                            // Add failed, break for converting
                            break;

                        // Add succeeded, continue
                        continue;
                    }
                    else if (ch == '{')
                    {
                        // Switching to Double Byte mode
                        currentMode = ISO2022Modes.ModeHZ;
                        continue;
                    }
                    else if (ch == '}')
                    {
                        // Switching to ASCII mode
                        currentMode = ISO2022Modes.ModeASCII;
                        continue;
                    }
                    else if (ch == '\n')
                    {
                        // Ignore ~\n sequence
                        continue;
                    }
                    else
                    {
                        // Unknown escape, back up and try the '~' as a "normal" byte or lead byte
                        // (the byte we just peeked at gets read again below or on the next pass)
                        buffer.AdjustBytes(-1);
                        ch = (byte)'~';
                    }
                }

                // go ahead and add our data
                if (currentMode != ISO2022Modes.ModeASCII)
                {
                    // Should be ModeHZ
                    Contract.Assert(currentMode == ISO2022Modes.ModeHZ, "[ISO2022Encoding.GetCharsCP52936]Expected ModeHZ");
                    char cm;

                    // Everett allowed characters < 0x20 to be passed as if they were ASCII
                    if (ch < 0x20)
                    {
                        // Emit it as ASCII
                        goto STOREASCII;
                    }

                    // It's multibyte, should have another byte
                    if (!buffer.MoreData)
                    {
                        // No bytes left
                        // don't fail if we are allowing overflows
                        if (decoder == null || decoder.MustFlush)
                        {
                            // Not enough bytes, fallback lead byte
                            buffer.Fallback(ch);

                            // Break if we fail & break because !MoreData
                            break;
                        }

                        // (decoder is known to be non-null and not flushing once we get past the check above)
                        if (decoder != null)
                            decoder.ClearMustFlush();

                        // Stick it in decoder, but only if we're converting, not counting
                        if (chars != null)
                        {
                            decoder.bytesLeftOverCount = 1;
                            decoder.bytesLeftOver[0] = ch;
                            bUsedDecoder = true;
                        }
                        break;
                    }

                    // Everett uses space as an escape character for single SBCS bytes
                    // Either way we need the 2nd byte; iBytes is lead byte in the high 8 bits, trail byte in the low 8
                    byte ch2 = buffer.GetNextByte();
                    ushort iBytes = (ushort)(ch << 8 | ch2);

                    if (ch == ' ' && ch2 != 0)
                    {
                        // Get next char and treat it like ASCII (Everett treated space like an escape
                        // allowing the next char to be just ascii)
                        // Note this skips the table lookup, the 2nd byte is used as the char directly.
                        cm = (char)ch2;
                        goto STOREMULTIBYTE;
                    }

                    // Bytes should be in range: lead byte 0x21-0x77, trail byte: 0x21 - 0x7e
                    if ((ch < 0x21 || ch > 0x77 || ch2 < 0x21 || ch2 > 0x7e) &&
                    // Everett allowed high bit mappings for same characters (but only if both bits set)
                        (ch < 0xa1 || ch > 0xf7 || ch2 < 0xa1 || ch2 > 0xfe))
                    {
                        // For some reason Everett allowed XX20 to become unicode 3000... (ideo sp)
                        // so substitute 0x2121 and let the normal lookup below handle it
                        if (ch2 == 0x20 && 0x21 <= ch && ch <= 0x7d)
                        {
                            iBytes = 0x2121;
                            goto MULTIBYTE;
                        }

                        // Illegal char, use fallback.  If lead byte is 0 have to do it special and do it first
                        if (!buffer.Fallback((byte)(iBytes>>8), (byte)(iBytes)))
                            break;
                        continue;
                    }

                    MULTIBYTE:
                    // The lookup table is keyed with the high bit set on both bytes
                    iBytes |= 0x8080;
                    // Look up the multibyte char to stick it in our data

                    // We have a iBytes to try to convert.
                    cm = mapBytesToUnicode[iBytes];

                    STOREMULTIBYTE:

                    // See if it was unknown
                    if (cm == UNKNOWN_CHAR_FLAG && iBytes != 0)
                    {
                        // Fall back the unknown stuff
                        if (!buffer.Fallback((byte)(iBytes>>8), (byte)(iBytes)))
                            break;
                        continue;
                    }

                    // One char for the 2 bytes we consumed
                    if (!buffer.AddChar(cm, 2))
                        break;              // convert ran out of buffer, stop
                    continue;
                }

                // Just ASCII
                // We allow some chars > 7f because everett did, so we have to look them up.
                // (Also the target for control bytes < 0x20 seen in HZ mode, see the goto above.)
                STOREASCII:
                char c = mapBytesToUnicode[ch];

                // Check if it was unknown (a 0 result only counts as known for a 0 byte)
                if ((c == UNKNOWN_CHAR_FLAG || c == 0) && (ch != 0))
                {
                    // fallback the unknown bytes
                    if (!buffer.Fallback((byte)ch))
                        break;
                    continue;
                }

                // Go ahead and add our ASCII character
                if (!buffer.AddChar(c))
                    break;                  // convert ran out of buffer, stop
            }

            // Need to remember our state, IF we're not counting
            if (chars != null && decoder != null)
            {
                if (!bUsedDecoder)
                {
                    // If we didn't use it, clear the byte left over
                    // (any byte carried in from the previous call was read at the top of the loop)
                    decoder.bytesLeftOverCount = 0;
                }

                if (decoder.MustFlush && decoder.bytesLeftOverCount == 0)
                {
                    // Flushing with nothing pending, so the decoder starts over in ASCII mode
                    decoder.currentMode = ISO2022Modes.ModeASCII;
                }
                else
                {
                    // Either not flushing or had state (from convert)
                    Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                        "[ISO2022Encoding.GetCharsCP52936]Expected no state or not converting or not flushing");

                    // Carry the mode we ended up in over to the next call
                    decoder.currentMode = currentMode;
                }
                decoder.m_bytesUsed = buffer.BytesUsed;
            }

            // Return # of characters we found
            return buffer.Count;
        }
1760
1761         // Note: These all end up with 1/2 bytes of average byte count, so unless we're 1 we're always
1762         // charCount/2 bytes too big.
1763         public override int GetMaxByteCount(int charCount)
1764         {
1765             if (charCount < 0)
1766                throw new ArgumentOutOfRangeException("charCount",
1767                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
1768             Contract.EndContractBlock();
1769
1770             // Characters would be # of characters + 1 in case high surrogate is ? * max fallback
1771             long byteCount = (long)charCount + 1;
1772
1773             if (EncoderFallback.MaxCharCount > 1)
1774                 byteCount *= EncoderFallback.MaxCharCount;
1775
1776             // Start with just generic DBCS values (sort of).
1777             int perChar = 2;
1778             int extraStart = 0;
1779             int extraEnd = 0;
1780
1781             switch (CodePage)
1782             {
1783                 case 50220:
1784                 case 50221:
1785                     // 2 bytes per char + 3 bytes switch to JIS 0208 or 1 byte + 3 bytes switch to 1 byte CP
1786                     perChar = 5;        // 5 max (4.5 average)
1787                     extraEnd = 3;       // 3 bytes to shift back to ASCII
1788                     break;
1789                 case 50222:
1790                     // 2 bytes per char + 3 bytes switch to JIS 0208 or 1 byte + 3 bytes switch to 1 byte CP
1791                     perChar = 5;        // 5 max (4.5 average)
1792                     extraEnd = 4;       // 1 byte to shift from Katakana -> DBCS, 3 bytes to shift back to ASCII from DBCS
1793                     break;
1794                 case 50225:
1795                     // 2 bytes per char + 1 byte SO, or 1 byte per char + 1 byte SI.
1796                     perChar = 3;        // 3 max, (2.5 average)
1797                     extraStart = 4;     // EUC-KR marker appears at beginning of file.
1798                     extraEnd = 1;       // 1 byte to shift back to ascii if necessary.
1799                     break;
1800                 case 52936:
1801                     // 2 bytes per char + 2 byte shift, or 1 byte + 1 byte shift
1802                     // Worst case: left over surrogate with no low surrogate is extra ?, could have to switch to ASCII, then could have HZ and flush to ASCII mode
1803                     perChar = 4;        // 4 max, (3.5 average if every other char is HZ/ASCII)
1804                     extraEnd = 2;       // 2 if we have to shift back to ASCII
1805                     break;
1806             }
1807
1808             // Return our surrogate and End plus perChar for each char.
1809             byteCount *= perChar;
1810             byteCount += extraStart + extraEnd;
1811
1812             if (byteCount > 0x7fffffff)
1813                 throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
1814
1815             return (int)byteCount;
1816         }
1817
1818         public override int GetMaxCharCount(int byteCount)
1819         {
1820             if (byteCount < 0)
1821                throw new ArgumentOutOfRangeException("byteCount",
1822                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
1823             Contract.EndContractBlock();
1824
1825             int perChar = 1;
1826             int extraDecoder = 1;
1827
1828             switch (CodePage)
1829             {
1830                 case 50220:
1831                 case 50221:
1832                 case 50222:
1833                 case 50225:
1834                     perChar = 1;        // Worst case all ASCII
1835                     extraDecoder = 3;   // Could have left over 3 chars of 4 char escape sequence, that all become ?
1836                     break;
1837                 case 52936:
1838                     perChar = 1;        // Worst case all ASCII
1839                     extraDecoder = 1;   // sequences are 2 chars, so if next one is illegal, then previous 1 could be ?
1840                     break;
1841             }
1842
1843             // Figure out our length, perchar * char + whatever extra our decoder could do to us.
1844             long charCount = ((long)byteCount * perChar) + extraDecoder;
1845
1846             // Just in case we have to fall back unknown ones.
1847             if (DecoderFallback.MaxCharCount > 1)
1848                 charCount *= DecoderFallback.MaxCharCount;
1849
1850             if (charCount > 0x7fffffff)
1851                 throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
1852
1853             return (int)charCount;
1854         }
1855
1856         public override Encoder GetEncoder()
1857         {
1858             return new ISO2022Encoder(this);
1859         }
1860
1861         public override Decoder GetDecoder()
1862         {
1863             return new ISO2022Decoder(this);
1864         }
1865
1866         [Serializable]
1867         internal class ISO2022Encoder : System.Text.EncoderNLS
1868         {
1869             internal ISO2022Modes currentMode;
1870             internal ISO2022Modes shiftInOutMode;
1871
1872             internal ISO2022Encoder(EncodingNLS encoding) : base(encoding)
1873             {
1874                 // base calls reset
1875             }
1876
1877             public override void Reset()
1878             {
1879                 // Reset
1880                 currentMode = ISO2022Modes.ModeASCII;
1881                 shiftInOutMode = ISO2022Modes.ModeASCII;
1882                 charLeftOver = (char)0;
1883                 if (m_fallbackBuffer != null)
1884                     m_fallbackBuffer.Reset();
1885             }
1886
1887             // Anything left in our encoder?
1888             internal override bool HasState
1889             {
1890                 get
1891                 {
1892                     // Don't check shift-out mode, it may be ascii (JP) or not (KR)
1893                     return (this.charLeftOver != (char)0 ||
1894                             currentMode != ISO2022Modes.ModeASCII);
1895                 }
1896             }
1897         }
1898
1899         [Serializable]
1900         internal class ISO2022Decoder : System.Text.DecoderNLS
1901         {
1902             internal byte[] bytesLeftOver;
1903             internal int bytesLeftOverCount;
1904             internal ISO2022Modes currentMode;
1905             internal ISO2022Modes shiftInOutMode;
1906
1907             internal ISO2022Decoder(EncodingNLS encoding) : base(encoding)
1908             {
1909                 // base calls reset
1910             }
1911
1912             public override void Reset()
1913             {
1914                 // Reset
1915                 bytesLeftOverCount = 0;
1916                 bytesLeftOver = new byte[4];
1917                 currentMode = ISO2022Modes.ModeASCII;
1918                 shiftInOutMode = ISO2022Modes.ModeASCII;
1919                 if (m_fallbackBuffer != null)
1920                     m_fallbackBuffer.Reset();
1921             }
1922
1923             // Anything left in our decoder?
1924             internal override bool HasState
1925             {
1926                 get
1927                 {
1928                     // If have bytes left over or not shifted back to ASCII then have problem
1929                     return (this.bytesLeftOverCount != 0 ||
1930                             currentMode != ISO2022Modes.ModeASCII);
1931                 }
1932             }
1933         }
1934
        // Lookup table with 63 entries, one for each half-width kana / punctuation code 0x8ea1 - 0x8edf.
        // The code an entry stands for is given in its trailing comment, and the entries are in ascending
        // code order, so entry [code - 0x8ea1] appears to be the replacement for that code.
        // NOTE(review): going by the name the values are the full-width equivalents, presumably as double
        // byte codes in the same form as the 0x8eXX keys - confirm against the code that indexes this table.
        static ushort[] HalfToFullWidthKanaTable =
        {
            0xa1a3, // 0x8ea1 : Halfwidth Ideographic Period
            0xa1d6, // 0x8ea2 : Halfwidth Opening Corner Bracket
            0xa1d7, // 0x8ea3 : Halfwidth Closing Corner Bracket
            0xa1a2, // 0x8ea4 : Halfwidth Ideographic Comma
            0xa1a6, // 0x8ea5 : Halfwidth Katakana Middle Dot
            0xa5f2, // 0x8ea6 : Halfwidth Katakana Wo
            0xa5a1, // 0x8ea7 : Halfwidth Katakana Small A
            0xa5a3, // 0x8ea8 : Halfwidth Katakana Small I
            0xa5a5, // 0x8ea9 : Halfwidth Katakana Small U
            0xa5a7, // 0x8eaa : Halfwidth Katakana Small E
            0xa5a9, // 0x8eab : Halfwidth Katakana Small O
            0xa5e3, // 0x8eac : Halfwidth Katakana Small Ya
            0xa5e5, // 0x8ead : Halfwidth Katakana Small Yu
            0xa5e7, // 0x8eae : Halfwidth Katakana Small Yo
            0xa5c3, // 0x8eaf : Halfwidth Katakana Small Tu
            0xa1bc, // 0x8eb0 : Halfwidth Katakana-Hiragana Prolonged Sound Mark
            0xa5a2, // 0x8eb1 : Halfwidth Katakana A
            0xa5a4, // 0x8eb2 : Halfwidth Katakana I
            0xa5a6, // 0x8eb3 : Halfwidth Katakana U
            0xa5a8, // 0x8eb4 : Halfwidth Katakana E
            0xa5aa, // 0x8eb5 : Halfwidth Katakana O
            0xa5ab, // 0x8eb6 : Halfwidth Katakana Ka
            0xa5ad, // 0x8eb7 : Halfwidth Katakana Ki
            0xa5af, // 0x8eb8 : Halfwidth Katakana Ku
            0xa5b1, // 0x8eb9 : Halfwidth Katakana Ke
            0xa5b3, // 0x8eba : Halfwidth Katakana Ko
            0xa5b5, // 0x8ebb : Halfwidth Katakana Sa
            0xa5b7, // 0x8ebc : Halfwidth Katakana Si
            0xa5b9, // 0x8ebd : Halfwidth Katakana Su
            0xa5bb, // 0x8ebe : Halfwidth Katakana Se
            0xa5bd, // 0x8ebf : Halfwidth Katakana So
            0xa5bf, // 0x8ec0 : Halfwidth Katakana Ta
            0xa5c1, // 0x8ec1 : Halfwidth Katakana Ti
            0xa5c4, // 0x8ec2 : Halfwidth Katakana Tu
            0xa5c6, // 0x8ec3 : Halfwidth Katakana Te
            0xa5c8, // 0x8ec4 : Halfwidth Katakana To
            0xa5ca, // 0x8ec5 : Halfwidth Katakana Na
            0xa5cb, // 0x8ec6 : Halfwidth Katakana Ni
            0xa5cc, // 0x8ec7 : Halfwidth Katakana Nu
            0xa5cd, // 0x8ec8 : Halfwidth Katakana Ne
            0xa5ce, // 0x8ec9 : Halfwidth Katakana No
            0xa5cf, // 0x8eca : Halfwidth Katakana Ha
            0xa5d2, // 0x8ecb : Halfwidth Katakana Hi
            0xa5d5, // 0x8ecc : Halfwidth Katakana Hu
            0xa5d8, // 0x8ecd : Halfwidth Katakana He
            0xa5db, // 0x8ece : Halfwidth Katakana Ho
            0xa5de, // 0x8ecf : Halfwidth Katakana Ma
            0xa5df, // 0x8ed0 : Halfwidth Katakana Mi
            0xa5e0, // 0x8ed1 : Halfwidth Katakana Mu
            0xa5e1, // 0x8ed2 : Halfwidth Katakana Me
            0xa5e2, // 0x8ed3 : Halfwidth Katakana Mo
            0xa5e4, // 0x8ed4 : Halfwidth Katakana Ya
            0xa5e6, // 0x8ed5 : Halfwidth Katakana Yu
            0xa5e8, // 0x8ed6 : Halfwidth Katakana Yo
            0xa5e9, // 0x8ed7 : Halfwidth Katakana Ra
            0xa5ea, // 0x8ed8 : Halfwidth Katakana Ri
            0xa5eb, // 0x8ed9 : Halfwidth Katakana Ru
            0xa5ec, // 0x8eda : Halfwidth Katakana Re
            0xa5ed, // 0x8edb : Halfwidth Katakana Ro
            0xa5ef, // 0x8edc : Halfwidth Katakana Wa
            0xa5f3, // 0x8edd : Halfwidth Katakana N
            0xa1ab, // 0x8ede : Halfwidth Katakana Voiced Sound Mark
            0xa1ac  // 0x8edf : Halfwidth Katakana Semi-Voiced Sound Mark
        };
2001     }
2002 }
2003 #endif // FEATURE_CODEPAGES_FILE
2004