1 // Copyright (c) Microsoft. All rights reserved.
2 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
4 namespace System.Globalization
8 using System.Collections;
9 using System.Collections.Generic;
10 using System.Runtime.CompilerServices;
11 using System.Runtime.InteropServices;
12 using System.Runtime.Versioning;
13 using System.Security;
14 using System.Threading;
15 using System.Diagnostics.Contracts;
18 internal static partial class EncodingTable
// Returns the number of entries in the encodingDataPtr table (its Length).
20 static int GetNumEncodingItems ()
22 return encodingDataPtr.Length;
25 #region "from coreclr/src/classlibnative/nls/encodingdata.cpp"
// Builds one encoding-name table entry: name is stored in webName and cp in codePage.
28 static InternalEncodingDataItem ENC (string name, ushort cp) { return new InternalEncodingDataItem () { webName = name, codePage = cp }; }
30 internal static InternalEncodingDataItem [] encodingDataPtr = new InternalEncodingDataItem [] {
32 // encoding name, codepage.
33 ENC ("ANSI_X3.4-1968", 20127 ),
34 ENC ("ANSI_X3.4-1986", 20127 ),
35 ENC ("ascii", 20127 ),
36 ENC ("cp367", 20127 ),
37 ENC ("cp819", 28591 ),
38 ENC ("csASCII", 20127 ),
39 ENC ("csISOLatin1", 28591 ),
40 ENC ("csUnicode11UTF7", 65000 ),
41 ENC ("IBM367", 20127 ),
42 ENC ("ibm819", 28591 ),
43 ENC ("ISO-10646-UCS-2", 1200 ),
44 ENC ("iso-8859-1", 28591 ),
45 ENC ("iso-ir-100", 28591 ),
46 ENC ("iso-ir-6", 20127 ),
47 ENC ("ISO646-US", 20127 ),
48 ENC ("iso8859-1", 28591 ),
49 ENC ("ISO_646.irv:1991", 20127 ),
50 ENC ("iso_8859-1", 28591 ),
51 ENC ("iso_8859-1:1987", 28591 ),
53 ENC ("latin1", 28591 ),
55 ENC ("unicode", 1200),
56 ENC ("unicode-1-1-utf-7", 65000 ),
57 ENC ("unicode-1-1-utf-8", 65001 ),
58 ENC ("unicode-2-0-utf-7", 65000 ),
59 ENC ("unicode-2-0-utf-8", 65001 ),
60 // People get confused about the FFFE here. We can't change this because it'd break existing apps.
61 // This has been this way for a long time, including in Mlang.
62 ENC ("unicodeFFFE", 1201), // Big Endian, BOM seems backwards, think of the BOM in little endian order.
64 ENC ("us-ascii", 20127 ),
65 ENC ("utf-16", 1200 ),
66 ENC ("UTF-16BE", 1201),
67 ENC ("UTF-16LE", 1200),
68 ENC ("utf-32", 12000 ),
69 ENC ("UTF-32BE", 12001 ),
70 ENC ("UTF-32LE", 12000 ),
71 ENC ("utf-7", 65000 ),
72 ENC ("utf-8", 65001 ),
73 ENC ("x-unicode-1-1-utf-7", 65000 ),
74 ENC ("x-unicode-1-1-utf-8", 65001 ),
75 ENC ("x-unicode-2-0-utf-7", 65000 ),
76 ENC ("x-unicode-2-0-utf-8", 65001 ),
79 // encoding name, codepage.
81 ENC ("ANSI_X3.4-1968", 20127),
82 ENC ("ANSI_X3.4-1986", 20127),
83 // ENC (L"_autodetect", 50932),
84 // ENC (L"_autodetect_all", 50001),
85 // ENC (L"_autodetect_kr", 50949),
86 ENC ("arabic", 28596),
88 ENC ("ASMO-708", 708),
90 ENC ("Big5-HKSCS", 950),
91 ENC ("CCSID00858", 858),
92 ENC ("CCSID00924", 20924),
93 ENC ("CCSID01140", 1140),
94 ENC ("CCSID01141", 1141),
95 ENC ("CCSID01142", 1142),
96 ENC ("CCSID01143", 1143),
97 ENC ("CCSID01144", 1144),
98 ENC ("CCSID01145", 1145),
99 ENC ("CCSID01146", 1146),
100 ENC ("CCSID01147", 1147),
101 ENC ("CCSID01148", 1148),
102 ENC ("CCSID01149", 1149),
103 ENC ("chinese", 936),
104 ENC ("cn-big5", 950),
106 ENC ("CP00858", 858),
107 ENC ("CP00924", 20924),
108 ENC ("CP01140", 1140),
109 ENC ("CP01141", 1141),
110 ENC ("CP01142", 1142),
111 ENC ("CP01143", 1143),
112 ENC ("CP01144", 1144),
113 ENC ("CP01145", 1145),
114 ENC ("CP01146", 1146),
115 ENC ("CP01147", 1147),
116 ENC ("CP01148", 1148),
117 ENC ("CP01149", 1149),
119 ENC ("cp1025", 21025),
120 ENC ("CP1026", 1026),
121 ENC ("cp1256", 1256),
122 ENC ("CP273", 20273),
123 ENC ("CP278", 20278),
124 ENC ("CP280", 20280),
125 ENC ("CP284", 20284),
126 ENC ("CP285", 20285),
127 ENC ("cp290", 20290),
128 ENC ("cp297", 20297),
129 ENC ("cp367", 20127),
130 ENC ("cp420", 20420),
131 ENC ("cp423", 20423),
132 ENC ("cp424", 20424),
135 ENC ("cp50227", 50227),
136 //ENC (L"cp50229", 50229),
137 ENC ("cp819", 28591),
152 ENC ("CP871", 20871),
154 ENC ("cp880", 20880),
155 ENC ("CP905", 20905),
156 //ENC (L"cp930", 50930),
157 //ENC (L"cp933", 50933),
158 //ENC (L"cp935", 50935),
159 //ENC (L"cp937", 50937),
160 //ENC (L"cp939", 50939),
161 ENC ("csASCII", 20127),
163 ENC ("csEUCKR", 51949),
164 ENC ("csEUCPkdFmtJapanese", 51932),
165 ENC ("csGB2312", 936),
166 ENC ("csGB231280", 936),
167 ENC ("csIBM037", 37),
168 ENC ("csIBM1026", 1026),
169 ENC ("csIBM273", 20273),
170 ENC ("csIBM277", 20277),
171 ENC ("csIBM278", 20278),
172 ENC ("csIBM280", 20280),
173 ENC ("csIBM284", 20284),
174 ENC ("csIBM285", 20285),
175 ENC ("csIBM290", 20290),
176 ENC ("csIBM297", 20297),
177 ENC ("csIBM420", 20420),
178 ENC ("csIBM423", 20423),
179 ENC ("csIBM424", 20424),
180 ENC ("csIBM500", 500),
181 ENC ("csIBM870", 870),
182 ENC ("csIBM871", 20871),
183 ENC ("csIBM880", 20880),
184 ENC ("csIBM905", 20905),
185 ENC ("csIBMThai", 20838),
186 ENC ("csISO2022JP", 50221),
187 ENC ("csISO2022KR", 50225),
188 ENC ("csISO58GB231280", 936),
189 ENC ("csISOLatin1", 28591),
190 ENC ("csISOLatin2", 28592),
191 ENC ("csISOLatin3", 28593),
192 ENC ("csISOLatin4", 28594),
193 ENC ("csISOLatin5", 28599),
194 ENC ("csISOLatin9", 28605),
195 ENC ("csISOLatinArabic", 28596),
196 ENC ("csISOLatinCyrillic", 28595),
197 ENC ("csISOLatinGreek", 28597),
198 ENC ("csISOLatinHebrew", 28598),
199 ENC ("csKOI8R", 20866),
200 ENC ("csKSC56011987", 949),
201 ENC ("csPC8CodePage437", 437),
202 ENC ("csShiftJIS", 932),
203 ENC ("csUnicode11UTF7", 65000),
204 ENC ("csWindows31J", 932),
205 ENC ("cyrillic", 28595),
206 ENC ("DIN_66003", 20106),
207 ENC ("DOS-720", 720),
208 ENC ("DOS-862", 862),
209 ENC ("DOS-874", 874),
210 ENC ("ebcdic-cp-ar1", 20420),
211 ENC ("ebcdic-cp-be", 500),
212 ENC ("ebcdic-cp-ca", 37),
213 ENC ("ebcdic-cp-ch", 500),
214 ENC ("EBCDIC-CP-DK", 20277),
215 ENC ("ebcdic-cp-es", 20284),
216 ENC ("ebcdic-cp-fi", 20278),
217 ENC ("ebcdic-cp-fr", 20297),
218 ENC ("ebcdic-cp-gb", 20285),
219 ENC ("ebcdic-cp-gr", 20423),
220 ENC ("ebcdic-cp-he", 20424),
221 ENC ("ebcdic-cp-is", 20871),
222 ENC ("ebcdic-cp-it", 20280),
223 ENC ("ebcdic-cp-nl", 37),
224 ENC ("EBCDIC-CP-NO", 20277),
225 ENC ("ebcdic-cp-roece", 870),
226 ENC ("ebcdic-cp-se", 20278),
227 ENC ("ebcdic-cp-tr", 20905),
228 ENC ("ebcdic-cp-us", 37),
229 ENC ("ebcdic-cp-wt", 37),
230 ENC ("ebcdic-cp-yu", 870),
231 ENC ("EBCDIC-Cyrillic", 20880),
232 ENC ("ebcdic-de-273+euro", 1141),
233 ENC ("ebcdic-dk-277+euro", 1142),
234 ENC ("ebcdic-es-284+euro", 1145),
235 ENC ("ebcdic-fi-278+euro", 1143),
236 ENC ("ebcdic-fr-297+euro", 1147),
237 ENC ("ebcdic-gb-285+euro", 1146),
238 ENC ("ebcdic-international-500+euro", 1148),
239 ENC ("ebcdic-is-871+euro", 1149),
240 ENC ("ebcdic-it-280+euro", 1144),
241 ENC ("EBCDIC-JP-kana", 20290),
242 ENC ("ebcdic-Latin9--euro", 20924),
243 ENC ("ebcdic-no-277+euro", 1142),
244 ENC ("ebcdic-se-278+euro", 1143),
245 ENC ("ebcdic-us-37+euro", 1140),
246 ENC ("ECMA-114", 28596),
247 ENC ("ECMA-118", 28597),
248 ENC ("ELOT_928", 28597),
249 ENC ("euc-cn", 51936),
250 ENC ("euc-jp", 51932),
251 ENC ("euc-kr", 51949),
252 ENC ("Extended_UNIX_Code_Packed_Format_for_Japanese", 51932),
253 ENC ("GB18030", 54936),
255 ENC ("GB2312-80", 936),
256 ENC ("GB231280", 936),
258 ENC ("GB_2312-80", 936),
259 ENC ("German", 20106),
260 ENC ("greek", 28597),
261 ENC ("greek8", 28597),
262 ENC ("hebrew", 28598),
263 ENC ("hz-gb-2312", 52936),
264 ENC ("IBM-Thai", 20838),
265 ENC ("IBM00858", 858),
266 ENC ("IBM00924", 20924),
267 ENC ("IBM01047", 1047),
268 ENC ("IBM01140", 1140),
269 ENC ("IBM01141", 1141),
270 ENC ("IBM01142", 1142),
271 ENC ("IBM01143", 1143),
272 ENC ("IBM01144", 1144),
273 ENC ("IBM01145", 1145),
274 ENC ("IBM01146", 1146),
275 ENC ("IBM01147", 1147),
276 ENC ("IBM01148", 1148),
277 ENC ("IBM01149", 1149),
279 ENC ("IBM1026", 1026),
280 ENC ("IBM273", 20273),
281 ENC ("IBM277", 20277),
282 ENC ("IBM278", 20278),
283 ENC ("IBM280", 20280),
284 ENC ("IBM284", 20284),
285 ENC ("IBM285", 20285),
286 ENC ("IBM290", 20290),
287 ENC ("IBM297", 20297),
288 ENC ("IBM367", 20127),
289 ENC ("IBM420", 20420),
290 ENC ("IBM423", 20423),
291 ENC ("IBM424", 20424),
296 ENC ("ibm819", 28591),
310 ENC ("IBM871", 20871),
311 ENC ("IBM880", 20880),
312 ENC ("IBM905", 20905),
314 ENC ("ISO-10646-UCS-2", 1200),
315 ENC ("iso-2022-jp", 50220),
316 ENC ("iso-2022-jpeuc", 51932),
317 ENC ("iso-2022-kr", 50225),
318 ENC ("iso-2022-kr-7", 50225),
319 ENC ("iso-2022-kr-7bit", 50225),
320 ENC ("iso-2022-kr-8", 51949),
321 ENC ("iso-2022-kr-8bit", 51949),
322 ENC ("iso-8859-1", 28591),
323 ENC ("iso-8859-11", 874),
324 ENC ("iso-8859-13", 28603),
325 ENC ("iso-8859-15", 28605),
326 ENC ("iso-8859-2", 28592),
327 ENC ("iso-8859-3", 28593),
328 ENC ("iso-8859-4", 28594),
329 ENC ("iso-8859-5", 28595),
330 ENC ("iso-8859-6", 28596),
331 ENC ("iso-8859-7", 28597),
332 ENC ("iso-8859-8", 28598),
333 ENC ("ISO-8859-8 Visual", 28598),
334 ENC ("iso-8859-8-i", 38598),
335 ENC ("iso-8859-9", 28599),
336 ENC ("iso-ir-100", 28591),
337 ENC ("iso-ir-101", 28592),
338 ENC ("iso-ir-109", 28593),
339 ENC ("iso-ir-110", 28594),
340 ENC ("iso-ir-126", 28597),
341 ENC ("iso-ir-127", 28596),
342 ENC ("iso-ir-138", 28598),
343 ENC ("iso-ir-144", 28595),
344 ENC ("iso-ir-148", 28599),
345 ENC ("iso-ir-149", 949),
346 ENC ("iso-ir-58", 936),
347 ENC ("iso-ir-6", 20127),
348 ENC ("ISO646-US", 20127),
349 ENC ("iso8859-1", 28591),
350 ENC ("iso8859-2", 28592),
351 ENC ("ISO_646.irv:1991", 20127),
352 ENC ("iso_8859-1", 28591),
353 ENC ("ISO_8859-15", 28605),
354 ENC ("iso_8859-1:1987", 28591),
355 ENC ("iso_8859-2", 28592),
356 ENC ("iso_8859-2:1987", 28592),
357 ENC ("ISO_8859-3", 28593),
358 ENC ("ISO_8859-3:1988", 28593),
359 ENC ("ISO_8859-4", 28594),
360 ENC ("ISO_8859-4:1988", 28594),
361 ENC ("ISO_8859-5", 28595),
362 ENC ("ISO_8859-5:1988", 28595),
363 ENC ("ISO_8859-6", 28596),
364 ENC ("ISO_8859-6:1987", 28596),
365 ENC ("ISO_8859-7", 28597),
366 ENC ("ISO_8859-7:1987", 28597),
367 ENC ("ISO_8859-8", 28598),
368 ENC ("ISO_8859-8:1988", 28598),
369 ENC ("ISO_8859-9", 28599),
370 ENC ("ISO_8859-9:1989", 28599),
374 ENC ("koi8-r", 20866),
375 ENC ("koi8-ru", 21866),
376 ENC ("koi8-u", 21866),
377 ENC ("koi8r", 20866),
379 ENC ("ks-c-5601", 949),
380 ENC ("ks-c5601", 949),
381 ENC ("KSC5601", 949),
382 ENC ("KSC_5601", 949),
383 ENC ("ks_c_5601", 949),
384 ENC ("ks_c_5601-1987", 949),
385 ENC ("ks_c_5601-1989", 949),
386 ENC ("ks_c_5601_1987", 949),
393 ENC ("latin1", 28591),
394 ENC ("latin2", 28592),
395 ENC ("latin3", 28593),
396 ENC ("latin4", 28594),
397 ENC ("latin5", 28599),
398 ENC ("latin9", 28605),
399 ENC ("logical", 28598),
400 ENC ("macintosh", 10000),
401 ENC ("ms_Kanji", 932),
402 ENC ("Norwegian", 20108),
403 ENC ("NS_4551-1", 20108),
404 ENC ("PC-Multilingual-850+euro", 858),
405 ENC ("SEN_850200_B", 20107),
406 ENC ("shift-jis", 932),
407 ENC ("shift_jis", 932),
409 ENC ("Swedish", 20107),
410 ENC ("TIS-620", 874),
412 ENC ("unicode", 1200),
413 ENC ("unicode-1-1-utf-7", 65000),
414 ENC ("unicode-1-1-utf-8", 65001),
415 ENC ("unicode-2-0-utf-7", 65000),
416 ENC ("unicode-2-0-utf-8", 65001),
417 // People get confused about the FFFE here. We can't change this because it'd break existing apps.
418 // This has been this way for a long time, including in Mlang.
419 ENC ("unicodeFFFE", 1201), // Big Endian, BOM seems backwards, think of the BOM in little endian order.
421 ENC ("us-ascii", 20127),
422 ENC ("utf-16", 1200),
423 ENC ("UTF-16BE", 1201),
424 ENC ("UTF-16LE", 1200),
425 ENC ("utf-32", 12000),
426 ENC ("UTF-32BE", 12001),
427 ENC ("UTF-32LE", 12000),
428 ENC ("utf-7", 65000),
429 ENC ("utf-8", 65001),
430 ENC ("visual", 28598),
431 ENC ("windows-1250", 1250),
432 ENC ("windows-1251", 1251),
433 ENC ("windows-1252", 1252),
434 ENC ("windows-1253", 1253),
435 ENC ("Windows-1254", 1254),
436 ENC ("windows-1255", 1255),
437 ENC ("windows-1256", 1256),
438 ENC ("windows-1257", 1257),
439 ENC ("windows-1258", 1258),
440 ENC ("windows-874", 874),
441 ENC ("x-ansi", 1252),
442 ENC ("x-Chinese-CNS", 20000),
443 ENC ("x-Chinese-Eten", 20002),
444 ENC ("x-cp1250", 1250),
445 ENC ("x-cp1251", 1251),
446 ENC ("x-cp20001", 20001),
447 ENC ("x-cp20003", 20003),
448 ENC ("x-cp20004", 20004),
449 ENC ("x-cp20005", 20005),
450 ENC ("x-cp20261", 20261),
451 ENC ("x-cp20269", 20269),
452 ENC ("x-cp20936", 20936),
453 ENC ("x-cp20949", 20949),
454 ENC ("x-cp50227", 50227),
455 //ENC (L"x-cp50229", 50229),
456 //ENC (L"X-EBCDIC-JapaneseAndUSCanada", 50931),
457 ENC ("X-EBCDIC-KoreanExtended", 20833),
458 ENC ("x-euc", 51932),
459 ENC ("x-euc-cn", 51936),
460 ENC ("x-euc-jp", 51932),
461 ENC ("x-Europa", 29001),
462 ENC ("x-IA5", 20105),
463 ENC ("x-IA5-German", 20106),
464 ENC ("x-IA5-Norwegian", 20108),
465 ENC ("x-IA5-Swedish", 20107),
466 ENC ("x-iscii-as", 57006),
467 ENC ("x-iscii-be", 57003),
468 ENC ("x-iscii-de", 57002),
469 ENC ("x-iscii-gu", 57010),
470 ENC ("x-iscii-ka", 57008),
471 ENC ("x-iscii-ma", 57009),
472 ENC ("x-iscii-or", 57007),
473 ENC ("x-iscii-pa", 57011),
474 ENC ("x-iscii-ta", 57004),
475 ENC ("x-iscii-te", 57005),
476 ENC ("x-mac-arabic", 10004),
477 ENC ("x-mac-ce", 10029),
478 ENC ("x-mac-chinesesimp", 10008),
479 ENC ("x-mac-chinesetrad", 10002),
480 ENC ("x-mac-croatian", 10082),
481 ENC ("x-mac-cyrillic", 10007),
482 ENC ("x-mac-greek", 10006),
483 ENC ("x-mac-hebrew", 10005),
484 ENC ("x-mac-icelandic", 10079),
485 ENC ("x-mac-japanese", 10001),
486 ENC ("x-mac-korean", 10003),
487 ENC ("x-mac-romanian", 10010),
488 ENC ("x-mac-thai", 10021),
489 ENC ("x-mac-turkish", 10081),
490 ENC ("x-mac-ukrainian", 10017),
491 ENC ("x-ms-cp932", 932),
493 ENC ("x-unicode-1-1-utf-7", 65000),
494 ENC ("x-unicode-1-1-utf-8", 65001),
495 ENC ("x-unicode-2-0-utf-7", 65000),
496 ENC ("x-unicode-2-0-utf-8", 65001),
497 ENC ("x-x-big5", 950),
499 #endif // FEATURE_CORECLR
503 // Working set optimization:
504 // 1. code page, family code page stored as unsigned short
505 // 2. if web/header/body names are the same, only web name is stored; otherwise, we store "|webname|headername|bodyname"
506 // 3. Move flags before names to fill gap on 64-bit platforms
// Builds one code page table entry: cp is stored in codePage, fcp in uiFamilyCodePage,
// names in Names and flags in flags.
508 static InternalCodePageDataItem MapCodePageDataItem (UInt16 cp, UInt16 fcp, string names, uint flags) { return new InternalCodePageDataItem () { codePage = cp, uiFamilyCodePage = fcp, flags = flags, Names = names }; }
510 // Information about codepages.
512 internal static InternalCodePageDataItem [] codePageDataPtr = new InternalCodePageDataItem [] {
516 // code page, family code page, web name, header name, body name, flags
518 MapCodePageDataItem( 1200, 1200, "utf-16", MIMECONTF_SAVABLE_BROWSER), // "Unicode"
519 MapCodePageDataItem( 1201, 1200, "utf-16BE", 0), // Big Endian, old FFFE BOM seems backwards, think of the BOM in little endian order.
520 MapCodePageDataItem( 12000, 1200, "utf-32", 0), // "Unicode (UTF-32)"
521 MapCodePageDataItem( 12001, 1200, "utf-32BE", 0), // "Unicode (UTF-32 Big Endian)"
522 MapCodePageDataItem( 20127, 1252, "us-ascii", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "US-ASCII"
523 MapCodePageDataItem( 28591, 1252, "iso-8859-1", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Western European (ISO)"
524 MapCodePageDataItem( 65000, 1200, "utf-7", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "Unicode (UTF-7)"
525 MapCodePageDataItem( 65001, 1200, "utf-8", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Unicode (UTF-8)"
527 #else //FEATURE_CORECLR
530 // code page, family code page, web name, header name, body name, flags
533 MapCodePageDataItem( 37, 1252, "IBM037", 0), // "IBM EBCDIC (US-Canada)"
534 MapCodePageDataItem( 437, 1252, "IBM437", 0), // "OEM United States"
535 MapCodePageDataItem( 500, 1252, "IBM500", 0), // "IBM EBCDIC (International)"
536 MapCodePageDataItem( 708, 1256, "ASMO-708", MIMECONTF_BROWSER | MIMECONTF_SAVABLE_BROWSER), // "Arabic (ASMO 708)"
537 // MapCodePageDataItem( 720, 1256, "DOS-720", MIMECONTF_BROWSER | MIMECONTF_SAVABLE_BROWSER), // "Arabic (DOS)"
538 // MapCodePageDataItem( 737, 1253, "ibm737", 0), // "Greek (DOS)"
539 // MapCodePageDataItem( 775, 1257, "ibm775", 0), // "Baltic (DOS)"
540 MapCodePageDataItem( 850, 1252, "ibm850", 0), // "Western European (DOS)"
541 MapCodePageDataItem( 852, 1250, "ibm852", MIMECONTF_BROWSER | MIMECONTF_SAVABLE_BROWSER), // "Central European (DOS)"
542 MapCodePageDataItem( 855, 1252, "IBM855", 0), // "OEM Cyrillic"
543 MapCodePageDataItem( 857, 1254, "ibm857", 0), // "Turkish (DOS)"
544 MapCodePageDataItem( 858, 1252, "IBM00858", 0), // "OEM Multilingual Latin I"
545 MapCodePageDataItem( 860, 1252, "IBM860", 0), // "Portuguese (DOS)"
546 MapCodePageDataItem( 861, 1252, "ibm861", 0), // "Icelandic (DOS)"
547 MapCodePageDataItem( 862, 1255, "DOS-862", MIMECONTF_BROWSER | MIMECONTF_SAVABLE_BROWSER), // "Hebrew (DOS)"
548 MapCodePageDataItem( 863, 1252, "IBM863", 0), // "French Canadian (DOS)"
549 MapCodePageDataItem( 864, 1256, "IBM864", 0), // "Arabic (864)"
550 MapCodePageDataItem( 865, 1252, "IBM865", 0), // "Nordic (DOS)"
551 MapCodePageDataItem( 866, 1251, "cp866", MIMECONTF_BROWSER | MIMECONTF_SAVABLE_BROWSER), // "Cyrillic (DOS)"
552 MapCodePageDataItem( 869, 1253, "ibm869", 0), // "Greek, Modern (DOS)"
553 MapCodePageDataItem( 870, 1250, "IBM870", 0), // "IBM EBCDIC (Multilingual Latin-2)"
554 MapCodePageDataItem( 874, 874, "windows-874", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Thai (Windows)"
555 MapCodePageDataItem( 875, 1253, "cp875", 0), // "IBM EBCDIC (Greek Modern)"
556 MapCodePageDataItem( 932, 932, "|shift_jis|iso-2022-jp|iso-2022-jp", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Japanese (Shift-JIS)"
557 MapCodePageDataItem( 936, 936, "gb2312", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Chinese Simplified (GB2312)"
558 MapCodePageDataItem( 949, 949, "ks_c_5601-1987", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Korean"
559 MapCodePageDataItem( 950, 950, "big5", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Chinese Traditional (Big5)"
560 MapCodePageDataItem( 1026, 1254, "IBM1026", 0), // "IBM EBCDIC (Turkish Latin-5)"
561 MapCodePageDataItem( 1047, 1252, "IBM01047", 0), // "IBM Latin-1"
562 MapCodePageDataItem( 1140, 1252, "IBM01140", 0), // "IBM EBCDIC (US-Canada-Euro)"
563 MapCodePageDataItem( 1141, 1252, "IBM01141", 0), // "IBM EBCDIC (Germany-Euro)"
564 MapCodePageDataItem( 1142, 1252, "IBM01142", 0), // "IBM EBCDIC (Denmark-Norway-Euro)"
565 MapCodePageDataItem( 1143, 1252, "IBM01143", 0), // "IBM EBCDIC (Finland-Sweden-Euro)"
566 MapCodePageDataItem( 1144, 1252, "IBM01144", 0), // "IBM EBCDIC (Italy-Euro)"
567 MapCodePageDataItem( 1145, 1252, "IBM01145", 0), // "IBM EBCDIC (Spain-Euro)"
568 MapCodePageDataItem( 1146, 1252, "IBM01146", 0), // "IBM EBCDIC (UK-Euro)"
569 MapCodePageDataItem( 1147, 1252, "IBM01147", 0), // "IBM EBCDIC (France-Euro)"
570 MapCodePageDataItem( 1148, 1252, "IBM01148", 0), // "IBM EBCDIC (International-Euro)"
571 MapCodePageDataItem( 1149, 1252, "IBM01149", 0), // "IBM EBCDIC (Icelandic-Euro)"
572 MapCodePageDataItem( 1200, 1200, "utf-16", MIMECONTF_SAVABLE_BROWSER), // "Unicode"
573 MapCodePageDataItem( 1201, 1200, "utf-16BE", 0), // Big Endian, old FFFE BOM seems backwards, think of the BOM in little endian order.
574 MapCodePageDataItem( 1250, 1250, "|windows-1250|windows-1250|iso-8859-2", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Central European (Windows)"
575 MapCodePageDataItem( 1251, 1251, "|windows-1251|windows-1251|koi8-r", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Cyrillic (Windows)"
576 MapCodePageDataItem( 1252, 1252, "|Windows-1252|Windows-1252|iso-8859-1", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Western European (Windows)"
577 MapCodePageDataItem( 1253, 1253, "|windows-1253|windows-1253|iso-8859-7", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Greek (Windows)"
578 MapCodePageDataItem( 1254, 1254, "|windows-1254|windows-1254|iso-8859-9", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Turkish (Windows)"
579 MapCodePageDataItem( 1255, 1255, "windows-1255", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Hebrew (Windows)"
580 MapCodePageDataItem( 1256, 1256, "windows-1256", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Arabic (Windows)"
581 MapCodePageDataItem( 1257, 1257, "windows-1257", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Baltic (Windows)"
582 MapCodePageDataItem( 1258, 1258, "windows-1258", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Vietnamese (Windows)"
583 // MapCodePageDataItem( 1361, 949, "Johab", 0), // "Korean (Johab)"
584 MapCodePageDataItem( 10000, 1252, "macintosh", 0), // "Western European (Mac)"
586 MapCodePageDataItem( 10001, 932, "x-mac-japanese", 0), // "Japanese (Mac)"
587 MapCodePageDataItem( 10002, 950, "x-mac-chinesetrad", 0), // "Chinese Traditional (Mac)"
588 MapCodePageDataItem( 10003, 949, "x-mac-korean", 0), // "Korean (Mac)"
589 MapCodePageDataItem( 10004, 1256, "x-mac-arabic", 0), // "Arabic (Mac)"
590 MapCodePageDataItem( 10005, 1255, "x-mac-hebrew", 0), // "Hebrew (Mac)"
591 MapCodePageDataItem( 10006, 1253, "x-mac-greek", 0), // "Greek (Mac)"
592 MapCodePageDataItem( 10007, 1251, "x-mac-cyrillic", 0), // "Cyrillic (Mac)"
593 MapCodePageDataItem( 10008, 936, "x-mac-chinesesimp", 0), // "Chinese Simplified (Mac)"
594 MapCodePageDataItem( 10010, 1250, "x-mac-romanian", 0), // "Romanian (Mac)"
595 MapCodePageDataItem( 10017, 1251, "x-mac-ukrainian", 0), // "Ukrainian (Mac)"
596 MapCodePageDataItem( 10021, 874, "x-mac-thai", 0), // "Thai (Mac)"
597 MapCodePageDataItem( 10029, 1250, "x-mac-ce", 0), // "Central European (Mac)"
599 MapCodePageDataItem( 10079, 1252, "x-mac-icelandic", 0), // "Icelandic (Mac)"
600 // MapCodePageDataItem( 10081, 1254, "x-mac-turkish", 0), // "Turkish (Mac)"
601 // MapCodePageDataItem( 10082, 1250, "x-mac-croatian", 0), // "Croatian (Mac)"
602 MapCodePageDataItem( 12000, 1200, "utf-32", 0), // "Unicode (UTF-32)"
603 MapCodePageDataItem( 12001, 1200, "utf-32BE", 0), // "Unicode (UTF-32 Big Endian)"
605 MapCodePageDataItem( 20000, 950, "x-Chinese-CNS", 0), // "Chinese Traditional (CNS)"
606 MapCodePageDataItem( 20001, 950, "x-cp20001", 0), // "TCA Taiwan"
607 MapCodePageDataItem( 20002, 950, "x-Chinese-Eten", 0), // "Chinese Traditional (Eten)"
608 MapCodePageDataItem( 20003, 950, "x-cp20003", 0), // "IBM5550 Taiwan"
609 MapCodePageDataItem( 20004, 950, "x-cp20004", 0), // "TeleText Taiwan"
610 MapCodePageDataItem( 20005, 950, "x-cp20005", 0), // "Wang Taiwan"
611 MapCodePageDataItem( 20105, 1252, "x-IA5", 0), // "Western European (IA5)"
612 MapCodePageDataItem( 20106, 1252, "x-IA5-German", 0), // "German (IA5)"
613 MapCodePageDataItem( 20107, 1252, "x-IA5-Swedish", 0), // "Swedish (IA5)"
614 MapCodePageDataItem( 20108, 1252, "x-IA5-Norwegian", 0), // "Norwegian (IA5)"
616 MapCodePageDataItem( 20127, 1252, "us-ascii", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "US-ASCII"
617 // MapCodePageDataItem( 20261, 1252, "x-cp20261", 0), // "T.61"
618 // MapCodePageDataItem( 20269, 1252, "x-cp20269", 0), // "ISO-6937"
619 MapCodePageDataItem( 20273, 1252, "IBM273", 0), // "IBM EBCDIC (Germany)"
620 MapCodePageDataItem( 20277, 1252, "IBM277", 0), // "IBM EBCDIC (Denmark-Norway)"
621 MapCodePageDataItem( 20278, 1252, "IBM278", 0), // "IBM EBCDIC (Finland-Sweden)"
622 MapCodePageDataItem( 20280, 1252, "IBM280", 0), // "IBM EBCDIC (Italy)"
623 MapCodePageDataItem( 20284, 1252, "IBM284", 0), // "IBM EBCDIC (Spain)"
624 MapCodePageDataItem( 20285, 1252, "IBM285", 0), // "IBM EBCDIC (UK)"
625 MapCodePageDataItem( 20290, 932, "IBM290", 0), // "IBM EBCDIC (Japanese katakana)"
626 MapCodePageDataItem( 20297, 1252, "IBM297", 0), // "IBM EBCDIC (France)"
627 MapCodePageDataItem( 20420, 1256, "IBM420", 0), // "IBM EBCDIC (Arabic)"
628 // MapCodePageDataItem( 20423, 1253, "IBM423", 0), // "IBM EBCDIC (Greek)"
629 MapCodePageDataItem( 20424, 1255, "IBM424", 0), // "IBM EBCDIC (Hebrew)"
630 // MapCodePageDataItem( 20833, 949, "x-EBCDIC-KoreanExtended", 0), // "IBM EBCDIC (Korean Extended)"
631 // MapCodePageDataItem( 20838, 874, "IBM-Thai", 0), // "IBM EBCDIC (Thai)"
632 MapCodePageDataItem( 20866, 1251, "koi8-r", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Cyrillic (KOI8-R)"
633 MapCodePageDataItem( 20871, 1252, "IBM871", 0), // "IBM EBCDIC (Icelandic)"
635 MapCodePageDataItem( 20880, 1251, "IBM880", 0), // "IBM EBCDIC (Cyrillic Russian)"
636 MapCodePageDataItem( 20905, 1254, "IBM905", 0), // "IBM EBCDIC (Turkish)"
637 MapCodePageDataItem( 20924, 1252, "IBM00924", 0), // "IBM Latin-1"
638 MapCodePageDataItem( 20932, 932, "EUC-JP", 0), // "Japanese (JIS 0208-1990 and 0212-1990)"
639 MapCodePageDataItem( 20936, 936, "x-cp20936", 0), // "Chinese Simplified (GB2312-80)"
640 MapCodePageDataItem( 20949, 949, "x-cp20949", 0), // "Korean Wansung"
642 MapCodePageDataItem( 21025, 1251, "cp1025", 0), // "IBM EBCDIC (Cyrillic Serbian-Bulgarian)"
643 MapCodePageDataItem( 21866, 1251, "koi8-u", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Cyrillic (KOI8-U)"
644 MapCodePageDataItem( 28591, 1252, "iso-8859-1", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Western European (ISO)"
645 MapCodePageDataItem( 28592, 1250, "iso-8859-2", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Central European (ISO)"
646 MapCodePageDataItem( 28593, 1254, "iso-8859-3", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "Latin 3 (ISO)"
647 MapCodePageDataItem( 28594, 1257, "iso-8859-4", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Baltic (ISO)"
648 MapCodePageDataItem( 28595, 1251, "iso-8859-5", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Cyrillic (ISO)"
649 MapCodePageDataItem( 28596, 1256, "iso-8859-6", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Arabic (ISO)"
650 MapCodePageDataItem( 28597, 1253, "iso-8859-7", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Greek (ISO)"
651 MapCodePageDataItem( 28598, 1255, "iso-8859-8", MIMECONTF_BROWSER | MIMECONTF_SAVABLE_BROWSER), // "Hebrew (ISO-Visual)"
652 MapCodePageDataItem( 28599, 1254, "iso-8859-9", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Turkish (ISO)"
653 // MapCodePageDataItem( 28603, 1257, "iso-8859-13", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "Estonian (ISO)"
654 MapCodePageDataItem( 28605, 1252, "iso-8859-15", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Latin 9 (ISO)"
655 // MapCodePageDataItem( 29001, 1252, "x-Europa", 0), // "Europa"
656 MapCodePageDataItem( 38598, 1255, "iso-8859-8-i", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Hebrew (ISO-Logical)"
657 MapCodePageDataItem( 50220, 932, "iso-2022-jp", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "Japanese (JIS)"
658 MapCodePageDataItem( 50221, 932, "|csISO2022JP|iso-2022-jp|iso-2022-jp", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Japanese (JIS-Allow 1 byte Kana)"
659 MapCodePageDataItem( 50222, 932, "iso-2022-jp", 0), // "Japanese (JIS-Allow 1 byte Kana - SO/SI)"
660 // MapCodePageDataItem( 50225, 949, "|iso-2022-kr|euc-kr|iso-2022-kr", MIMECONTF_MAILNEWS), // "Korean (ISO)"
661 // MapCodePageDataItem( 50227, 936, "x-cp50227", 0), // "Chinese Simplified (ISO-2022)"
662 //MapCodePageDataItem( 50229, 950, L"x-cp50229", L"x-cp50229", L"x-cp50229", 0}, // "Chinese Traditional (ISO-2022)"
663 //MapCodePageDataItem( 50930, 932, L"cp930", L"cp930", L"cp930", 0}, // "IBM EBCDIC (Japanese and Japanese Katakana)"
664 //MapCodePageDataItem( 50931, 932, L"x-EBCDIC-JapaneseAndUSCanada", L"x-EBCDIC-JapaneseAndUSCanada", L"x-EBCDIC-JapaneseAndUSCanada", 0}, // "IBM EBCDIC (Japanese and US-Canada)"
665 //MapCodePageDataItem( 50933, 949, L"cp933", L"cp933", L"cp933", 0}, // "IBM EBCDIC (Korean and Korean Extended)"
666 //MapCodePageDataItem( 50935, 936, L"cp935", L"cp935", L"cp935", 0}, // "IBM EBCDIC (Simplified Chinese)"
667 //MapCodePageDataItem( 50937, 950, L"cp937", L"cp937", L"cp937", 0}, // "IBM EBCDIC (Traditional Chinese)"
668 //MapCodePageDataItem( 50939, 932, L"cp939", L"cp939", L"cp939", 0}, // "IBM EBCDIC (Japanese and Japanese-Latin)"
669 MapCodePageDataItem( 51932, 932, "euc-jp", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Japanese (EUC)"
670 // MapCodePageDataItem( 51936, 936, "EUC-CN", 0), // "Chinese Simplified (EUC)"
671 MapCodePageDataItem( 51949, 949, "euc-kr", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "Korean (EUC)"
672 // MapCodePageDataItem( 52936, 936, "hz-gb-2312", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Chinese Simplified (HZ)"
673 MapCodePageDataItem( 54936, 936, "GB18030", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Chinese Simplified (GB18030)"
674 MapCodePageDataItem( 57002, 57002, "x-iscii-de", 0), // "ISCII Devanagari"
675 MapCodePageDataItem( 57003, 57003, "x-iscii-be", 0), // "ISCII Bengali"
676 MapCodePageDataItem( 57004, 57004, "x-iscii-ta", 0), // "ISCII Tamil"
677 MapCodePageDataItem( 57005, 57005, "x-iscii-te", 0), // "ISCII Telugu"
678 MapCodePageDataItem( 57006, 57006, "x-iscii-as", 0), // "ISCII Assamese"
679 MapCodePageDataItem( 57007, 57007, "x-iscii-or", 0), // "ISCII Oriya"
680 MapCodePageDataItem( 57008, 57008, "x-iscii-ka", 0), // "ISCII Kannada"
681 MapCodePageDataItem( 57009, 57009, "x-iscii-ma", 0), // "ISCII Malayalam"
682 MapCodePageDataItem( 57010, 57010, "x-iscii-gu", 0), // "ISCII Gujarati"
683 MapCodePageDataItem( 57011, 57011, "x-iscii-pa", 0), // "ISCII Punjabi"
684 MapCodePageDataItem( 65000, 1200, "utf-7", MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS), // "Unicode (UTF-7)"
685 MapCodePageDataItem( 65001, 1200, "utf-8", MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER), // "Unicode (UTF-8)"
686 #endif // FEATURE_CORECLR
689 MapCodePageDataItem( 0, 0, null, 0),
695 #region "from coreclr/src/pal/inc/rt/palrt.h"
700 //enum tagMIMECONTF {
701 MIMECONTF_MAILNEWS = 0x1,
702 MIMECONTF_BROWSER = 0x2,
703 MIMECONTF_MINIMAL = 0x4,
704 MIMECONTF_IMPORT = 0x8,
705 MIMECONTF_SAVABLE_MAILNEWS = 0x100,
706 MIMECONTF_SAVABLE_BROWSER = 0x200,
707 MIMECONTF_EXPORT = 0x400,
708 MIMECONTF_PRIVCONVERTER = 0x10000,
709 MIMECONTF_VALID = 0x20000,
710 MIMECONTF_VALID_NLS = 0x40000,
711 MIMECONTF_MIME_IE4 = 0x10000000,
712 MIMECONTF_MIME_LATEST = 0x20000000,
713 MIMECONTF_MIME_REGISTRY = 0x40000000
719 #region "from referencesource/mscorlib/system/globalization/encodingtable.cs"
721 // Data table for encoding classes. Used by System.Text.Encoding.
722 // This class contains two hashtables to allow System.Text.Encoding
723 // to retrieve the data item either by codepage value or by webName.
726 // Only statics, does not need to be marked with the serializable attribute
727 internal static partial class EncodingTable
730 //This number is the index of the last entry in the encoding name table (the table size minus one).
731 //The table size is retrieved by calling GetNumEncodingItems().
732 private static int lastEncodingItem = GetNumEncodingItems() - 1;
734 //This number is the size of the code page table. It's generated when we walk the table the first time.
735 private static volatile int lastCodePageItem;
739 // This points to a native data table which maps an encoding name to the correct code page.
742 unsafe internal static InternalEncodingDataItem* encodingDataPtr = GetEncodingData();
744 // This points to a native data table which stores the properties for the code page, and
745 // the table is indexed by code page.
748 unsafe internal static InternalCodePageDataItem* codePageDataPtr = GetCodePageData();
751 // This caches the mapping of an encoding name to a code page.
753 private static Hashtable hashByName = Hashtable.Synchronized(new Hashtable(StringComparer.OrdinalIgnoreCase));
755 // This caches the data item, which is indexed by the code page value.
757 private static Hashtable hashByCodePage = Hashtable.Synchronized(new Hashtable());
759 [System.Security.SecuritySafeCritical] // static constructors should be safe to call
760 static EncodingTable()
764 // Find the data item by binary searching the table that we have in native.
765 // Names are compared with String.Compare using StringComparison.OrdinalIgnoreCase.
766 [System.Security.SecuritySafeCritical] // auto-generated
767 unsafe private static int internalGetCodePageFromName(String name) {
769 int right = lastEncodingItem;
773 //Binary search the array until we have only a couple of elements left and then
774 //just walk those elements.
775 while ((right - left)>3) {
776 index = ((right - left)/2) + left;
778 result = String.Compare (name, encodingDataPtr[index].webName, StringComparison.OrdinalIgnoreCase);
781 //We found the item, return the associated codepage.
782 return (encodingDataPtr[index].codePage);
783 } else if (result<0) {
784 //The name that we're looking for is less than our current index.
787 //The name that we're looking for is greater than our current index
792 //Walk the remaining elements (there are at most 4, since right - left <= 3 here).
793 for (; left<=right; left++) {
794 if (String.Compare(name, encodingDataPtr[left].webName, StringComparison.OrdinalIgnoreCase) == 0) {
795 return (encodingDataPtr[left].codePage);
798 // The encoding name is not valid.
799 throw new ArgumentException(
801 CultureInfo.CurrentCulture,
802 Environment.GetResourceString("Argument_EncodingNotSupported"), name), "name");
805 // Return a list of all EncodingInfo objects describing all of our encodings
806 [System.Security.SecuritySafeCritical] // auto-generated
807 internal static unsafe EncodingInfo[] GetEncodings()
809 if (lastCodePageItem == 0)
812 for (count = 0; codePageDataPtr[count].codePage != 0; count++)
816 lastCodePageItem = count;
819 EncodingInfo[] arrayEncodingInfo = new EncodingInfo[lastCodePageItem];
822 for (i = 0; i < lastCodePageItem; i++)
824 arrayEncodingInfo[i] = new EncodingInfo(codePageDataPtr[i].codePage, CodePageDataItem.CreateString(codePageDataPtr[i].Names, 0),
825 Environment.GetResourceString("Globalization.cp_" + codePageDataPtr[i].codePage));
828 return arrayEncodingInfo;
831 /*=================================GetCodePageFromName==========================
832 **Action: Given an encoding name, return the correct code page number for this encoding.
833 **Returns: The code page for the encoding.
835 ** name the name of the encoding
837 ** ArgumentNullException if name is null.
838 ** internalGetCodePageFromName will throw ArgumentException if name is not a valid encoding name.
839 ============================================================================*/
841 internal static int GetCodePageFromName(String name)
844 throw new ArgumentNullException("name");
846 Contract.EndContractBlock();
851 // The name is case-insensitive, but ToLower isn't free. Check for
852 // the code page in the given capitalization first.
854 codePageObj = hashByName[name];
856 if (codePageObj!=null) {
857 return ((int)codePageObj);
860 //Okay, we didn't find it in the hash table, try looking it up in the
862 int codePage = internalGetCodePageFromName(name);
864 hashByName[name] = codePage;
869 [System.Security.SecuritySafeCritical] // auto-generated
870 unsafe internal static CodePageDataItem GetCodePageDataItem(int codepage) {
871 CodePageDataItem dataItem;
873 // We synchronize around dictionary gets/sets. There's still a possibility that two threads
874 // will create a CodePageDataItem and the second will clobber the first in the dictionary.
875 // However, that's acceptable because the contents are correct and we make no guarantees
878 //Look up the item in the hashtable.
879 dataItem = (CodePageDataItem)hashByCodePage[codepage];
881 //If we found it, return it.
882 if (dataItem!=null) {
887 //If we didn't find it, try looking it up now.
888 //If we find it, add it to the hashtable.
889 //This is a linear search, but we probably won't be doing it very often.
893 while ((data = codePageDataPtr[i].codePage) != 0) {
894 if (data == codepage) {
895 dataItem = new CodePageDataItem(i);
896 hashByCodePage[codepage] = dataItem;
902 //Nope, we didn't find it.
907 [System.Security.SecurityCritical] // auto-generated
908 [MethodImplAttribute(MethodImplOptions.InternalCall)]
909 private unsafe static extern InternalEncodingDataItem *GetEncodingData();
912 // Return the number of encoding data items.
914 [System.Security.SecurityCritical] // auto-generated
915 [MethodImplAttribute(MethodImplOptions.InternalCall)]
916 private static extern int GetNumEncodingItems();
918 [System.Security.SecurityCritical] // auto-generated
919 [MethodImplAttribute(MethodImplOptions.InternalCall)]
920 private unsafe static extern InternalCodePageDataItem* GetCodePageData();
922 [System.Security.SecurityCritical] // auto-generated
923 [MethodImplAttribute(MethodImplOptions.InternalCall)]
924 internal unsafe static extern byte* nativeCreateOpenFileMapping(
925 String inSectionName, int inBytesToAllocate, out IntPtr mappedFileHandle);
929 /*=================================InternalEncodingDataItem==========================
930 **Action: This is used to map an encoding name to the correct code page number. By doing this,
931 ** we can get the properties of this encoding via the InternalCodePageDataItem.
933 ** We use this structure to access native data exposed by the native side.
934 ============================================================================*/
936 [System.Runtime.InteropServices.StructLayout(LayoutKind.Sequential)]
937 internal struct InternalEncodingDataItem {
939 internal string webName;
940 internal UInt16 codePage;
943 /*=================================InternalCodePageDataItem==========================
944 **Action: This is used to access the properties related to a code page.
945 ** We use this structure to access native data exposed by the native side.
946 ============================================================================*/
948 [System.Runtime.InteropServices.StructLayout(LayoutKind.Sequential)]
949 internal struct InternalCodePageDataItem {
950 internal UInt16 codePage;
951 internal UInt16 uiFamilyCodePage;
954 internal string Names;