2006-11-20 Atsushi Enomoto <atsushi@ximian.com>
author Atsushi Eno <atsushieno@gmail.com>
Mon, 20 Nov 2006 03:20:48 +0000 (03:20 -0000)
committer Atsushi Eno <atsushieno@gmail.com>
Mon, 20 Nov 2006 03:20:48 +0000 (03:20 -0000)
* ucm2cp.c: applying the missing patch from Bruno Haible (on bug
  #77325), and made some changes to handle 2.0 fallback.
* cvt.sh: updated some mappings to correct ones.

svn path=/trunk/mcs/; revision=68171

mcs/class/I18N/tools/ChangeLog
mcs/class/I18N/tools/cvt.sh
mcs/class/I18N/tools/ucm2cp.c

index 50a6e5a7b663669be58633a17af42e3a3a2437c7..b3eb9f51c9be3acffcac34aa11cc87ea82f28ec4 100644 (file)
@@ -1,3 +1,9 @@
+2006-11-20  Atsushi Enomoto <atsushi@ximian.com>
+
+       * ucm2cp.c: applying the missing patch from Bruno Haible (on bug
+         #77325), and made some changes to handle 2.0 fallback.
+       * cvt.sh: updated some mappings to correct ones.
+
 2006-01-24  Atsushi Enomoto <atsushi@ximian.com>
 
        * table.sh, table_from.cs, table_to.cs : imported a set of useful
index caa71f3d79fd7289abab110452fc3f862a628332..f23a3ba00450c243cdf2f44d880bca6734ddd16d 100755 (executable)
@@ -51,7 +51,7 @@ ${UCM2CP} --region West --page 437 --wpage 1252 \
        --webname IBM437 --bodyname IBM437 \
        --headername IBM437 --no-browser-display \
        --no-browser-save --no-mailnews-display \
-       --no-mailnews-save West/ibm-437.ucm >West/CP437.cs
+       --no-mailnews-save West/windows-437-2000.ucm >West/CP437.cs
 
 ${UCM2CP} --region Rare --page 500 --wpage 1252 \
        --name 'IBM EBCDIC (International)' \
@@ -318,7 +318,7 @@ ${UCM2CP} --region West --page 10079 --wpage 1252 \
        --webname x-mac-icelandic --bodyname x-mac-icelandic \
        --headername x-mac-icelandic --no-browser-display \
        --no-browser-save --no-mailnews-display \
-       --no-mailnews-save West/mac-is.ucm >West/CP10079.cs
+       --no-mailnews-save West/windows-10079-2000.ucm >West/CP10079.cs
 
 ${UCM2CP} --region Rare --page 20273 --wpage 1252 \
        --name 'IBM EBCDIC (Germany)' \
@@ -437,7 +437,7 @@ ${UCM2CP} --region Other --page 28595 --wpage 1251 \
 ${UCM2CP} --region MidEast --page 28596 --wpage 1256 \
        --name 'Arabic (ISO)' \
        --webname iso-8859-6 --bodyname iso-8859-6 \
-       --headername iso-8859-6 MidEast/ibm-1089.ucm >MidEast/CP28596.cs
+       --headername iso-8859-6 MidEast/windows-28596-2000.ucm >MidEast/CP28596.cs
 
 ${UCM2CP} --region West --page 28597 --wpage 1253 \
        --name 'Greek (ISO)' \
index fa43452b0789c1783227d80aad26fff9d2947e48..bb034ba46bea79fe186ae06bcca6d2f0ffc9b050 100644 (file)
@@ -1,7 +1,9 @@
 /*
- * ucm2cp.c - Convert IBM ".ucm" files into code page handling classes.
+ * ucm2cp.c - Convert IBM ".ucm" files or hexadecimal mapping ".TXT" files
+ * into code page handling classes.
  *
  * Copyright (c) 2002  Southern Storm Software, Pty Ltd
+ * Copyright (c) 2006  Bruno Haible
  *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the "Software"),
@@ -162,7 +164,7 @@ int main(int argc, char *argv[])
                windowsCodePage = codePage;
        }
 
-       /* Open the UCM file */
+       /* Open the UCM or TXT file */
        file = fopen(argv[1], "r");
        if(!file)
        {
@@ -258,10 +260,11 @@ static int parseHex(const char *buf, unsigned long *value)
 }
 
 /*
- * Load the character mapping information from a UCM file.
+ * Load the character mapping information from a UCM or TXT file.
  */
 static void loadCharMaps(FILE *file)
 {
+       enum { unknown, ucm, txt } syntax;
        unsigned long posn;
        unsigned long byteValue;
        int level;
@@ -279,45 +282,86 @@ static void loadCharMaps(FILE *file)
                charToByte[posn] = -1;
        }
 
+       syntax = unknown;
+
        /* Read the contents of the file */
        while(fgets(buffer, BUFSIZ, file))
        {
-               /* Lines of interest begin with "<U" */
-               if(buffer[0] != '<' || buffer[1] != 'U')
+               /* Syntax recognition */
+               if (syntax == unknown)
                {
-                       continue;
+                       if (memcmp(buffer, "CHARMAP", 7) == 0)
+                               syntax = ucm;
+                       else if (memcmp(buffer, "0x", 2) == 0)
+                               syntax = txt;
                }
 
-               /* Parse the fields on the line */
-               buf = buffer + 2;
-               buf += parseHex(buf, &posn);
-               if(posn >= 65536)
-               {
-                       continue;
-               }
-               while(*buf != '\0' && *buf != '\\')
+               if (syntax == ucm)
                {
-                       ++buf;
-               }
-               if(*buf != '\\' || buf[1] != 'x')
-               {
-                       continue;
-               }
-               buf += 2;
-               buf += parseHex(buf, &byteValue);
-               if(byteValue >= 256)
-               {
-                       continue;
+                       /* Lines of interest begin with "<U" */
+                       if(buffer[0] != '<' || buffer[1] != 'U')
+                       {
+                               continue;
+                       }
+
+                       /* Parse the fields on the line */
+                       buf = buffer + 2;
+                       buf += parseHex(buf, &posn);
+                       if(posn >= 65536)
+                       {
+                               continue;
+                       }
+                       while(*buf != '\0' && *buf != '\\')
+                       {
+                               ++buf;
+                       }
+                       if(*buf != '\\' || buf[1] != 'x')
+                       {
+                               continue;
+                       }
+                       buf += 2;
+                       buf += parseHex(buf, &byteValue);
+                       if(byteValue >= 256)
+                       {
+                               continue;
+                       }
+                       while(*buf != '\0' && *buf != '|')
+                       {
+                               ++buf;
+                       }
+                       if(*buf != '|')
+                       {
+                               continue;
+                       }
+                       level = (int)(buf[1] - '0');
                }
-               while(*buf != '\0' && *buf != '|')
+               else
+               if (syntax == txt)
                {
-                       ++buf;
+                       unsigned int x;
+                       int cnt;
+
+                       /* Lines of interest begin with "0x" */
+                       if(buffer[0] != '0' || buffer[1] != 'x')
+                               continue;
+
+                       /* Parse the fields on the line */
+                       if(sscanf(buffer, "0x%x%n", &x, &cnt) <= 0)
+                               exit(1);
+                       if(!(x < 0x100))
+                               exit(1);
+                       byteValue = x;
+                       while (buffer[cnt] == ' ' || buffer[cnt] == '\t')
+                               cnt++;
+                       if(sscanf(buffer+cnt, "0x%x", &x) != 1)
+                               continue;
+                       if(!(x < 0x10000))
+                               exit(1);
+                       posn = x;
+                       level = 0;
                }
-               if(*buf != '|')
-               {
+               else
                        continue;
-               }
-               level = (int)(buf[1] - '0');
 
                /* Update the byte->char mapping table */
                if(level < byteToCharLevel[byteValue])
@@ -364,7 +408,9 @@ static void printHeader(void)
        printf("// Generated from \"%s\".\n\n", filename);
        printf("namespace I18N.%s\n{\n\n", region);
        printf("using System;\n");
+       printf("using System.Text;\n");
        printf("using I18N.Common;\n\n");
+       printf("[Serializable]\n");
        printf("public class CP%d : ByteEncoding\n{\n", codePage);
        printf("\tpublic CP%d()\n", codePage);
        printf("\t\t: base(%d, ToChars, \"%s\",\n", codePage, name);
@@ -408,6 +454,7 @@ static void printEncodingName(const char *name)
 static void printFooter(void)
 {
        printf("}; // class CP%d\n\n", codePage);
+       printf("[Serializable]\n");
        printf("public class ENC");
        printEncodingName(webName);
        printf(" : CP%d\n{\n", codePage);
@@ -441,7 +488,7 @@ static void printByteToChar(void)
  * Print a "switch" statement that converts "ch" from
  * a character value into a byte value.
  */
-static void printConvertSwitch(void)
+static void printConvertSwitch(int forString)
 {
        unsigned long directLimit;
        unsigned long posn;
@@ -548,7 +595,17 @@ static void printConvertSwitch(void)
        /* Print the switch footer */
        if(!haveFullWidth)
        {
-               printf("\t\t\t\tdefault: ch = 0x3F; break;\n");
+               if(forString)
+                       printf("\t\t\t\tdefault: ch = 0x3F; break;\n");
+               else {
+                       printf("\t\t\t\tdefault:\n");
+                       printf("#if NET_2_0\n");
+                       printf("\t\t\t\t\tHandleFallback (ref buffer, chars, ref charIndex, ref charCount, bytes, ref byteIndex, ref byteCount);\n");
+                       printf("#else\n");
+                       printf("\t\t\t\t\t\tch = 0x3F;\n");
+                       printf("#endif\n");
+                       printf("\t\t\t\t\tbreak;\n");
+               }
        }
        else
        {
@@ -557,7 +614,15 @@ static void printConvertSwitch(void)
                printf("\t\t\t\t\tif(ch >= 0xFF01 && ch <= 0xFF5E)\n");
                printf("\t\t\t\t\t\tch -= 0xFEE0;\n");
                printf("\t\t\t\t\telse\n");
-               printf("\t\t\t\t\t\tch = 0x3F;\n");
+               if(forString) /* this is basically meaningless, just to make diff for unused code minimum */
+                       printf("\t\t\t\t\t\tch = 0x3F;\n");
+               else {
+                       printf("#if NET_2_0\n");
+                       printf("\t\t\t\t\t\tHandleFallback (ref buffer, chars, ref charIndex, ref charCount, bytes, ref byteIndex, ref byteCount);\n");
+                       printf("#else\n");
+                       printf("\t\t\t\t\t\tch = 0x3F;\n");
+                       printf("#endif\n");
+               }
                printf("\t\t\t\t}\n");
                printf("\t\t\t\tbreak;\n");
        }
@@ -570,20 +635,27 @@ static void printConvertSwitch(void)
 static void printCharToByte(void)
 {
        /* Print the conversion method for character buffers */
-       printf("\tprotected override void ToBytes(char[] chars, int charIndex, int charCount,\n");
-       printf("\t                                byte[] bytes, int byteIndex)\n");
+       printf("\tprotected unsafe override void ToBytes(char* chars, int charCount,\n");
+       printf("\t                                byte* bytes, int byteCount)\n");
        printf("\t{\n");
        printf("\t\tint ch;\n");
+       printf("\t\tint charIndex = 0;\n");
+       printf("\t\tint byteIndex = 0;\n");
+       printf("#if NET_2_0\n");
+       printf("\t\tEncoderFallbackBuffer buffer = null;\n");
+       printf("#endif\n");
        printf("\t\twhile(charCount > 0)\n");
        printf("\t\t{\n");
        printf("\t\t\tch = (int)(chars[charIndex++]);\n");
-       printConvertSwitch();
+       printConvertSwitch(0);
        printf("\t\t\tbytes[byteIndex++] = (byte)ch;\n");
        printf("\t\t\t--charCount;\n");
+       printf("\t\t\t--byteCount;\n");
        printf("\t\t}\n");
        printf("\t}\n\n");
 
        /* Print the conversion method for string buffers */
+       printf("\t/*\n");
        printf("\tprotected override void ToBytes(String s, int charIndex, int charCount,\n");
        printf("\t                                byte[] bytes, int byteIndex)\n");
        printf("\t{\n");
@@ -591,9 +663,10 @@ static void printCharToByte(void)
        printf("\t\twhile(charCount > 0)\n");
        printf("\t\t{\n");
        printf("\t\t\tch = (int)(s[charIndex++]);\n");
-       printConvertSwitch();
+       printConvertSwitch(1);
        printf("\t\t\tbytes[byteIndex++] = (byte)ch;\n");
        printf("\t\t\t--charCount;\n");
        printf("\t\t}\n");
-       printf("\t}\n\n");
+       printf("\t}\n");
+       printf("\t*/\n\n");
 }