2006-01-24 Atsushi Enomoto <atsushi@ximian.com>
author Atsushi Eno <atsushieno@gmail.com>
Tue, 24 Jan 2006 10:11:16 +0000 (10:11 -0000)
committer Atsushi Eno <atsushieno@gmail.com>
Tue, 24 Jan 2006 10:11:16 +0000 (10:11 -0000)
* table.sh, table_from.cs, table_to.cs : imported a set of useful
  tools from bug #77325 (thanks to Bruno Haible).

svn path=/trunk/mcs/; revision=55977

mcs/class/I18N/tools/ChangeLog [new file with mode: 0644]
mcs/class/I18N/tools/table.sh [new file with mode: 0644]
mcs/class/I18N/tools/table_from.cs [new file with mode: 0644]
mcs/class/I18N/tools/table_to.cs [new file with mode: 0644]

diff --git a/mcs/class/I18N/tools/ChangeLog b/mcs/class/I18N/tools/ChangeLog
new file mode 100644 (file)
index 0000000..50a6e5a
--- /dev/null
@@ -0,0 +1,4 @@
+2006-01-24  Atsushi Enomoto <atsushi@ximian.com>
+
+       * table.sh, table_from.cs, table_to.cs : imported a set of useful
+         tools from bug #77325 (thanks to Bruno Haible).
diff --git a/mcs/class/I18N/tools/table.sh b/mcs/class/I18N/tools/table.sh
new file mode 100644 (file)
index 0000000..27b8638
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/sh
+charset=$1  # charset name handed to both tools; also used as the output file stem
+echo "Extracting $charset"
+mono table_from.exe "$charset" > "$charset.TXT"  # quoted: no word splitting or globbing
+mono table_to.exe "$charset" | sort > "$charset.INVERSE.TXT"
diff --git a/mcs/class/I18N/tools/table_from.cs b/mcs/class/I18N/tools/table_from.cs
new file mode 100644 (file)
index 0000000..9f7ac76
--- /dev/null
@@ -0,0 +1,106 @@
+/**
+ * Create a table from CHARSET to Unicode.
+ *
+ * @author Bruno Haible
+ */
+
+using System; /* String, Console */
+using System.Text; /* Encoding */
+
+public class table_from {
+  /* Longest byte sequence that is probed for a mapping. */
+  const int maxSequenceLength = 4;
+
+  /** Returns the uppercase hex digit for i; i must be in the range 0..15. */
+  static String toHexString1 (int i) {
+    /* Index the string directly: naming the get_Chars accessor explicitly
+       is a compile error (CS0571) in C#. */
+    return new String(new char[] { "0123456789ABCDEF"[i] });
+  }
+  /** Formats the low 8 bits of i as two uppercase hex digits. */
+  static String toHexString2 (int i) {
+    return  toHexString1((i>>4)&0x0f)
+           +toHexString1(i&0x0f);
+  }
+  /** Formats the low 16 bits of i as four uppercase hex digits. */
+  static String toHexString4 (int i) {
+    return  toHexString1((i>>12)&0x0f)
+           +toHexString1((i>>8)&0x0f)
+           +toHexString1((i>>4)&0x0f)
+           +toHexString1(i&0x0f);
+  }
+  /**
+   * Writes the decoded characters as "0x"-prefixed hex values separated by
+   * single spaces.  A high surrogate immediately followed by a low surrogate
+   * is written as the single supplementary code point the pair encodes
+   * (without zero padding); every other char is written as four hex digits.
+   */
+  static void printOutput(char[] outp) {
+    Console.Out.Write("0x");
+    for (int j = 0; j < outp.Length; j++) {
+      if (j > 0)
+        Console.Out.Write(" 0x");
+      if (j+1 < outp.Length
+          && outp[j] >= 0xd800 && outp[j] < 0xdc00
+          && outp[j+1] >= 0xdc00 && outp[j+1] < 0xe000) {
+        int c = 0x10000 + ((outp[j] - 0xd800) << 10) + (outp[j+1] - 0xdc00);
+        Console.Out.Write(c.ToString("X"));
+        j++; /* the low surrogate has been consumed as well */
+      } else
+        Console.Out.Write(toHexString4(outp[j]));
+    }
+  }
+  /**
+   * Tells whether decoding seq into outp counts as a table entry.
+   * An empty result never counts.  A result starting with '?' counts only
+   * when seq is byte-for-byte the encoding of '?' itself (qmark); any other
+   * sequence decoding to '?' is treated as the decoder's replacement output.
+   */
+  static bool isMapping (char[] outp, byte[] seq, byte[] qmark) {
+    if (outp.Length == 0)
+      return false;
+    if (outp[0] != 0x003f)
+      return true;
+    if (seq.Length != qmark.Length)
+      return false;
+    for (int k = 0; k < seq.Length; k++)
+      if (seq[k] != qmark[k])
+        return false;
+    return true;
+  }
+  /**
+   * Probes every byte sequence made of prefix plus one more byte, in
+   * ascending byte order.  A sequence that maps is printed as
+   * "0x<bytes>\t<code points>".  One that does not map and decoded to at
+   * most one char is extended by a further byte, up to maxSequenceLength
+   * bytes in total.
+   */
+  static void dumpSequences (Encoding encoding, byte[] qmark, byte[] prefix) {
+    byte[] seq = new byte[prefix.Length + 1];
+    Array.Copy(prefix, seq, prefix.Length);
+    for (int b = 0; b < 0x100; b++) {
+      seq[prefix.Length] = (byte)b;
+      char[] outp = encoding.GetChars(seq);
+      if (isMapping(outp, seq, qmark)) {
+        Console.Out.Write("0x");
+        for (int k = 0; k < seq.Length; k++)
+          Console.Out.Write(toHexString2(seq[k]));
+        Console.Out.Write("\t");
+        printOutput(outp);
+        Console.Out.WriteLine();
+      } else if (outp.Length <= 1 && seq.Length < maxSequenceLength) {
+        dumpSequences(encoding, qmark, seq);
+      }
+    }
+  }
+  /**
+   * Entry point.  Expects exactly one argument, the charset name, and writes
+   * the byte-sequence-to-Unicode table to standard output.
+   * Returns 0 on success and 1 on a usage error, an unknown charset or any
+   * other exception.
+   */
+  public static int Main (String[] args) {
+    try {
+      if (args.Length != 1) {
+        Console.Error.WriteLine("Usage: mono table_from charset");
+        return 1;
+      }
+      String charset = args[0];
+      Encoding encoding;
+      try {
+        encoding = Encoding.GetEncoding(charset);
+      } catch (NotSupportedException) {
+        Console.Error.WriteLine("no converter for "+charset);
+        return 1;
+      } catch (ArgumentException) {
+        /* .NET reports an unknown charset name with ArgumentException. */
+        Console.Error.WriteLine("no converter for "+charset);
+        return 1;
+      }
+      byte[] qmark = encoding.GetBytes(new char[] { (char)0x003f });
+      dumpSequences(encoding, qmark, new byte[0]);
+    } catch (Exception e) {
+      Console.Error.WriteLine(e);
+      Console.Error.WriteLine(e.StackTrace);
+      return 1;
+    }
+    return 0;
+  }
+}
diff --git a/mcs/class/I18N/tools/table_to.cs b/mcs/class/I18N/tools/table_to.cs
new file mode 100644 (file)
index 0000000..399b19a
--- /dev/null
@@ -0,0 +1,66 @@
+/**
+ * Create a table from Unicode to CHARSET.
+ *
+ * @author Bruno Haible
+ */
+
+using System; /* String, Console */
+using System.Text; /* Encoding */
+
+public class table_to {
+  /** Returns the uppercase hex digit for i; i must be in the range 0..15. */
+  static String toHexString1 (int i) {
+    /* Index the string directly: naming the get_Chars accessor explicitly
+       is a compile error (CS0571) in C#. */
+    return new String(new char[] { "0123456789ABCDEF"[i] });
+  }
+  /** Formats the low 8 bits of i as two uppercase hex digits. */
+  static String toHexString2 (int i) {
+    return  toHexString1((i>>4)&0x0f)
+           +toHexString1(i&0x0f);
+  }
+  /** Formats the low 16 bits of i as four uppercase hex digits. */
+  static String toHexString4 (int i) {
+    return  toHexString1((i>>12)&0x0f)
+           +toHexString1((i>>8)&0x0f)
+           +toHexString1((i>>4)&0x0f)
+           +toHexString1(i&0x0f);
+  }
+  /**
+   * Tells whether data begins with all bytes of prefix.  An empty prefix
+   * never matches, so an encoding that produces no bytes for '?' does not
+   * make every character look unmappable.
+   */
+  static bool startsWith (byte[] data, byte[] prefix) {
+    if (prefix.Length == 0 || data.Length < prefix.Length)
+      return false;
+    for (int k = 0; k < prefix.Length; k++)
+      if (data[k] != prefix[k])
+        return false;
+    return true;
+  }
+  /**
+   * Entry point.  Expects exactly one argument, the charset name.  For each
+   * code point from 0 up to 0x10FFFF that the encoding appears to support
+   * it writes "0x<bytes>\t0x<code point>" to standard output.
+   * Returns 0 on success and 1 on a usage error, an unknown charset or any
+   * other exception.
+   */
+  public static int Main (String[] args) {
+    try {
+      if (args.Length != 1) {
+        Console.Error.WriteLine("Usage: mono table_to charset");
+        return 1;
+      }
+      String charset = args[0];
+      Encoding encoding;
+      try {
+        encoding = Encoding.GetEncoding(charset);
+      } catch (NotSupportedException) {
+        Console.Error.WriteLine("no converter for "+charset);
+        return 1;
+      } catch (ArgumentException) {
+        /* .NET reports an unknown charset name with ArgumentException. */
+        Console.Error.WriteLine("no converter for "+charset);
+        return 1;
+      }
+      byte[] qmark = encoding.GetBytes(new char[] { (char)0x003f });
+      for (int i = 0; i < 0x110000; i++) {
+        /* Code points above the BMP are passed as a surrogate pair. */
+        char[] inp =
+          (i < 0x10000
+           ? new char[] { (char)i }
+           : new char[] { (char)(0xd800 + ((i - 0x10000) >> 10)),
+                          (char)(0xdc00 + ((i - 0x10000) & 0x3ff)) });
+        byte[] outp = encoding.GetBytes(inp);
+        /* Nothing was encoded, so there is no mapping to report. */
+        if (outp.Length == 0)
+          continue;
+        /* Output that begins with the encoding of '?', or with a raw 0x3f
+           byte, is taken to be the encoder's replacement for an unmappable
+           character -- unless the input really is '?'. */
+        bool looksLikeFallback = startsWith(outp, qmark) || outp[0] == 0x3f;
+        if (looksLikeFallback && i != 0x003f)
+          continue;
+        Console.Out.Write("0x");
+        for (int j = 0; j < outp.Length; j++)
+          Console.Out.Write(toHexString2(outp[j]));
+        Console.Out.WriteLine("\t0x" + (i<0x10000 ? toHexString4(i) : i.ToString("X")));
+      }
+    } catch (Exception e) {
+      Console.Error.WriteLine(e);
+      Console.Error.WriteLine(e.StackTrace);
+      return 1;
+    }
+    return 0;
+  }
+}