2008-11-04 Atsushi Enomoto <atsushi@ximian.com>
author Atsushi Eno <atsushieno@gmail.com>
Tue, 4 Nov 2008 00:22:50 +0000 (00:22 -0000)
committer Atsushi Eno <atsushieno@gmail.com>
Tue, 4 Nov 2008 00:22:50 +0000 (00:22 -0000)
* ucd.cs : new code to generate unicode table for eglib.

svn path=/trunk/mcs/; revision=117829

mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
mcs/class/corlib/Mono.Globalization.Unicode/ucd.cs [new file with mode: 0755]

index b3d7889ebfed9727c5960f5bcc36fb012e8a992a..2385d59a155e420a1d1ea53547ac592ff75c2d94 100644 (file)
@@ -1,3 +1,7 @@
+2008-11-04  Atsushi Enomoto  <atsushi@ximian.com>
+
+       * ucd.cs : new code to generate unicode table for eglib.
+
 2008-07-04  Andreas Nahr <ClassDevelopment@A-SoftTech.com>
 
        * SortKey: Fix parameter names, add attribute, small formatting
diff --git a/mcs/class/corlib/Mono.Globalization.Unicode/ucd.cs b/mcs/class/corlib/Mono.Globalization.Unicode/ucd.cs
new file mode 100755 (executable)
index 0000000..ddab8a4
--- /dev/null
@@ -0,0 +1,609 @@
+//
+// UCD.cs
+//
+// Author:
+//     Atsushi Enomoto  <atsushi@ximian.com>
+//
+// Copyright (C) 2008 Novell, Inc.
+//
+
+//
+// Unicode table generator for eglib.
+// Note that this code is only for Unicode 5.1.0 or earlier.
+// (regarding character ranges)
+//
+// Some premises:
+// - lower-band (0000-FFFF) characters never have a case mapping to higher-band
+//   characters. Hence, the simple upper/lower mappings are divided into 16-bit
+//   and 32-bit tables.
+//
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Reflection;
+
+namespace Mono.Globalization.Unicode
+{
+       // Command-line entry point of the generator: parses the Unicode data file
+       // named by the first argument and prints the generated C header to stdout.
+       public class Driver
+       {
+               // args [0] must be the path of the file handed to UnicodeData.ParseFile ().
+               // When it is absent, a usage line goes to stderr, the process exit code is
+               // set to 1 and nothing at all is written to stdout, so a caller that
+               // redirects stdout never ends up with a truncated header.
+               public static void Main (string [] args)
+               {
+                       // Used both in the usage text and in the banner of the generated file.
+                       string generator = Assembly.GetEntryAssembly ().GetName ().Name;
+
+                       if (args == null || args.Length < 1) {
+                               Console.Error.WriteLine ("Usage: {0}.exe <unicode-data-file>", generator);
+                               Environment.ExitCode = 1;
+                               return;
+                       }
+
+                       TextWriter w = Console.Out;
+                       // Force LF line ends regardless of the platform running the generator.
+                       w.NewLine = "\n";
+
+                       w.WriteLine (@"/*
+This file is automatically generated by {0}.exe.
+The source for this generator should be in Mono repository
+(mcs/class/corlib/Mono.Globalization.Unicode directory).
+*/
+
+#ifndef __UNICODE_DATA_H
+#define __UNICODE_DATA_H
+
+#include <glib.h>
+
+", generator);
+                       var ud = new UnicodeData5_1_0 ();
+                       var ucd = ud.ParseFile (args [0]);
+                       var ucg = new UnicodeDataCodeGeneratorC5_1_0 (ud, w);
+                       ucg.GenerateStructures ();
+                       w.WriteLine ();
+                       ucg.GenerateUnicodeCategoryListC (ucd);
+                       w.WriteLine ();
+                       ucg.GenerateSimpleCaseMappingListC (ucd);
+                       w.WriteLine ();
+                       ucg.GenerateSimpleTitlecaseMappingListC (ucd);
+                       // Closes the include guard opened in the banner above.
+                       w.WriteLine (@"
+#endif
+");
+               }
+       }
+
+       // Range catalog for Unicode 5.1.0: two fixed lists of code point ranges that
+       // are exposed through the abstract members of UnicodeData.
+       public class UnicodeData5_1_0 : UnicodeData
+       {
+               // Backing data for SimpleCases.
+               static readonly CodePointRange [] simpleCaseRanges = new CodePointRange [] {
+                       new CodePointRange (0x0040, 0x0600),
+                       new CodePointRange (0x1000, 0x10D0),
+                       new CodePointRange (0x1D00, 0x2000),
+                       new CodePointRange (0x2100, 0x21C0),
+                       new CodePointRange (0x2480, 0x2500),
+                       new CodePointRange (0x2C00, 0x2D80),
+                       new CodePointRange (0xA640, 0xA7C0),
+                       new CodePointRange (0xFF20, 0xFF80),
+                       new CodePointRange (0x10400, 0x10480),
+                       };
+
+               // Backing data for CategoryRanges. The notes between the entries name
+               // spans that are not covered by any range, together with a category
+               // name (presumably the single category shared by that whole span).
+               static readonly CodePointRange [] categoryRanges = new CodePointRange [] {
+                       new CodePointRange (0x0000, 0x3400),
+                       // 3400-4DB5: OtherLetter
+                       new CodePointRange (0x4DC0, 0x4E00),
+                       // 4E00-9FC3: OtherLetter
+                       new CodePointRange (0xA000, 0xAA80),
+                       // AC00-D7A3: OtherLetter
+                       // D800-DFFF: OtherSurrogate
+                       // E000-F8FF: OtherPrivateUse
+                       new CodePointRange (0xF900, 0x10000),
+                       new CodePointRange (0x10000, 0x104C0),
+                       new CodePointRange (0x10800, 0x10A80),
+                       new CodePointRange (0x12000, 0x12480),
+                       new CodePointRange (0x1D000, 0x1D800),
+                       new CodePointRange (0x1F000, 0x1F0C0),
+                       // 20000-2A6D6 OtherLetter
+                       new CodePointRange (0x2F800, 0x2FA40),
+                       new CodePointRange (0xE0000, 0xE0200),
+                       // F0000-FFFFD OtherPrivateUse
+                       // 100000-10FFFD OtherPrivateUse
+                       };
+
+               // Ranges for which simple case mapping tables are requested.
+               public override CodePointRange [] SimpleCases {
+                       get { return simpleCaseRanges; }
+               }
+
+               // Ranges for which general category tables are requested.
+               public override CodePointRange [] CategoryRanges {
+                       get { return categoryRanges; }
+               }
+       }
+
+       // Base class of a versioned Unicode data catalog. Subclasses supply the code
+       // point ranges of interest; this class reads a ';'-separated character data
+       // file and turns every record into a UcdCharacterProperty.
+       public abstract class UnicodeData
+       {
+               // Parse () reads fields 0 through 14 of every record.
+               const int FieldCount = 15;
+
+               // Decomposition tag (as spelled in field 5) to enum value.
+               static readonly Dictionary<string,UcdDecompositionType> decomposition_types = new Dictionary<string,UcdDecompositionType> () {
+                       {"<font>", UcdDecompositionType.Font},
+                       {"<noBreak>", UcdDecompositionType.NoBreak},
+                       {"<initial>", UcdDecompositionType.Initial},
+                       {"<medial>", UcdDecompositionType.Medial},
+                       {"<final>", UcdDecompositionType.Final},
+                       {"<isolated>", UcdDecompositionType.Isolated},
+                       {"<circle>", UcdDecompositionType.Circle},
+                       {"<super>", UcdDecompositionType.Super},
+                       {"<sub>", UcdDecompositionType.Sub},
+                       {"<vertical>", UcdDecompositionType.Vertical},
+                       {"<wide>", UcdDecompositionType.Wide},
+                       {"<narrow>", UcdDecompositionType.Narrow},
+                       {"<small>", UcdDecompositionType.Small},
+                       {"<square>", UcdDecompositionType.Square},
+                       {"<fraction>", UcdDecompositionType.Fraction},
+                       {"<compat>", UcdDecompositionType.Compat},
+                       };
+
+               // Two-letter general category abbreviation (field 2) to enum value.
+               static readonly Dictionary<string,UnicodeCategory> categories = new Dictionary<string,UnicodeCategory> () {
+                       {"Lu", UnicodeCategory.UppercaseLetter},
+                       {"Ll", UnicodeCategory.LowercaseLetter},
+                       {"Lt", UnicodeCategory.TitlecaseLetter},
+                       {"Lm", UnicodeCategory.ModifierLetter},
+                       {"Lo", UnicodeCategory.OtherLetter},
+                       {"Mn", UnicodeCategory.NonSpacingMark},
+                       {"Mc", UnicodeCategory.SpacingCombiningMark},
+                       {"Me", UnicodeCategory.EnclosingMark},
+                       {"Nd", UnicodeCategory.DecimalDigitNumber},
+                       {"Nl", UnicodeCategory.LetterNumber},
+                       {"No", UnicodeCategory.OtherNumber},
+                       {"Pc", UnicodeCategory.ConnectorPunctuation},
+                       {"Pd", UnicodeCategory.DashPunctuation},
+                       {"Ps", UnicodeCategory.OpenPunctuation},
+                       {"Pe", UnicodeCategory.ClosePunctuation},
+                       {"Pi", UnicodeCategory.InitialQuotePunctuation},
+                       {"Pf", UnicodeCategory.FinalQuotePunctuation},
+                       {"Po", UnicodeCategory.OtherPunctuation},
+                       {"Sm", UnicodeCategory.MathSymbol},
+                       {"Sc", UnicodeCategory.CurrencySymbol},
+                       {"Sk", UnicodeCategory.ModifierSymbol},
+                       {"So", UnicodeCategory.OtherSymbol},
+                       {"Zs", UnicodeCategory.SpaceSeparator},
+                       {"Zl", UnicodeCategory.LineSeparator},
+                       {"Zp", UnicodeCategory.ParagraphSeparator},
+                       {"Cc", UnicodeCategory.Control},
+                       {"Cf", UnicodeCategory.Format},
+                       {"Cs", UnicodeCategory.Surrogate},
+                       {"Co", UnicodeCategory.PrivateUse},
+                       {"Cn", UnicodeCategory.OtherNotAssigned},
+                       };
+
+               // Code point ranges for which simple case mapping tables are wanted.
+               public abstract CodePointRange [] SimpleCases { get; }
+
+               // Code point ranges for which general category tables are wanted.
+               public abstract CodePointRange [] CategoryRanges { get; }
+
+               // Reads `file` line by line and returns one UcdCharacterProperty per
+               // record, in file order. Empty lines and lines starting with '#' are
+               // skipped. Errors from File.OpenText () and from Parse () propagate.
+               public virtual UcdCharacterProperty [] ParseFile (string file)
+               {
+                       var list = new List<UcdCharacterProperty> ();
+
+                       using (TextReader r = File.OpenText (file)) {
+                               string l;
+                               while ((l = r.ReadLine ()) != null) {
+                                       if (l.Length > 0 && l [0] != '#')
+                                               list.Add (Parse (l));
+                               }
+                       }
+                       return list.ToArray ();
+               }
+
+               // Converts one ';'-separated record into a UcdCharacterProperty.
+               // Throws ArgumentException when the line has fewer than FieldCount
+               // fields or carries an unknown category or decomposition tag; number
+               // parsing errors surface as FormatException / OverflowException.
+               UcdCharacterProperty Parse (string line)
+               {
+                       string [] tokens = line.Split (';');
+                       if (tokens.Length < FieldCount)
+                               throw new ArgumentException (String.Format ("Expected {0} ';'-separated fields but found {1} in line '{2}'", FieldCount, tokens.Length, line));
+
+                       // Field 5 is "[<tag> ]hex hex ...": split off the optional leading tag.
+                       string [] decomp = tokens [5].Length > 0 ? tokens [5].Split (' ') : null;
+                       string decomp_type = null;
+                       if (decomp != null && decomp [0].Length > 0 && decomp [0] [0] == '<') {
+                               decomp_type = decomp [0];
+                               var mapping = new string [decomp.Length - 1];
+                               Array.Copy (decomp, 1, mapping, 0, mapping.Length);
+                               decomp = mapping;
+                       }
+
+                       return new UcdCharacterProperty () {
+                               Codepoint = ParseHex (tokens [0]),
+                               Name = tokens [1],
+                               Category = ParseUnicodeCategory (tokens [2]),
+                               CanonicalCombiningClass = tokens [3].Length > 0 ? (byte?) byte.Parse (tokens [3], CultureInfo.InvariantCulture) : null,
+                               BidiClass = tokens [4].Length > 0 ? (UcdBidiClass) Enum.Parse (typeof (UcdBidiClass), tokens [4]) : UcdBidiClass.None,
+                               DecompositionType = decomp_type != null ? ParseDecompositionType (decomp_type) : UcdDecompositionType.None,
+                               DecompositionMapping = decomp != null ? Array.ConvertAll<string,int> (decomp, ParseHex) : null,
+                               DecimalDigitValue = tokens [6],
+                               DigitValue = tokens [7],
+                               NumericValue = tokens [8],
+                               BidiMirrored = (tokens [9] == "Y"),
+                               Unicode1Name = tokens [10],
+                               IsoComment = tokens [11],
+                               SimpleUppercaseMapping = ParseOptionalHex (tokens [12]),
+                               SimpleLowercaseMapping = ParseOptionalHex (tokens [13]),
+                               SimpleTitlecaseMapping = ParseOptionalHex (tokens [14]),
+                               };
+               }
+
+               // Parses a hexadecimal number independently of the current culture.
+               static int ParseHex (string s)
+               {
+                       return int.Parse (s, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
+               }
+
+               // Like ParseHex (), but an empty field yields 0 (meaning "no mapping").
+               static int ParseOptionalHex (string s)
+               {
+                       return s.Length > 0 ? ParseHex (s) : 0;
+               }
+
+               // Maps a decomposition tag such as "<font>" to its enum value;
+               // throws ArgumentException for a tag that is not in the table.
+               UcdDecompositionType ParseDecompositionType (string s)
+               {
+                       UcdDecompositionType value;
+                       if (decomposition_types.TryGetValue (s, out value))
+                               return value;
+                       throw new ArgumentException (String.Format ("Unexpected decomposition type '{0}'", s));
+               }
+
+               // Maps a general category abbreviation such as "Lu" to UnicodeCategory;
+               // throws ArgumentException for an abbreviation that is not in the table.
+               UnicodeCategory ParseUnicodeCategory (string s)
+               {
+                       UnicodeCategory value;
+                       if (categories.TryGetValue (s, out value))
+                               return value;
+                       throw new ArgumentException (String.Format ("Unexpected category {0}", s));
+               }
+       }
+
+       // Writes C source text (struct typedefs, range lists and per-range lookup
+       // tables) for parsed character data to a TextWriter.
+       //
+       // Every per-range table holds one entry for each code point from Start up to,
+       // but not including, End of its CodePointRange, followed by one extra 0
+       // entry. Code points without a record in the input get the entry 0.
+       public class UnicodeDataCodeGeneratorC5_1_0
+       {
+               // Supplies the code point ranges that get a table.
+               UnicodeData catalog;
+               // Destination of all generated text.
+               TextWriter w;
+
+               public UnicodeDataCodeGeneratorC5_1_0 (UnicodeData catalog, TextWriter writer)
+               {
+                       this.catalog = catalog;
+                       w = writer;
+               }
+
+               // Emits the typedefs of the three structs used by the generated data.
+               public void GenerateStructures ()
+               {
+                       w.WriteLine ("/* ======== Structures ======== */");
+                       w.WriteLine (@"typedef struct {
+       guint32 codepoint;
+       guint32 upper;
+       guint32 title;
+} SimpleTitlecaseMapping;");
+                       w.WriteLine (@"typedef struct {
+       guint32 start;
+       guint32 end;
+} CodePointRange;");
+                       w.WriteLine (@"typedef struct {
+       guint32 upper;
+       guint32 lower;
+} SimpleCaseMapping;");
+               }
+
+               // Emits `<name>_count` (a guint8) and the array `<name>` holding one
+               // {start, end} pair per range plus a terminating {0, 0} pair.
+               void GenerateCodePointRanges (string name, CodePointRange [] ranges)
+               {
+                       w.WriteLine ("static const guint8 {0}_count = {1};", name, ranges.Length);
+                       w.WriteLine ("static const CodePointRange {0} [] = {{", name);
+                       foreach (var cpr in ranges)
+                               w.WriteLine ("{{0x{0:X06}, 0x{1:X06}}},", cpr.Start, cpr.End);
+                       // No format arguments here, so the braces are written literally.
+                       w.WriteLine ("{0, 0}};");
+               }
+
+               // Emits the category range list, one unicode_category_table<N> per
+               // range in catalog.CategoryRanges, and the unicode_category array that
+               // points at those tables. `ucd` must be ordered by ascending code point.
+               public void GenerateUnicodeCategoryListC (UcdCharacterProperty [] ucd)
+               {
+                       w.WriteLine ("/* ======== Unicode Categories ======== */");
+                       GenerateCodePointRanges ("unicode_category_ranges", catalog.CategoryRanges);
+
+                       int table = 0;
+                       foreach (var cpr in catalog.CategoryRanges) {
+                               w.WriteLine ("const GUnicodeType unicode_category_table{0} [] = {{", table);
+                               // The numeric value of the GUnicodeType member is what goes into the table.
+                               WriteTableBody (ucd, cpr, ucp => ((int) ToGUnicodeCategory (ucp.Category)).ToString (CultureInfo.InvariantCulture));
+                               table++;
+                       }
+
+                       w.WriteLine ("static const GUnicodeType *unicode_category [{0}]  = {{", catalog.CategoryRanges.Length);
+                       for (int i = 0, end = catalog.CategoryRanges.Length; i < end; i++)
+                               w.WriteLine ("\tunicode_category_table{0}{1}", i, i + 1 < end ? "," : String.Empty);
+                       w.WriteLine ("};");
+               }
+
+               // Emits simple_titlecase_mapping, listing {codepoint, upper, title} for
+               // every record whose titlecase mapping differs from its uppercase
+               // mapping, followed by the entry count as a guint8.
+               public void GenerateSimpleTitlecaseMappingListC (UcdCharacterProperty [] ucd)
+               {
+                       w.WriteLine ("static const SimpleTitlecaseMapping simple_titlecase_mapping [] = {");
+                       int count = 0;
+                       foreach (var ucp in ucd) {
+                               if (ucp.SimpleUppercaseMapping == ucp.SimpleTitlecaseMapping)
+                                       continue;
+                               // Separator goes in front of every entry but the first.
+                               if (count > 0)
+                                       w.WriteLine (',');
+                               w.Write ("\t{{0x{0:X06}, 0x{1:X06}, 0x{2:X06}}}", ucp.Codepoint, ucp.SimpleUppercaseMapping, ucp.SimpleTitlecaseMapping);
+                               count++;
+                       }
+                       w.WriteLine ();
+                       w.WriteLine ("};");
+                       w.WriteLine ("static const guint8 simple_titlecase_mapping_count = {0};", count);
+               }
+
+               // Emits the case mapping range list and then the four table groups:
+               // upper/lower crossed with the 16-bit ("lowarea") and 32-bit
+               // ("higharea") element width.
+               public void GenerateSimpleCaseMappingListC (UcdCharacterProperty [] ucd)
+               {
+                       GenerateCodePointRanges ("simple_case_map_ranges", catalog.SimpleCases);
+                       GenerateSimpleCaseMappingListC (ucd, true, true);
+                       GenerateSimpleCaseMappingListC (ucd, true, false);
+                       GenerateSimpleCaseMappingListC (ucd, false, true);
+                       GenerateSimpleCaseMappingListC (ucd, false, false);
+               }
+
+               // Emits one table per range of catalog.SimpleCases that belongs to the
+               // requested area, then the array of pointers to those tables.
+               //   upper: true for uppercase mappings, false for lowercase mappings.
+               //   small: true for ranges starting at or below 0xFFFF (guint16 entries),
+               //          false for ranges starting at 0x10000 or above (guint32 entries).
+               // Table numbering restarts at 0 for each call.
+               void GenerateSimpleCaseMappingListC (UcdCharacterProperty [] ucd, bool upper, bool small)
+               {
+                       string elementType = small ? "guint16" : "guint32";
+                       string direction = upper ? "upper" : "lower";
+                       string area = small ? "lowarea" : "higharea";
+
+                       int nTable = 0;
+                       foreach (var cpr in catalog.SimpleCases) {
+                               if (small && cpr.Start > 0xFFFF)
+                                       break;
+                               if (!small && cpr.Start < 0x10000)
+                                       continue;
+
+                               w.WriteLine ("static const {0} simple_{1}_case_mapping_{2}_table{3} [] = {{", elementType, direction, area, nTable);
+                               WriteTableBody (ucd, cpr, ucp => {
+                                       int v = upper ? ucp.SimpleUppercaseMapping : ucp.SimpleLowercaseMapping;
+                                       return v != 0 ? String.Format (CultureInfo.InvariantCulture, "0x{0:X}", v) : "0";
+                               });
+
+                               nTable++;
+                       }
+
+                       w.WriteLine ("static const {0} *simple_{1}_case_mapping_{2} [] = {{", elementType, direction, area);
+
+                       for (int i = 0; i < nTable; i++) {
+                               if (i > 0)
+                                       w.WriteLine (",");
+                               w.Write ("\tsimple_{0}_case_mapping_{1}_table{2}", direction, area, i);
+                       }
+
+                       w.WriteLine ("};");
+                       w.WriteLine ();
+               }
+
+               // Writes the initializer body of one table, including the closing
+               // "0};" line. `toCell` renders the entry for a code point that has a
+               // record; code points without one get "0". The entries always run up
+               // to cpr.End, so that indexing the table with (code point - cpr.Start)
+               // is valid for every code point inside the range, even when the last
+               // code points of the range have no record in `ucd`.
+               void WriteTableBody (UcdCharacterProperty [] ucd, CodePointRange cpr, Converter<UcdCharacterProperty,string> toCell)
+               {
+                       w.WriteLine ("\t/* ==== {0:X}-{1:X} ==== */", cpr.Start, cpr.End);
+                       w.Write ("\t");
+                       int cp = cpr.Start;
+                       foreach (var ucp in ucd) {
+                               if (ucp.Codepoint >= cpr.End)
+                                       break;
+                               // Records in front of the range (or already passed) are not ours.
+                               if (ucp.Codepoint < cp)
+                                       continue;
+                               // Fill the hole between the previous record and this one.
+                               while (cp < ucp.Codepoint)
+                                       WriteCell ("0", ref cp);
+                               WriteCell (toCell (ucp), ref cp);
+                       }
+                       // Pad the tail of the range that has no records.
+                       while (cp < cpr.End)
+                               WriteCell ("0", ref cp);
+                       w.WriteLine ("0};");
+               }
+
+               // Writes one entry followed by a comma, advances `cp` and starts a
+               // new tab-indented line whenever `cp` reaches a multiple of 16.
+               void WriteCell (string value, ref int cp)
+               {
+                       w.Write (value);
+                       w.Write (',');
+                       if (++cp % 16 == 0) {
+                               w.WriteLine ();
+                               w.Write ("\t");
+                       }
+               }
+
+               // Category codes written into the generated tables. The implicit
+               // numeric values (0, 1, 2, ...) are emitted as-is, so the member order
+               // must not change. Presumably this mirrors the GUnicodeType enum of
+               // the <glib.h> the generated header includes -- verify before editing.
+               enum GUnicodeType
+               {
+                       G_UNICODE_CONTROL,
+                       G_UNICODE_FORMAT,
+                       G_UNICODE_UNASSIGNED,
+                       G_UNICODE_PRIVATE_USE,
+                       G_UNICODE_SURROGATE,
+                       G_UNICODE_LOWERCASE_LETTER,
+                       G_UNICODE_MODIFIER_LETTER,
+                       G_UNICODE_OTHER_LETTER,
+                       G_UNICODE_TITLECASE_LETTER,
+                       G_UNICODE_UPPERCASE_LETTER,
+                       G_UNICODE_COMBINING_MARK,
+                       G_UNICODE_ENCLOSING_MARK,
+                       G_UNICODE_NON_SPACING_MARK,
+                       G_UNICODE_DECIMAL_NUMBER,
+                       G_UNICODE_LETTER_NUMBER,
+                       G_UNICODE_OTHER_NUMBER,
+                       G_UNICODE_CONNECT_PUNCTUATION,
+                       G_UNICODE_DASH_PUNCTUATION,
+                       G_UNICODE_CLOSE_PUNCTUATION,
+                       G_UNICODE_FINAL_PUNCTUATION,
+                       G_UNICODE_INITIAL_PUNCTUATION,
+                       G_UNICODE_OTHER_PUNCTUATION,
+                       G_UNICODE_OPEN_PUNCTUATION,
+                       G_UNICODE_CURRENCY_SYMBOL,
+                       G_UNICODE_MODIFIER_SYMBOL,
+                       G_UNICODE_MATH_SYMBOL,
+                       G_UNICODE_OTHER_SYMBOL,
+                       G_UNICODE_LINE_SEPARATOR,
+                       G_UNICODE_PARAGRAPH_SEPARATOR,
+                       G_UNICODE_SPACE_SEPARATOR
+               }
+
+               // Translates a .NET UnicodeCategory into the matching GUnicodeType;
+               // throws ArgumentException for a value that has no counterpart.
+               GUnicodeType ToGUnicodeCategory (UnicodeCategory v)
+               {
+                       switch (v) {
+                       case UnicodeCategory.UppercaseLetter:
+                               return GUnicodeType.G_UNICODE_UPPERCASE_LETTER;
+                       case UnicodeCategory.LowercaseLetter:
+                               return GUnicodeType.G_UNICODE_LOWERCASE_LETTER;
+                       case UnicodeCategory.TitlecaseLetter:
+                               return GUnicodeType.G_UNICODE_TITLECASE_LETTER;
+                       case UnicodeCategory.ModifierLetter:
+                               return GUnicodeType.G_UNICODE_MODIFIER_LETTER;
+                       case UnicodeCategory.OtherLetter:
+                               return GUnicodeType.G_UNICODE_OTHER_LETTER;
+                       case UnicodeCategory.NonSpacingMark:
+                               return GUnicodeType.G_UNICODE_NON_SPACING_MARK;
+                       case UnicodeCategory.SpacingCombiningMark:
+                               return GUnicodeType.G_UNICODE_COMBINING_MARK;
+                       case UnicodeCategory.EnclosingMark:
+                               return GUnicodeType.G_UNICODE_ENCLOSING_MARK;
+                       case UnicodeCategory.DecimalDigitNumber:
+                               return GUnicodeType.G_UNICODE_DECIMAL_NUMBER;
+                       case UnicodeCategory.LetterNumber:
+                               return GUnicodeType.G_UNICODE_LETTER_NUMBER;
+                       case UnicodeCategory.OtherNumber:
+                               return GUnicodeType.G_UNICODE_OTHER_NUMBER;
+                       case UnicodeCategory.ConnectorPunctuation:
+                               return GUnicodeType.G_UNICODE_CONNECT_PUNCTUATION;
+                       case UnicodeCategory.DashPunctuation:
+                               return GUnicodeType.G_UNICODE_DASH_PUNCTUATION;
+                       case UnicodeCategory.OpenPunctuation:
+                               return GUnicodeType.G_UNICODE_OPEN_PUNCTUATION;
+                       case UnicodeCategory.ClosePunctuation:
+                               return GUnicodeType.G_UNICODE_CLOSE_PUNCTUATION;
+                       case UnicodeCategory.InitialQuotePunctuation:
+                               return GUnicodeType.G_UNICODE_INITIAL_PUNCTUATION;
+                       case UnicodeCategory.FinalQuotePunctuation:
+                               return GUnicodeType.G_UNICODE_FINAL_PUNCTUATION;
+                       case UnicodeCategory.OtherPunctuation:
+                               return GUnicodeType.G_UNICODE_OTHER_PUNCTUATION;
+                       case UnicodeCategory.MathSymbol:
+                               return GUnicodeType.G_UNICODE_MATH_SYMBOL;
+                       case UnicodeCategory.CurrencySymbol:
+                               return GUnicodeType.G_UNICODE_CURRENCY_SYMBOL;
+                       case UnicodeCategory.ModifierSymbol:
+                               return GUnicodeType.G_UNICODE_MODIFIER_SYMBOL;
+                       case UnicodeCategory.OtherSymbol:
+                               return GUnicodeType.G_UNICODE_OTHER_SYMBOL;
+                       case UnicodeCategory.SpaceSeparator:
+                               return GUnicodeType.G_UNICODE_SPACE_SEPARATOR;
+                       case UnicodeCategory.LineSeparator:
+                               return GUnicodeType.G_UNICODE_LINE_SEPARATOR;
+                       case UnicodeCategory.ParagraphSeparator:
+                               return GUnicodeType.G_UNICODE_PARAGRAPH_SEPARATOR;
+                       case UnicodeCategory.Control:
+                               return GUnicodeType.G_UNICODE_CONTROL;
+                       case UnicodeCategory.Format:
+                               return GUnicodeType.G_UNICODE_FORMAT;
+                       case UnicodeCategory.Surrogate:
+                               return GUnicodeType.G_UNICODE_SURROGATE;
+                       case UnicodeCategory.PrivateUse:
+                               return GUnicodeType.G_UNICODE_PRIVATE_USE;
+                       case UnicodeCategory.OtherNotAssigned:
+                               return GUnicodeType.G_UNICODE_UNASSIGNED;
+                       }
+                       throw new ArgumentException (String.Format ("Unexpected category {0}", v));
+               }
+       }
+
+       // A pair of code point boundaries. Both values are stored exactly as given;
+       // no ordering or validity check is made.
+       // NOTE(review): the table generators in this file treat End as exclusive
+       // (they stop at the first code point >= End) -- confirm before reusing.
+       public class CodePointRange
+       {
+               int start;
+               int end;
+
+               public CodePointRange (int start, int end)
+               {
+                       this.start = start;
+                       this.end = end;
+               }
+
+               // First boundary of the range.
+               public int Start {
+                       get { return start; }
+                       set { start = value; }
+               }
+
+               // Second boundary of the range.
+               public int End {
+                       get { return end; }
+                       set { end = value; }
+               }
+       }
+
+       public class UcdCharacterProperty // One parsed record of the character data file; filled by UnicodeData.Parse () from the ';'-separated fields of a line.
+       {
+               public int Codepoint { get; set; } // field 0, parsed as a hexadecimal number
+               public string Name { get; set; } // field 1, stored verbatim
+               public UnicodeCategory Category { get; set; } // field 2, two-letter abbreviation mapped to UnicodeCategory
+               public byte? CanonicalCombiningClass { get; set; } // field 3 parsed as a number; null when the field is empty
+               public UcdBidiClass BidiClass { get; set; } // field 4 resolved by enum member name; None when the field is empty
+               public UcdDecompositionType DecompositionType { get; set; } // leading "<tag>" of field 5; None when there is no tag
+               public int [] DecompositionMapping { get; set; } // hexadecimal values of field 5 without the tag; null when the field is empty
+               public string DecimalDigitValue { get; set; } // field 6, stored verbatim (not parsed)
+               public string DigitValue { get; set; } // field 7, stored verbatim (not parsed)
+               public string NumericValue { get; set; } // field 8, stored verbatim (not parsed)
+               public bool BidiMirrored { get; set; } // true only when field 9 is exactly "Y"
+               public string Unicode1Name { get; set; } // field 10, stored verbatim
+               public string IsoComment { get; set; } // field 11, stored verbatim
+               public int SimpleUppercaseMapping { get; set; } // field 12 as hexadecimal; 0 when the field is empty
+               public int SimpleLowercaseMapping { get; set; } // field 13 as hexadecimal; 0 when the field is empty
+               public int SimpleTitlecaseMapping { get; set; } // field 14 as hexadecimal; 0 when the field is empty
+       }
+
+       public enum UcdBidiClass // Values of record field 4. UnicodeData.Parse () resolves the field text with Enum.Parse, so every member name must be spelled exactly like the abbreviation in the data file.
+       {
+               None, // not read from the file: used when field 4 is empty
+               L,
+               LRE,
+               LRO,
+               R,
+               AL,
+               RLE,
+               RLO,
+               PDF,
+               EN,
+               ES,
+               ET,
+               AN,
+               CS,
+               NSM,
+               BN,
+               B,
+               S,
+               WS,
+               ON
+       }
+
+       public enum UcdDecompositionType // Kind of decomposition named by the optional "<tag>" that starts record field 5; see UnicodeData.ParseDecompositionType () for the mapping.
+       {
+               None, // field 5 is empty or has no leading tag
+               Font, // "<font>"
+               NoBreak, // "<noBreak>"
+               Initial, // "<initial>"
+               Medial, // "<medial>"
+               Final, // "<final>"
+               Isolated, // "<isolated>"
+               Circle, // "<circle>"
+               Super, // "<super>"
+               Sub, // "<sub>"
+               Vertical, // "<vertical>"
+               Wide, // "<wide>"
+               Narrow, // "<narrow>"
+               Small, // "<small>"
+               Square, // "<square>"
+               Fraction, // "<fraction>"
+               Compat // "<compat>"
+       }
+}