//
+// create-mscompat-collation-table.cs : generates Windows-like sortkey tables.
+//
+// Author:
+// Atsushi Enomoto <atsushi@ximian.com>
+//
+// Copyright (C) 2005 Novell, Inc (http://www.novell.com)
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
//
// There are two kinds of sort keys: those which are computed and those which
// are laid out as an indexed array. Computed sort keys are:
// - Surrogate
// - PrivateUse
//
-// Also, for composite characters it should prepare different index table.
-//
// Though it is possible to "compute" level 3 weights, they are still dumped
// to an array to avoid execution cost.
//
-
-//
-// * sortkey getter signature
-//
-// int GetSortKey (string s, int index, SortKeyBuffer buf)
-// Stores sort key for corresponding character element into buf and
-// returns the length of the consumed _source_ character element in s.
-//
-// * character length to consume
-//
-// If there are characters whose primary weight is 0, they are consumed
-// and considered as a part of the character element.
-//
#define Binary
using System;
cjkKOlv2 = CompressArray (cjkKOlv2, UUtil.Cjk);
// Ignorables
- CResult.WriteLine ("static const guint8* collation_table_ignorableFlags [] = {");
+ CResult.WriteLine ("static const guint8 collation_table_ignorableFlags [] = {");
CSResult.WriteLine ("static readonly byte [] ignorableFlagsArr = new byte [] {");
#if Binary
MemoryStream ms = new MemoryStream ();
CResult.WriteLine ();
}
}
+ CResult.WriteLine ("0};");
CSResult.WriteLine ("};");
CSResult.WriteLine ();
// Primary category
- CResult.WriteLine ("static const guint8* collation_table_category [] = {");
+ CResult.WriteLine ("static const guint8 collation_table_category [] = {");
CSResult.WriteLine ("static readonly byte [] categoriesArr = new byte [] {");
#if Binary
binary.Write (categories.Length);
CSResult.WriteLine ();
// Primary weight value
- CResult.WriteLine ("static const guint8* collation_table_level1 [] = {");
+ CResult.WriteLine ("static const guint8 collation_table_level1 [] = {");
CSResult.WriteLine ("static readonly byte [] level1Arr = new byte [] {");
#if Binary
binary.Write (level1.Length);
CSResult.WriteLine ();
// Secondary weight
- CResult.WriteLine ("static const guint8* collation_table_level2 [] = {");
+ CResult.WriteLine ("static const guint8 collation_table_level2 [] = {");
CSResult.WriteLine ("static readonly byte [] level2Arr = new byte [] {");
#if Binary
binary.Write (level2.Length);
CSResult.WriteLine ();
// Tertiary weight
- CResult.WriteLine ("static const guint8* collation_table_level3 [] = {");
+ CResult.WriteLine ("static const guint8 collation_table_level3 [] = {");
CSResult.WriteLine ("static readonly byte [] level3Arr = new byte [] {");
#if Binary
binary.Write (level3.Length);
*/
#if Binary
- using (FileStream fs = File.Create ("../collation.core.bin")) {
+ using (FileStream fs = File.Create ("../resources/collation.core.bin")) {
byte [] array = ms.ToArray ();
fs.Write (array, 0, array.Length);
}
void SerializeCJK (string name, ushort [] cjk, int max_unused)
{
- CResult.WriteLine ("static const int collation_table_collation_cjk_{0}_size [] = {1};", name, cjk.Length);
+// CResult.WriteLine ("static const int collation_table_collation_cjk_{0}_size [] = {1};", name, cjk.Length);
CSResult.WriteLine ("const int {0}ArrLength = {1};", name, cjk.Length);
- CResult.WriteLine ("static const guint8* collation_table_collation_cjk_{0} [] = {{", name);
+ int len = cjk.Length;
+ CResult.WriteLine ("static const guint8 collation_table_collation_cjk_{0} [] = {{", name);
CSResult.WriteLine ("static byte [] {0}Arr = new byte [] {{", name);
+ // emit the element count as a 4-byte prefix, low byte first; the actual length in bytes is *2
+ for (int i = 0; i < 4; i++, len /= 256) {
+ CResult.Write ("{0},", len & 0xFF);
+ CSResult.Write ("0x{0:X04},", len & 0xFF);
+ }
+ CResult.WriteLine ();
+ CSResult.WriteLine ();
#if Binary
MemoryStream ms = new MemoryStream ();
BinaryWriter binary = new BinaryWriter (ms);
CSResult.WriteLine ("};");
CSResult.WriteLine ();
#if Binary
- using (FileStream fs = File.Create (String.Format ("../collation.{0}.bin", name))) {
+ using (FileStream fs = File.Create (String.Format ("../resources/collation.{0}.bin", name))) {
byte [] array = ms.ToArray ();
fs.Write (array, 0, array.Length);
}
void SerializeCJK (string name, byte [] cjk, int max)
{
- CResult.WriteLine ("static const guint8* collation_table_collation_cjk_{0} [] = {{", name);
+ CResult.WriteLine ("static const guint8 collation_table_collation_cjk_{0} [] = {{", name);
CSResult.WriteLine ("static byte [] {0}Arr = new byte [] {{", name);
#if Binary
MemoryStream ms = new MemoryStream ();
CSResult.WriteLine ("};");
CSResult.WriteLine ();
#if Binary
- using (FileStream fs = File.Create (String.Format ("../collation.{0}.bin", name))) {
+ using (FileStream fs = File.Create (String.Format ("../resources/collation.{0}.bin", name))) {
byte [] array = ms.ToArray ();
fs.Write (array, 0, array.Length);
}
{
Hashtable indexes = new Hashtable ();
Hashtable counts = new Hashtable ();
- CResult.WriteLine ("static const guint16*collation_table_tailoring = {");
- CSResult.WriteLine ("static char [] tailorings = new char [] {");
+ CResult.WriteLine ("static const guint16 collation_table_tailoring [] = {");
+ CSResult.WriteLine ("static char [] tailoringArr = new char [] {");
int count = 0;
#if Binary
MemoryStream ms = new MemoryStream ();
CResult.WriteLine ("0};");
CSResult.WriteLine ("};");
- CResult.WriteLine ("static const int collation_tailoring_count = {0};", tailorings.Count);
- CResult.WriteLine ("static const int* collation_tailoring_infos = {");
+ CResult.WriteLine ("static const guint32 collation_table_tailoring_infos [] = {");
+ CResult.WriteLine ("{0}, /*count*/", tailorings.Count);
CSResult.WriteLine ("static TailoringInfo [] tailoringInfos = new TailoringInfo [] {");
#if Binary
byte [] rawdata = ms.ToArray ();
binary.Write (rawdata, 0, rawdata.Length);
- using (FileStream fs = File.Create ("../collation.tailoring.bin")) {
+ using (FileStream fs = File.Create ("../resources/collation.tailoring.bin")) {
byte [] array = ms.ToArray ();
fs.Write (array, 0, array.Length);
}
AddCharMapGroup ((char) i, 0xE, 1, 0);
}
+ // IPA extensions
+ // FIXME: this yields values that are not equivalent to
+ // those on Windows, but is safer for comparison.
+ char [] ipaArray = new char [0x300 - 0x250 + 0x20];
+ for (int i = 0x40; i < 0x60; i++)
+ if (Char.IsLetter ((char) i))
+ ipaArray [i - 0x40] = (char) (i);
+ for (int i = 0x250; i < 0x300; i++)
+ if (Char.IsLetter ((char) i))
+ ipaArray [i - 0x250 + 0x20] = (char) i;
+ Array.Sort (ipaArray, UCAComparer.Instance);
+ int targetASCII = 0;
+ byte latinDiacritical = 0x7B;
+ foreach (char c in ipaArray) {
+ if (c <= 'Z') {
+ targetASCII = c;
+ latinDiacritical = 0x7B;
+ }
+ else
+ map [(int) c] = new CharMapEntry (
+ 0xE,
+ map [targetASCII].Level1,
+ latinDiacritical++);
+ }
+
// Greek and Coptic
// FIXME: this is (mysterious and) incomplete.