From 3ad8d8f10395b3e57dd07c689900b292ccf2972c Mon Sep 17 00:00:00 2001 From: Atsushi Eno Date: Thu, 30 Jun 2005 06:32:25 +0000 Subject: [PATCH] 2005-06-30 Atsushi Enomoto * mono-tailoring-source.txt : fixed description on '*' in sortkeys. * SimpleCollator.cs : Now it fully uses tailoring info. Fixed contraction search that worked only when string is contraction. Removed commented code. Minor refactoring. * TestDriver.cs : added example that uses "ZS" in Hungarian sorting. svn path=/trunk/mcs/; revision=46755 --- .../Mono.Globalization.Unicode/ChangeLog | 8 +++ .../SimpleCollator.cs | 67 +++++++++---------- .../Mono.Globalization.Unicode/TestDriver.cs | 3 + .../mono-tailoring-source.txt | 5 +- 4 files changed, 44 insertions(+), 39 deletions(-) diff --git a/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog b/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog index ae16641cb4e..e37715dc75f 100644 --- a/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog +++ b/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog @@ -1,3 +1,11 @@ +2005-06-30 Atsushi Enomoto + + * mono-tailoring-source.txt : fixed description on '*' in sortkeys. + * SimpleCollator.cs : Now it fully uses tailoring info. Fixed + contraction search that worked only when string is contraction. + Removed commented code. Minor refactoring. + * TestDriver.cs : added example that uses "ZS" in Hungarian sorting. + 2005-06-29 Atsushi Enomoto * create-mscompat-collation-table.cs, diff --git a/mcs/class/corlib/Mono.Globalization.Unicode/SimpleCollator.cs b/mcs/class/corlib/Mono.Globalization.Unicode/SimpleCollator.cs index 72c78387e09..163ad645bc9 100644 --- a/mcs/class/corlib/Mono.Globalization.Unicode/SimpleCollator.cs +++ b/mcs/class/corlib/Mono.Globalization.Unicode/SimpleCollator.cs @@ -55,8 +55,10 @@ namespace Mono.Globalization.Unicode readonly byte [] cjkLv2Table; readonly CodePointIndexer cjkLv2Indexer; readonly int lcid; + readonly Contraction [] contractions; + readonly Level2Map [] level2Maps; - #region Tailoring supports + #region Tailoring support classes // Possible mapping types are: // // - string to string (ReplacementMap) @@ -127,9 +129,6 @@ namespace Mono.Globalization.Unicode } } - readonly Contraction [] contractions; - readonly Level2Map [] level2Maps; - #endregion #region .ctor() and split functions @@ -156,7 +155,13 @@ namespace Mono.Globalization.Unicode frenchSort = t.FrenchSort; BuildTailoringTables (culture, t, ref contractions, ref level2Maps); + // FIXME: Since tailorings are mostly for latin + // (and in some cases Cyrillic) characters, it would + // be much better for performance to store "start + // indexes" for > 370 (culture-specific letters). + /* +// dump tailoring table Console.WriteLine ("******** building table for {0} : c - {1} d - {2}", culture.LCID, contractions.Length, level2Maps.Length); foreach (Contraction c in contractions) { @@ -325,7 +330,7 @@ Console.WriteLine (" -> {0}", c.Replacement); if (ct.Source [0] > s [start]) return null; // it's already sorted char [] chars = ct.Source; - if (end - start != chars.Length) + if (end - start < chars.Length) continue; bool match = true; for (int n = 0; n < chars.Length; n++) @@ -367,32 +372,6 @@ Console.WriteLine (" -> {0}", c.Replacement); return Uni.GetExpansion ((char) i); } - /* - bool HasContraction (char c, bool strict) - { - if (HasContraction (c, strict, contractions)) - return true; - if (lcid != 127) - return HasContraction (c, strict, invariant.contractions); - return false; - } - - bool HasContraction (char c, bool strict, Contraction [] clist) - { - for (int i = 0; i < clist.Length; i++) { - Contraction ct = clist [i]; - if (ct.Source [0] > c) - return false; // it's already sorted - if (ct.Source [0] == c) { - if (strict && ct.Source.Length > 1) - continue; - return true; - } - } - return false; - } - */ - int FilterOptions (int i) { if (ignoreWidth) @@ -420,23 +399,37 @@ Console.WriteLine (" -> {0}", c.Replacement); { SetOptions (options); - int end = start + length; buf.Initialize (options, s, frenchSort); + int end = start + length; + GetSortKey (s, start, end); + return buf.GetResultAndReset (); + } + + void GetSortKey (string s, int start, int end) + { for (int n = start; n < end; n++) { int i = s [n]; if (IsIgnorable (i)) continue; i = FilterOptions (i); - string expansion = GetExpansion (i); - if (expansion != null) { - foreach (char e in expansion) - FillSortKeyRaw (e); + Contraction ct = GetContraction (s, n, end); + if (ct != null) { + if (ct.Replacement != null) + GetSortKey (ct.Replacement, 0, ct.Replacement.Length); + else { + byte [] b = ct.SortKey; + buf.AppendNormal ( + b [0], + b [1], + b [2] != 1 ? b [2] : Level2 (i), + b [3] != 1 ? b [3] : Uni.Level3 (i)); + } + n += ct.Source.Length - 1; } else FillSortKeyRaw (i); } - return buf.GetResultAndReset (); } bool IsIgnorable (int i) diff --git a/mcs/class/corlib/Mono.Globalization.Unicode/TestDriver.cs b/mcs/class/corlib/Mono.Globalization.Unicode/TestDriver.cs index 0a2838ca8d0..b3d6da3a94f 100644 --- a/mcs/class/corlib/Mono.Globalization.Unicode/TestDriver.cs +++ b/mcs/class/corlib/Mono.Globalization.Unicode/TestDriver.cs @@ -76,6 +76,9 @@ namespace Mono.Globalization.Unicode LastIndexOf ("BBCBBC", "BC", CompareOptions.IgnoreCase); LastIndexOf ("original", "rig", CompareOptions.None); Console.WriteLine ("original".LastIndexOf ("rig")); + + coll = new SimpleCollator (new CultureInfo ("hu")); + DumpSortKey ("ZSAZS1"); } void Generate () diff --git a/mcs/class/corlib/Mono.Globalization.Unicode/mono-tailoring-source.txt b/mcs/class/corlib/Mono.Globalization.Unicode/mono-tailoring-source.txt index 80446eef538..98231ed417c 100644 --- a/mcs/class/corlib/Mono.Globalization.Unicode/mono-tailoring-source.txt +++ b/mcs/class/corlib/Mono.Globalization.Unicode/mono-tailoring-source.txt @@ -7,8 +7,9 @@ # - S = D # it means source S is considered as equivalent to D # - S : cc pp ss tt ii -# it means source S has a sortkey. Here * can be used and it -# character in S. +# it means source S has a sortkey. For level 2 and 3, * can be +# used to indicate that it copies the corresponding weight of +# the first character of S. # # Level 4 tailorings is not supported (it is logically done). # -- 2.25.1