2005-06-30 Atsushi Enomoto <atsushi@ximian.com>
author Atsushi Eno <atsushieno@gmail.com>
Thu, 30 Jun 2005 06:32:25 +0000 (06:32 -0000)
committer Atsushi Eno <atsushieno@gmail.com>
Thu, 30 Jun 2005 06:32:25 +0000 (06:32 -0000)
* mono-tailoring-source.txt : fixed description on '*' in sortkeys.
* SimpleCollator.cs : Now it fully uses tailoring info. Fixed
  contraction search that worked only when string is contraction.
  Removed commented code. Minor refactoring.
* TestDriver.cs : added example that uses "ZS" in Hungarian sorting.

svn path=/trunk/mcs/; revision=46755

mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
mcs/class/corlib/Mono.Globalization.Unicode/SimpleCollator.cs
mcs/class/corlib/Mono.Globalization.Unicode/TestDriver.cs
mcs/class/corlib/Mono.Globalization.Unicode/mono-tailoring-source.txt

index ae16641cb4e1668116454d5e727f84e3fa4a39cc..e37715dc75f332bdfbfbe11b787aa7b1afcd9646 100644 (file)
@@ -1,3 +1,11 @@
+2005-06-30  Atsushi Enomoto  <atsushi@ximian.com>
+
+       * mono-tailoring-source.txt : fixed description on '*' in sortkeys.
+       * SimpleCollator.cs : Now it fully uses tailoring info. Fixed
+         contraction search that worked only when string is contraction.
+         Removed commented code. Minor refactoring.
+       * TestDriver.cs : added example that uses "ZS" in Hungarian sorting.
+
 2005-06-29  Atsushi Enomoto  <atsushi@ximian.com>
 
        * create-mscompat-collation-table.cs,
index 72c78387e093a55a87f23b97b174d486c22dabf7..163ad645bc9aec5df8eeb7eaf1902a28e24512ac 100644 (file)
@@ -55,8 +55,10 @@ namespace Mono.Globalization.Unicode
                readonly byte [] cjkLv2Table;
                readonly CodePointIndexer cjkLv2Indexer;
                readonly int lcid;
+               readonly Contraction [] contractions;
+               readonly Level2Map [] level2Maps;
 
-               #region Tailoring supports
+               #region Tailoring support classes
                // Possible mapping types are:
                //
                //      - string to string (ReplacementMap)
@@ -127,9 +129,6 @@ namespace Mono.Globalization.Unicode
                        }
                }
 
-               readonly Contraction [] contractions;
-               readonly Level2Map [] level2Maps;
-
                #endregion
 
                #region .ctor() and split functions
@@ -156,7 +155,13 @@ namespace Mono.Globalization.Unicode
                        frenchSort = t.FrenchSort;
                        BuildTailoringTables (culture, t, ref contractions,
                                ref level2Maps);
+                       // FIXME: Since tailorings are mostly for latin
+                       // (and in some cases Cyrillic) characters, it would
+                       // be much better for performance to store "start 
+                       // indexes" for > 370 (culture-specific letters).
+
 /*
+// dump tailoring table
 Console.WriteLine ("******** building table for {0} : c - {1} d - {2}",
 culture.LCID, contractions.Length, level2Maps.Length);
 foreach (Contraction c in contractions) {
@@ -325,7 +330,7 @@ Console.WriteLine (" -> {0}", c.Replacement);
                                if (ct.Source [0] > s [start])
                                        return null; // it's already sorted
                                char [] chars = ct.Source;
-                               if (end - start != chars.Length)
+                               if (end - start < chars.Length)
                                        continue;
                                bool match = true;
                                for (int n = 0; n < chars.Length; n++)
@@ -367,32 +372,6 @@ Console.WriteLine (" -> {0}", c.Replacement);
                        return Uni.GetExpansion ((char) i);
                }
 
-               /*
-               bool HasContraction (char c, bool strict)
-               {
-                       if (HasContraction (c, strict, contractions))
-                               return true;
-                       if (lcid != 127)
-                               return HasContraction (c, strict, invariant.contractions);
-                       return false;
-               }
-
-               bool HasContraction (char c, bool strict, Contraction [] clist)
-               {
-                       for (int i = 0; i < clist.Length; i++) {
-                               Contraction ct = clist [i];
-                               if (ct.Source [0] > c)
-                                       return false; // it's already sorted
-                               if (ct.Source [0] == c) {
-                                       if (strict && ct.Source.Length > 1)
-                                               continue;
-                                       return true;
-                               }
-                       }
-                       return false;
-               }
-               */
-
                int FilterOptions (int i)
                {
                        if (ignoreWidth)
@@ -420,23 +399,37 @@ Console.WriteLine (" -> {0}", c.Replacement);
                {
                        SetOptions (options);
 
-                       int end = start + length;
                        buf.Initialize (options, s, frenchSort);
+                       int end = start + length;
+                       GetSortKey (s, start, end);
+                       return buf.GetResultAndReset ();
+               }
+
+               void GetSortKey (string s, int start, int end)
+               {
                        for (int n = start; n < end; n++) {
                                int i = s [n];
                                if (IsIgnorable (i))
                                        continue;
                                i = FilterOptions (i);
 
-                               string expansion = GetExpansion (i);
-                               if (expansion != null) {
-                                       foreach (char e in expansion)
-                                               FillSortKeyRaw (e);
+                               Contraction ct = GetContraction (s, n, end);
+                               if (ct != null) {
+                                       if (ct.Replacement != null)
+                                               GetSortKey (ct.Replacement, 0, ct.Replacement.Length);
+                                       else {
+                                               byte [] b = ct.SortKey;
+                                               buf.AppendNormal (
+                                                       b [0],
+                                                       b [1],
+                                                       b [2] != 1 ? b [2] : Level2 (i),
+                                                       b [3] != 1 ? b [3] : Uni.Level3 (i));
+                                       }
+                                       n += ct.Source.Length - 1;
                                }
                                else
                                        FillSortKeyRaw (i);
                        }
-                       return buf.GetResultAndReset ();
                }
 
                bool IsIgnorable (int i)
index 0a2838ca8d02e1c24188bdfe171a1b73f9ca65e4..b3d6da3a94f876e30da9ebc55856d0a804242bfb 100644 (file)
@@ -76,6 +76,9 @@ namespace Mono.Globalization.Unicode
                        LastIndexOf ("BBCBBC", "BC", CompareOptions.IgnoreCase);
                        LastIndexOf ("original", "rig", CompareOptions.None);
                        Console.WriteLine ("original".LastIndexOf ("rig"));
+
+                       coll = new SimpleCollator (new CultureInfo ("hu"));
+                       DumpSortKey ("ZSAZS1");
                }
 
                void Generate ()
index 80446eef538c10d202497d9fcca812bc37de2819..98231ed417cfa3e8818e3fedaa2ec20a281c5f44 100644 (file)
@@ -7,8 +7,9 @@
 #      - S = D
 #              it means source S is considered as equivalent to D
 #      - S : cc pp ss tt ii
-#              it means source S has a sortkey. Here * can be used and it
-#              character in S.
+#              it means source S has a sortkey. For level 2 and 3, * can be 
+#              used to indicate that it copies the corresponding weight of
+#              the first character of S.
 #
 # Level 4 tailorings is not supported (it is logically done).
 #