3 using System.Globalization;
5 namespace Mono.Globalization.Unicode
7 // Internal sort key storage that is reused during GetSortKey.
8 internal class SortKeyBuffer
10 // l4s = small kana sensitivity, l4t = mark type,
11 // l4k = katakana flag, l4w = kana width sensitivity
12 int l1, l2, l3, l4s, l4t, l4k, l4w, l5;
13 byte [] l1b, l2b, l3b, l4sb, l4tb, l4kb, l4wb, l5b;
21 CompareOptions options;
// Creates a buffer with every level's write index set to zero.
// NOTE(review): what is done with lcid is not visible in these lines — confirm.
23 public SortKeyBuffer (int lcid)
29 l1 = l2 = l3 = l4s = l4t = l4k = l4w = l5 = 0;
// Drops the reference to every per-level byte array.
34 // It is used for CultureInfo.ClearCachedData().
// Initialize allocates l1b, l2b and l3b again when it finds them null.
35 internal void ClearBuffer ()
37 l1b = l2b = l3b = l4sb = l4tb = l4kb = l4wb = l5b = null;
// Prepares the buffer for one sort key: records the compare options and
// the French-sort flag, and reuses the existing level 1-3 arrays unless
// they are null or shorter than len.
// NOTE(review): `len` is not declared in the visible lines — presumably
// s.Length; confirm.
40 internal void Initialize (CompareOptions options, int lcid, string s, bool frenchSort)
44 this.options = options;
// Level 2 is processed only when IgnoreNonSpace is not requested.
46 processLevel2 = (options & CompareOptions.IgnoreNonSpace) == 0;
47 this.frenchSort = frenchSort;
// Level 1 starts at len * 2 + 10 bytes.
49 // For Korean text it is likely to be much bigger (for
50 // Jamo), but even in ko-KR most of the compared
51 // strings won't be Hangul.
52 if (l1b == null || l1b.Length < len)
53 l1b = new byte [len * 2 + 10];
// The level 2 array is allocated only when level 2 is processed.
55 if (processLevel2 && (l2b == null || l2b.Length < len))
56 l2b = new byte [len + 10];
57 if (l3b == null || l3b.Length < len)
58 l3b = new byte [len + 10];
60 // This weight is used only in Japanese text.
61 // We could expand the initial length as well as
62 // primary length (actually x3), but even in ja-JP
63 // most of the compared strings won't be Japanese.
// Appends a CJK extension character.
// Level 1 receives the 0xFE 0xFF prefix followed by the two primary weight
// bytes; levels 2 and 3 each receive 2, the value GetResult passes as the
// default for those levels.
77 internal void AppendCJKExtension (byte lv1msb, byte lv1lsb)
79 AppendBufferPrimitive (0xFE, ref l1b, ref l1);
80 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
81 AppendBufferPrimitive (lv1msb, ref l1b, ref l1);
82 AppendBufferPrimitive (lv1lsb, ref l1b, ref l1);
84 AppendBufferPrimitive (2, ref l2b, ref l2);
85 AppendBufferPrimitive (2, ref l3b, ref l3);
88 // LAMESPEC: Windows treats some Hangul Jamo as having
89 // more than two primary weight values. However, this causes
90 // incorrect zero-termination. So I just ignore them and
91 // treat them as usual characters.
93 internal void AppendJamo (byte category, byte lv1msb, byte lv1lsb)
// Normal append with zero level 2/3 inputs, then six more level 1 bytes.
95 AppendNormal (category, lv1msb, 0, 0);
96 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
97 AppendBufferPrimitive (lv1lsb, ref l1b, ref l1);
98 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
99 // FIXME: these values look extraneous but might serve
100 // some advanced use. Worth digging into.
101 AppendBufferPrimitive (0, ref l1b, ref l1);
102 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
103 AppendBufferPrimitive (0, ref l1b, ref l1);
107 // Appends a kana character: the normal weights plus one byte in each
// of the four level 4 buffers (small kana, mark type, katakana, width).
108 internal void AppendKana (byte category, byte lv1, byte lv2, byte lv3, bool isSmallKana, byte markType, bool isKatakana, bool isHalfWidth)
110 AppendNormal (category, lv1, lv2, lv3);
// Each flag is stored as 0xC4 when set and 0xE4 when clear. GetResult
// passes 0xE4 (and 3 for the mark type) as the default value when it
// optimizes these buffers.
112 AppendBufferPrimitive ((byte) (isSmallKana ? 0xC4 : 0xE4), ref l4sb, ref l4s);
113 AppendBufferPrimitive (markType, ref l4tb, ref l4t);
114 AppendBufferPrimitive ((byte) (isKatakana ? 0xC4 : 0xE4), ref l4kb, ref l4k);
115 AppendBufferPrimitive ((byte) (isHalfWidth ? 0xC4 : 0xE4), ref l4wb, ref l4w);
118 // Append sort key value from table normally.
// category and lv1 go to level 1, lv2 to level 2 and lv3 to level 3.
119 internal void AppendNormal (byte category, byte lv1, byte lv2, byte lv3)
126 // Special weight processing
// Category 6 is handed to AppendLevel5 unless StringSort is requested.
127 if (category == 6 && (options & CompareOptions.StringSort) == 0) {
128 AppendLevel5 (category, lv1);
132 // non-primary diacritical weight is added to that of
133 // the previous character (and does not reset level 3
// NOTE(review): the guard tests l1 but the index being decremented is
// l2 — confirm l2 is always positive whenever l1 is.
135 if (processLevel2 && category == 1 && l1 > 0) {
136 lv2 = (byte) (lv2 + l2b [--l2]);
141 AppendBufferPrimitive (category, ref l1b, ref l1);
142 AppendBufferPrimitive (lv1, ref l1b, ref l1);
145 AppendBufferPrimitive (lv2, ref l2b, ref l2);
146 AppendBufferPrimitive (lv3, ref l3b, ref l3);
149 // Append variable-weight character.
150 // It uses the level 2 index for counting offsets (since level 1
151 // might be longer than 1).
152 private void AppendLevel5 (byte category, byte lv1)
// NOTE(review): offsetValue is declared twice in the visible lines (here
// and further down); presumably only one variant is compiled — confirm.
156 // To strictly match Windows, offsetValue would always be l2.
157 int offsetValue = l2 - level5LastPos;
158 // To strictly match Windows, there would be no 0xFF here.
159 for (; offsetValue > 8192; offsetValue -= 8192)
160 AppendBufferPrimitive (0xFF, ref l5b, ref l5);
162 // LAMESPEC: Windows cannot compute lv5 values for
163 // strings whose length is larger than 8064.
164 // (It reminds me of SQL Server varchar length).
165 int offsetValue = (l2 + 1) % 8192;
// The offset is written as two bytes: offsetValue / 64 + 0x80, then
// (offsetValue % 64) * 4 + 3.
167 AppendBufferPrimitive ((byte) ((offsetValue / 64) + 0x80), ref l5b, ref l5);
168 AppendBufferPrimitive ((byte) (offsetValue % 64 * 4 + 3), ref l5b, ref l5);
// The category and the primary weight follow the offset bytes.
173 AppendBufferPrimitive (category, ref l5b, ref l5);
174 AppendBufferPrimitive (lv1, ref l5b, ref l5);
// Stores value at buf [bidx] and advances bidx. Once the array is full, a
// new one of twice the size is allocated and the old contents copied in.
// NOTE(review): the assignment of the grown array back to buf is not
// visible in these lines — confirm it happens.
177 private void AppendBufferPrimitive (byte value, ref byte [] buf, ref int bidx)
179 buf [bidx++] = value;
180 if (bidx == buf.Length) {
181 byte [] tmp = new byte [bidx * 2];
182 Array.Copy (buf, tmp, buf.Length);
// Builds the sort key through GetResult.
// NOTE(review): the reset and return steps implied by the name are not
// visible in these lines — confirm.
187 public SortKey GetResultAndReset ()
189 SortKey ret = GetResult ();
194 // For level2-5, 02 is the default and could be cut (implied).
// Scans data [0..len) comparing each byte with defaultValue; GetResult
// stores the returned value as the level's new length.
// NOTE(review): which position is returned is not visible here —
// presumably the length without trailing default bytes; confirm.
198 private int GetOptimizedLength (byte [] data, int len, byte defaultValue)
201 for (int i = 0; i < len; i++)
202 if (data [i] != defaultValue)
// Assembles the final key bytes from the level buffers and wraps them in a
// SortKey. Layout, as written below:
//   level1 01 level2 01 level3 01 [l4s FF l4t 02 l4k FF l4w FF] 01 level5 00
// The bracketed Japanese section is emitted only when l4s held data before
// optimization.
// NOTE(review): the declaration of cur and its advance past each copied
// run are not visible in these lines — confirm.
207 public SortKey GetResult ()
// French sort: reverses the first i bytes of l2b, where i is the index at
// which the scan below stops.
209 if (frenchSort && !frenchSorted) {
211 for (; i < l2b.Length; i++)
214 Array.Reverse (l2b, 0, i);
// Replace each level's length with its optimized length.
218 l2 = GetOptimizedLength (l2b, l2, 2);
219 l3 = GetOptimizedLength (l3b, l3, 2);
220 bool hasJapaneseWeight = (l4s > 0); // snapshot before being optimized
221 l4s = GetOptimizedLength (l4sb, l4s, 0xE4);
222 l4t = GetOptimizedLength (l4tb, l4t, 3);
223 l4k = GetOptimizedLength (l4kb, l4k, 0xE4);
224 l4w = GetOptimizedLength (l4wb, l4w, 0xE4);
225 l5 = GetOptimizedLength (l5b, l5, 2);
// +5 accounts for the four 01 level marks and the final 00 written below.
227 int length = l1 + l2 + l3 + l5 + 5;
228 int jpLength = l4s + l4t + l4k + l4w;
// +4 accounts for the FF, 02, FF, FF subsection marks.
229 if (hasJapaneseWeight)
230 length += jpLength + 4;
232 byte [] ret = new byte [length];
233 Array.Copy (l1b, ret, l1);
234 ret [l1] = 1; // end-of-level mark
237 Array.Copy (l2b, 0, ret, cur, l2);
239 ret [cur++] = 1; // end-of-level mark
241 Array.Copy (l3b, 0, ret, cur, l3);
243 ret [cur++] = 1; // end-of-level mark
244 if (hasJapaneseWeight) {
245 Array.Copy (l4sb, 0, ret, cur, l4s);
247 ret [cur++] = 0xFF; // end-of-jp-subsection
248 Array.Copy (l4tb, 0, ret, cur, l4t);
250 ret [cur++] = 2; // end-of-jp-middle-subsection
251 Array.Copy (l4kb, 0, ret, cur, l4k);
253 ret [cur++] = 0xFF; // end-of-jp-subsection
254 Array.Copy (l4wb, 0, ret, cur, l4w);
256 ret [cur++] = 0xFF; // end-of-jp-subsection
258 ret [cur++] = 1; // end-of-level mark
260 Array.Copy (l5b, 0, ret, cur, l5);
262 ret [cur++] = 0; // end-of-data mark
// The optimized per-level lengths are passed to the SortKey with the bytes.
263 return new SortKey (lcid, source, ret, options, l1, l2, l3, l4s, l4t, l4k, l4w, l5);