2 // SortKeyBuffer.cs : buffer implementation for GetSortKey()
5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 using System.Globalization;
33 namespace Mono.Globalization.Unicode
35 // Internal sort key storage that is reused during GetSortKey.
36 internal class SortKeyBuffer
38 // l4s = small kana sensitivity, l4t = mark type,
39 // l4k = katakana flag, l4w = kana width sensitivity
40 int l1, l2, l3, l4s, l4t, l4k, l4w, l5;
41 byte [] l1b, l2b, l3b, l4sb, l4tb, l4kb, l4wb, l5b;
49 CompareOptions options;
51 public SortKeyBuffer (int lcid)
57 l1 = l2 = l3 = l4s = l4t = l4k = l4w = l5 = 0;
62 // It is used for CultureInfo.ClearCachedData().
// Drops all eight per-level byte buffers by setting their references to
// null. Initialize allocates l1b/l2b/l3b again when it finds them null.
63 internal void ClearBuffer ()
65 l1b = l2b = l3b = l4sb = l4tb = l4kb = l4wb = l5b = null;
// Prepares the buffer for building one sort key: records the compare
// options and the French-sort flag, then makes sure the level 1-3 byte
// buffers can hold at least `len` bytes. Existing buffers are reused and
// are only replaced when they are null or shorter than `len`.
// NOTE(review): `len` is not declared in the lines shown here; presumably
// it is the length of `s` - confirm.
68 internal void Initialize (CompareOptions options, int lcid, string s, bool frenchSort)
72 this.options = options;
// Level 2 weights are processed unless IgnoreNonSpace was requested.
74 processLevel2 = (options & CompareOptions.IgnoreNonSpace) == 0;
75 this.frenchSort = frenchSort;
77 // For Korean text it is likely to be much bigger (for
78 // Jamo), but even in ko-KR most of the compared
79 // strings won't be Hangul.
// Level 1 gets twice the length plus slack; AppendNormal writes two
// level 1 bytes (category and lv1) per call.
80 if (l1b == null || l1b.Length < len)
81 l1b = new byte [len * 2 + 10];
// The level 2 buffer is only allocated when level 2 is processed.
83 if (processLevel2 && (l2b == null || l2b.Length < len))
84 l2b = new byte [len + 10];
85 if (l3b == null || l3b.Length < len)
86 l3b = new byte [len + 10];
88 // This weight is used only in Japanese text.
89 // We could expand the initial length as well as
90 // primary length (actually x3), but even in ja-JP
91 // most of the compared strings won't be Japanese.
// Appends a CJK extension character. Level 1 receives the four bytes
// 0xFE, 0xFF, lv1msb, lv1lsb; levels 2 and 3 each receive the value 2.
105 internal void AppendCJKExtension (byte lv1msb, byte lv1lsb)
107 AppendBufferPrimitive (0xFE, ref l1b, ref l1);
108 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
109 AppendBufferPrimitive (lv1msb, ref l1b, ref l1);
110 AppendBufferPrimitive (lv1lsb, ref l1b, ref l1);
// NOTE(review): a line is elided before the next statement; l2b is only
// allocated by Initialize when processLevel2 is set, so presumably this
// append is guarded by it - confirm.
112 AppendBufferPrimitive (2, ref l2b, ref l2);
113 AppendBufferPrimitive (2, ref l3b, ref l3);
116 // LAMESPEC: Windows treats some Hangul Jamo as having
117 // more than two primary weight values. However this causes
118 // incorrect zero-termination. So I just ignore them and
119 // treat them as usual characters.
// Appends a Hangul Jamo: first a normal entry built from
// (category, lv1msb, 0, 0), then six more level 1 bytes:
// 0xFF, lv1lsb, 0xFF, 0, 0xFF, 0.
121 internal void AppendJamo (byte category, byte lv1msb, byte lv1lsb)
123 AppendNormal (category, lv1msb, 0, 0);
124 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
125 AppendBufferPrimitive (lv1lsb, ref l1b, ref l1);
126 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
127 // FIXME: those values look extraneous but might have
128 // some advanced use. Worth digging into.
129 AppendBufferPrimitive (0, ref l1b, ref l1);
130 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
131 AppendBufferPrimitive (0, ref l1b, ref l1);
135 // Append sort key value for a kana character.
// The level 1-3 weights go through AppendNormal. In addition one byte is
// appended to each of the four level 4 buffers:
//   l4sb: 0xC4 if isSmallKana, otherwise 0xE4
//   l4tb: markType, written as given
//   l4kb: 0xC4 if isKatakana, otherwise 0xE4
//   l4wb: 0xC4 if isHalfWidth, otherwise 0xE4
136 internal void AppendKana (byte category, byte lv1, byte lv2, byte lv3, bool isSmallKana, byte markType, bool isKatakana, bool isHalfWidth)
138 AppendNormal (category, lv1, lv2, lv3);
140 AppendBufferPrimitive ((byte) (isSmallKana ? 0xC4 : 0xE4), ref l4sb, ref l4s);
141 AppendBufferPrimitive (markType, ref l4tb, ref l4t);
142 AppendBufferPrimitive ((byte) (isKatakana ? 0xC4 : 0xE4), ref l4kb, ref l4k);
143 AppendBufferPrimitive ((byte) (isHalfWidth ? 0xC4 : 0xE4), ref l4wb, ref l4w);
146 // Append sort key value from table normally.
// category and lv1 are written to the level 1 buffer, lv2 to level 2 and
// lv3 to level 3, with two special cases handled first (see below).
// NOTE(review): several lines between the visible statements are elided,
// so any early return or guard around the appends is not shown here.
147 internal void AppendNormal (byte category, byte lv1, byte lv2, byte lv3)
154 // Special weight processing
// Category 6 characters are handed to AppendLevel5 unless StringSort
// is requested.
155 if (category == 6 && (options & CompareOptions.StringSort) == 0) {
156 AppendLevel5 (category, lv1);
160 // non-primary diacritical weight is added to that of
161 // the previous character (and does not reset level 3
// The last level 2 byte is popped (l2 is decremented) and added to lv2;
// the sum is cast back to byte.
163 if (processLevel2 && category == 1 && l1 > 0) {
164 lv2 = (byte) (lv2 + l2b [--l2]);
169 AppendBufferPrimitive (category, ref l1b, ref l1);
170 AppendBufferPrimitive (lv1, ref l1b, ref l1);
173 AppendBufferPrimitive (lv2, ref l2b, ref l2);
174 AppendBufferPrimitive (lv3, ref l3b, ref l3);
177 // Append variable-weight character.
178 // It uses level 2 index for counting offsets (since level1
179 // might be longer than 1).
// Each entry written to the level 5 buffer is a two-byte encoding of
// offsetValue followed by the category and lv1 bytes.
// NOTE(review): two declarations of offsetValue are visible below;
// presumably they sit in alternative conditional-compilation branches
// whose directives are not shown - confirm which one is active.
180 private void AppendLevel5 (byte category, byte lv1)
184 // If it strictly matches to Windows, offsetValue is always l2.
185 int offsetValue = l2 - level5LastPos;
186 // If it strictly matches to Windows, no 0xFF here.
// One 0xFF byte is emitted for every full 8192 taken off the offset.
187 for (; offsetValue > 8192; offsetValue -= 8192)
188 AppendBufferPrimitive (0xFF, ref l5b, ref l5);
190 // LAMESPEC: Windows cannot compute lv5 values for
191 // those strings that have a length larger than 8064.
192 // (It reminds me of SQL Server varchar length).
193 int offsetValue = (l2 + 1) % 8192;
// First byte: offsetValue / 64 with 0x80 added.
// Second byte: (offsetValue % 64) * 4 + 3.
195 AppendBufferPrimitive ((byte) ((offsetValue / 64) + 0x80), ref l5b, ref l5);
196 AppendBufferPrimitive ((byte) (offsetValue % 64 * 4 + 3), ref l5b, ref l5);
201 AppendBufferPrimitive (category, ref l5b, ref l5);
202 AppendBufferPrimitive (lv1, ref l5b, ref l5);
// Writes one byte at buf[bidx] and advances bidx. When that write fills
// the buffer, an array of twice the size is allocated and the existing
// content is copied into it.
// buf and bidx are passed by ref so the caller's buffer field and counter
// are the ones updated.
// NOTE(review): the assignment of the larger array back to buf is not in
// the lines shown here - confirm. buf is dereferenced without a null check.
205 private void AppendBufferPrimitive (byte value, ref byte [] buf, ref int bidx)
207 buf [bidx++] = value;
208 if (bidx == buf.Length) {
209 byte [] tmp = new byte [bidx * 2];
210 Array.Copy (buf, tmp, buf.Length);
// Obtains the sort key from GetResult.
// NOTE(review): the remainder of the body is not shown; the name suggests
// the buffer state is reset before the key is returned - confirm.
215 public SortKey GetResultAndReset ()
217 SortKey ret = GetResult ();
222 // For level2-5, 02 is the default and could be cut (implied).
// Scans data [0 .. len) and compares each byte with defaultValue.
// NOTE(review): the statements that compute and return the result are not
// shown; going by the comment above and the name, presumably the returned
// length omits default-valued bytes that can be cut - confirm.
226 private int GetOptimizedLength (byte [] data, int len, byte defaultValue)
229 for (int i = 0; i < len; i++)
230 if (data [i] != defaultValue)
// Builds the SortKey from the accumulated level buffers.
// Byte layout of the key data, as written below:
//   level 1, 01, level 2, 01, level 3, 01,
//   [l4s, FF, l4t, 02, l4k, FF, l4w, FF]   (only when Japanese weights exist)
//   01, level 5, 00
// The lengths of levels 2-5 are first passed through GetOptimizedLength.
// NOTE(review): several short statements (declaration and advancing of
// `cur`, guards around the copies) are not in the lines shown here.
235 public SortKey GetResult ()
// French sort: part of the level 2 buffer is reversed, once per key.
// NOTE(review): how `i` (the reversed length) is determined is not shown.
237 if (frenchSort && !frenchSorted && l2b != null) {
239 for (; i < l2b.Length; i++)
242 Array.Reverse (l2b, 0, i);
// Default values passed per level: 2 for levels 2, 3 and 5; 0xE4 for the
// l4s/l4k/l4w subsections; 3 for the l4t subsection.
246 l2 = GetOptimizedLength (l2b, l2, 2);
247 l3 = GetOptimizedLength (l3b, l3, 2);
248 bool hasJapaneseWeight = (l4s > 0); // snapshot before being optimized
249 l4s = GetOptimizedLength (l4sb, l4s, 0xE4);
250 l4t = GetOptimizedLength (l4tb, l4t, 3);
251 l4k = GetOptimizedLength (l4kb, l4k, 0xE4);
252 l4w = GetOptimizedLength (l4wb, l4w, 0xE4);
253 l5 = GetOptimizedLength (l5b, l5, 2);
// +5 accounts for the four 01 end-of-level marks and the final 00.
255 int length = l1 + l2 + l3 + l5 + 5;
256 int jpLength = l4s + l4t + l4k + l4w;
// +4 accounts for the four Japanese subsection terminators.
257 if (hasJapaneseWeight)
258 length += jpLength + 4;
260 byte [] ret = new byte [length];
261 Array.Copy (l1b, ret, l1);
262 ret [l1] = 1; // end-of-level mark
265 Array.Copy (l2b, 0, ret, cur, l2);
267 ret [cur++] = 1; // end-of-level mark
269 Array.Copy (l3b, 0, ret, cur, l3);
271 ret [cur++] = 1; // end-of-level mark
272 if (hasJapaneseWeight) {
273 Array.Copy (l4sb, 0, ret, cur, l4s);
275 ret [cur++] = 0xFF; // end-of-jp-subsection
276 Array.Copy (l4tb, 0, ret, cur, l4t);
278 ret [cur++] = 2; // end-of-jp-middle-subsection
279 Array.Copy (l4kb, 0, ret, cur, l4k);
281 ret [cur++] = 0xFF; // end-of-jp-subsection
282 Array.Copy (l4wb, 0, ret, cur, l4w);
284 ret [cur++] = 0xFF; // end-of-jp-subsection
286 ret [cur++] = 1; // end-of-level mark
288 Array.Copy (l5b, 0, ret, cur, l5);
290 ret [cur++] = 0; // end-of-data mark
// The optimized per-level lengths are handed to the SortKey together with
// the key bytes.
291 return new SortKey (lcid, source, ret, options, l1, l2, l3, l4s, l4t, l4k, l4w, l5);