2 // SortKeyBuffer.cs : buffer implementation for GetSortKey()
5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 using System.Globalization;
33 namespace Mono.Globalization.Unicode
35 // Internal sort key storage that is reused during GetSortKey.
36 internal class SortKeyBuffer
38 // l4s = small kana sensitivity, l4t = mark type,
39 // l4k = katakana flag, l4w = kana width sensitivity
40 byte [] l1b, l2b, l3b, l4sb, l4tb, l4kb, l4wb, l5b;
44 int l1, l2, l3, l4s, l4t, l4k, l4w, l5;
46 CompareOptions options;
51 public SortKeyBuffer (int lcid)
57 l1 = l2 = l3 = l4s = l4t = l4k = l4w = l5 = 0;
62 // It is used for CultureInfo.ClearCachedData().
63 internal void ClearBuffer ()
65 l1b = l2b = l3b = l4sb = l4tb = l4kb = l4wb = l5b = null;
68 internal void Initialize (CompareOptions options, int lcid, string s, bool frenchSort)
72 this.options = options;
74 processLevel2 = (options & CompareOptions.IgnoreNonSpace) == 0;
75 this.frenchSort = frenchSort;
77 // For Korean text it is likely to be much bigger (for
78 // Jamo), but even in ko-KR most of the compared
79 // strings won't be Hangul.
80 if (l1b == null || l1b.Length < len)
81 l1b = new byte [len * 2 + 10];
83 if (processLevel2 && (l2b == null || l2b.Length < len))
84 l2b = new byte [len + 10];
85 if (l3b == null || l3b.Length < len)
86 l3b = new byte [len + 10];
88 // This weight is used only in Japanese text.
89 // We could expand the initial length as well as
90 // primary length (actually x3), but even in ja-JP
91 // most of the compared strings won't be Japanese.
105 internal void AppendCJKExtension (byte lv1msb, byte lv1lsb)
107 AppendBufferPrimitive (0xFE, ref l1b, ref l1);
108 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
109 AppendBufferPrimitive (lv1msb, ref l1b, ref l1);
110 AppendBufferPrimitive (lv1lsb, ref l1b, ref l1);
112 AppendBufferPrimitive (2, ref l2b, ref l2);
113 AppendBufferPrimitive (2, ref l3b, ref l3);
116 // LAMESPEC: Windows handles some of Hangul Jamo as to have
117 // more than two primary weight values. However this causes
118 // incorrect zero-termination. So I just ignore them and
119 // treat it as usual character.
121 internal void AppendJamo (byte category, byte lv1msb, byte lv1lsb)
123 AppendNormal (category, lv1msb, 0, 0);
124 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
125 AppendBufferPrimitive (lv1lsb, ref l1b, ref l1);
126 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
127 // FIXME: those values looks extraneous but might be
128 // some advanced use. Worthy of digging into it.
129 AppendBufferPrimitive (0, ref l1b, ref l1);
130 AppendBufferPrimitive (0xFF, ref l1b, ref l1);
131 AppendBufferPrimitive (0, ref l1b, ref l1);
135 // Append sort key value from table normally.
136 internal void AppendKana (byte category, byte lv1, byte lv2, byte lv3, bool isSmallKana, byte markType, bool isKatakana, bool isHalfWidth)
138 AppendNormal (category, lv1, lv2, lv3);
140 AppendBufferPrimitive ((byte) (isSmallKana ? 0xC4 : 0xE4), ref l4sb, ref l4s);
141 AppendBufferPrimitive (markType, ref l4tb, ref l4t);
142 AppendBufferPrimitive ((byte) (isKatakana ? 0xC4 : 0xE4), ref l4kb, ref l4k);
143 AppendBufferPrimitive ((byte) (isHalfWidth ? 0xC4 : 0xE4), ref l4wb, ref l4w);
146 // Append sort key value from table normally.
147 internal void AppendNormal (byte category, byte lv1, byte lv2, byte lv3)
154 // Special weight processing
155 if (category == 6 && (options & CompareOptions.StringSort) == 0) {
156 AppendLevel5 (category, lv1);
160 // non-primary diacritical weight is added to that of
161 // the previous character (and does not reset level 3
163 if (processLevel2 && category == 1 && l1 > 0) {
164 lv2 = (byte) (lv2 + l2b [--l2]);
169 AppendBufferPrimitive (category, ref l1b, ref l1);
170 AppendBufferPrimitive (lv1, ref l1b, ref l1);
173 AppendBufferPrimitive (lv2, ref l2b, ref l2);
174 AppendBufferPrimitive (lv3, ref l3b, ref l3);
177 // Append variable-weight character.
178 // It uses level 2 index for counting offsets (since level1
179 // might be longer than 1).
180 private void AppendLevel5 (byte category, byte lv1)
184 // If it strictly matches to Windows, offsetValue is always l2.
185 int offsetValue = l2 - level5LastPos;
186 // If it strictly matches ti Windows, no 0xFF here.
187 for (; offsetValue > 8192; offsetValue -= 8192)
188 AppendBufferPrimitive (0xFF, ref l5b, ref l5);
190 // LAMESPEC: Windows cannot compute lv5 values for
191 // those string that has length larger than 8064.
192 // (It reminds me of SQL Server varchar length).
193 int offsetValue = (l2 + 1) % 8192;
195 AppendBufferPrimitive ((byte) ((offsetValue / 64) + 0x80), ref l5b, ref l5);
196 AppendBufferPrimitive ((byte) (offsetValue % 64 * 4 + 3), ref l5b, ref l5);
198 // level5LastPos = l2;
201 AppendBufferPrimitive (category, ref l5b, ref l5);
202 AppendBufferPrimitive (lv1, ref l5b, ref l5);
205 private void AppendBufferPrimitive (byte value, ref byte [] buf, ref int bidx)
207 buf [bidx++] = value;
208 if (bidx == buf.Length) {
209 byte [] tmp = new byte [bidx * 2];
210 Array.Copy (buf, tmp, buf.Length);
215 public SortKey GetResultAndReset ()
217 SortKey ret = GetResult ();
222 // For level2-5, 02 is the default and could be cut (implied).
226 private int GetOptimizedLength (byte [] data, int len, byte defaultValue)
229 for (int i = 0; i < len; i++)
230 if (data [i] != defaultValue)
235 public SortKey GetResult ()
237 if (source.Length == 0)
238 return new SortKey (lcid, source, new byte [0], options, 0, 0, 0, 0, 0, 0, 0, 0);
240 if (frenchSort && !frenchSorted && l2b != null) {
242 for (; i < l2b.Length; i++)
245 Array.Reverse (l2b, 0, i);
249 l2 = GetOptimizedLength (l2b, l2, 2);
250 l3 = GetOptimizedLength (l3b, l3, 2);
251 bool hasJapaneseWeight = (l4s > 0); // snapshot before being optimized
252 l4s = GetOptimizedLength (l4sb, l4s, 0xE4);
253 l4t = GetOptimizedLength (l4tb, l4t, 3);
254 l4k = GetOptimizedLength (l4kb, l4k, 0xE4);
255 l4w = GetOptimizedLength (l4wb, l4w, 0xE4);
256 l5 = GetOptimizedLength (l5b, l5, 2);
258 int length = l1 + l2 + l3 + l5 + 5;
259 int jpLength = l4s + l4t + l4k + l4w;
260 if (hasJapaneseWeight)
261 length += jpLength + 4;
263 byte [] ret = new byte [length];
264 Array.Copy (l1b, ret, l1);
265 ret [l1] = 1; // end-of-level mark
268 Array.Copy (l2b, 0, ret, cur, l2);
270 ret [cur++] = 1; // end-of-level mark
272 Array.Copy (l3b, 0, ret, cur, l3);
274 ret [cur++] = 1; // end-of-level mark
275 if (hasJapaneseWeight) {
276 Array.Copy (l4sb, 0, ret, cur, l4s);
278 ret [cur++] = 0xFF; // end-of-jp-subsection
279 Array.Copy (l4tb, 0, ret, cur, l4t);
281 ret [cur++] = 2; // end-of-jp-middle-subsection
282 Array.Copy (l4kb, 0, ret, cur, l4k);
284 ret [cur++] = 0xFF; // end-of-jp-subsection
285 Array.Copy (l4wb, 0, ret, cur, l4w);
287 ret [cur++] = 0xFF; // end-of-jp-subsection
289 ret [cur++] = 1; // end-of-level mark
291 Array.Copy (l5b, 0, ret, cur, l5);
293 ret [cur++] = 0; // end-of-data mark
294 return new SortKey (lcid, source, ret, options, l1, l2, l3, l4s, l4t, l4k, l4w, l5);