if (idx == 0)
return 0;
while (mappedChars [idx] == startCh) {
+ int prevCB = 0;
+ int combiningClass = 0;
for (int i = 1, j = 1; ; i++, j++) {
+ prevCB = combiningClass;
+
if (mappedChars [idx + i] == 0)
// matched
return idx;
// handle blocked characters here.
char curCh;
- int combiningClass;
- int nextCB = 0;
+ bool match = false;
do {
curCh = s != null ?
s [start + j] :
sb [start + j];
combiningClass = GetCombiningClass (curCh);
- if (++j + start >= charsLength ||
- combiningClass == 0)
+ if (mappedChars [idx + i] == curCh) {
+ match = true;
break;
- nextCB = GetCombiningClass (
- s != null ?
- s [start + j] :
- sb [start + j]);
- } while (nextCB > 0 && combiningClass >= nextCB);
- j--;
- if (mappedChars [idx + i] == curCh)
- continue;
- if (mappedChars [idx + i] > curCh)
- return 0; // no match
+ }
+ if (combiningClass < prevCB) // blocked. Give up this map entry.
+ break;
+ if (++j + start >= charsLength || combiningClass == 0)
+ break;
+ } while (true);
+
+ if (match)
+ continue; // check next character in the current map entry string.
+ if (prevCB < combiningClass) {
+ j--;
+ if (mappedChars [idx + i] == curCh)
+ continue;
+ //if (mappedChars [idx + i] > curCh)
+ // return 0; // no match
+ }
// otherwise move idx to next item
while (mappedChars [i] != 0)
i++;
int cur = i;
// FIXME: It should check "blocked" too
for (;i > 0; i--) // this loop does not check sb[0], but regardless of the condition below it should not go under 0.
- if (!CanBePrimaryComposite ((int) sb [i]))
+ if (GetCombiningClass ((int) sb [i]) == 0)
break;
int idx = 0; // index to mappedChars
i = cur;
continue;
}
+
int prim = GetPrimaryCompositeFromMapIndex (idx);
int len = GetNormalizedStringLength (prim);
if (prim == 0 || len == 0)
- throw new SystemException ("Internal error: should not happen.");
+ throw new SystemException ("Internal error: should not happen. Input: " + sb);
int removed = 0;
sb.Insert (i++, (char) prim); // always single character
// handle blocked characters here.
while (removed < len) {
- if (i + 1 < sb.Length) {
- int cb = GetCombiningClass (sb [i]);
- if (cb > 0) {
- int next = GetCombiningClass (sb [i + 1]);
- if (next != 0 && cb >= next) {
- i++;
- continue;
- }
- }
+ if (sb [i] == mappedChars [idx + removed]) {
+ sb.Remove (i, 1);
+ removed++;
+ // otherwise, skip it.
}
- sb.Remove (i, 1);
- removed++;
+ else
+ i++;
}
- i = cur - 1; // apply recursively
+ i = cur - 1;
}
}
// partly copied from Combine()
int cur = i;
- // FIXME: It should check "blocked" too
- for (;i >= 0; i--)
- if (!CanBePrimaryComposite ((int) source [i]))
+ for (;i > 0; i--) // this loop does not check sb[0], but regardless of the condition below it should not go under 0.
+ if (GetCombiningClass ((int) source [i]) == 0)
break;
- i++;
+ //i++;
// Now i is the "starter"
for (; i < cur; i++) {
if (GetPrimaryCompositeCharIndex (source, i) != 0)
Assert.AreEqual (s2, s1.Normalize (NormalizationForm.FormC), "#1");
Assert.AreEqual (s2, s1.Normalize (NormalizationForm.FormKC), "#2");
}
+
+ [Test]
+ public void Normalize3 () // Checks all four normalization forms of one string in which U+0648 is followed by the marks U+0654 and U+0652.
+ {
+ var s = new string (new char [] { '\u064A', '\u064F', '\u0648', '\u0654', '\u0652', '\u064A', '\u064F', '\u0648', '\u0654' }); // input: "U+0648 U+0654 U+0652" in the middle, "U+0648 U+0654" at the end
+ // Expected results; formKC repeats formC and formKD repeats formD for this input.
+ var formC = new string (new char [] { '\u064A', '\u064F', '\u0624', '\u0652', '\u064a', '\u064f', '\u0624' }); // each "U+0648 U+0654" is expected as the single character U+0624; U+0652 remains after the first one
+ var formD = new string (new char [] { '\u064A', '\u064F', '\u0648', '\u0652', '\u0654', '\u064a', '\u064f', '\u0648', '\u0654' }); // same characters as the input, except U+0652 now precedes U+0654 after the first U+0648
+ var formKC = new string (new char [] { '\u064A', '\u064F', '\u0624', '\u0652', '\u064a', '\u064f', '\u0624' });
+ var formKD = new string (new char [] { '\u064A', '\u064F', '\u0648', '\u0652', '\u0654', '\u064a', '\u064f', '\u0648', '\u0654' });
+ // NOTE(review): the U+0652/U+0654 swap is presumably canonical reordering by combining class, and U+0624 the composite of U+0648 + U+0654 -- confirm against UnicodeData.txt.
+ Assert.AreEqual (formD, s.Normalize (NormalizationForm.FormD), "#1");
+ Assert.AreEqual (formC, s.Normalize (NormalizationForm.FormC), "#2");
+ Assert.AreEqual (formKD, s.Normalize (NormalizationForm.FormKD), "#3");
+ Assert.AreEqual (formKC, s.Normalize (NormalizationForm.FormKC), "#4");
+ }
#endif
}