byte [] diacritical = new byte [char.MaxValue + 1];
string [] diacritics = new string [] {
- // LATIN
- "WITH VERTICAL LINE ABOVE;",
+ // LATIN, CYRILLIC etc.
+ "UPTURN", "DOUBLE-STRUCK",
+ "MIDDLE HOOK", "WITH VERTICAL LINE ABOVE;",
"WITH GRAVE ACCENT;", "WITH ACUTE ACCENT;", "WITH CIRCUMFLEX ACCENT;",
"WITH ACUTE;", "WITH GRAVE;", "WITH DOT ABOVE;", " MIDDLE DOT;",
"WITH CIRCUMFLEX;", "WITH DIAERESIS;", "WITH CARON;", "WITH BREVE;",
//
"WITH OVERLINE",
"WITH HOOK;", "LEFT HOOK;", " WITH HOOK ABOVE;",
- " DOUBLE GRAVE;",
+ " DOUBLE GRAVE",
" INVERTED BREVE",
"ROMAN NUMERAL",
" PRECEDED BY APOSTROPHE",
" CIRCUMFLEX AND DOT BELOW",
" BREVE AND DOT BELOW",
" DOT BELOW AND MACRON",
+ " TONE TWO",
" HORN AND HOOK ABOVE",
" HORN AND DOT",
// CIRCLED, PARENTHESIZED and so on
};
byte [] diacriticWeights = new byte [] {
// LATIN.
- 5,
+ 3, 3, 5, 5,
0xF, 0xE, 0x12,
0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
0x17, 0x19, 0x1A, 0x1B, 0x1C,
//
0x60, 0x60, 0x61, 0x61, 0x63, 0x68, 0x68,
0x69, 0x69, 0x6A, 0x6D, 0x6E,
- 0x95, 0xAA,
+ 0x87, 0x95, 0xAA,
// CIRCLED, PARENTHESIZED and so on.
0xEE, 0xEE, 0xEE, 0xEE, 0xEE,
0xF3, 0xF3, 0xF3
0xE50, 0xE60, 0xED0, 0xEE0
};
- char [] orderedCyrillic;
char [] orderedGurmukhi;
char [] orderedGujarati;
char [] orderedGeorgian;
// cp -> level1 value
Hashtable arabicLetterPrimaryValues = new Hashtable ();
- Hashtable cyrillicLetterPrimaryValues = new Hashtable ();
// letterName -> cp
Hashtable arabicNameMap = new Hashtable ();
- Hashtable cyrillicNameMap = new Hashtable ();
// cp -> Hashtable [decompType] -> cp
Hashtable nfkdMap = new Hashtable ();
#if Binary
MemoryStream ms = new MemoryStream ();
BinaryWriter binary = new BinaryWriter (ms);
+ binary.Write (cjk.Length);
#endif
for (int i = 0; i < cjk.Length; i++) {
if (i + offset == max)
ParseJISOrder (cp932); // in prior to ParseUnidata()
ParseUnidata (unidata);
+ ModifyUnidata ();
ParseDerivedCoreProperties (derivedCoreProps);
ParseScripts (scripts);
ParseCJK (chXML, jaXML, koXML);
}
// Box names
- if (0x2500 <= cp && cp < 0x25B0) {
+ if (0x2500 <= cp && cp < 0x2600) {
int value = 0;
// flags:
// up:1 down:2 right:4 left:8 vert:16 horiz:32
10, 10, 11, 11,
12, 12, 13, 13,
14, 14, 14, 14};
- if (s.IndexOf ("BOX DRAWINGS ") > 0) {
+ if (s.IndexOf ("BOX DRAWINGS ") >= 0) {
int flag = 0;
- if (s.IndexOf (" UP") > 0)
+ if (s.IndexOf (" UP") >= 0)
flag |= 1;
- if (s.IndexOf (" DOWN") > 0)
+ if (s.IndexOf (" DOWN") >= 0)
flag |= 2;
- if (s.IndexOf (" RIGHT") > 0)
+ if (s.IndexOf (" RIGHT") >= 0)
flag |= 4;
- if (s.IndexOf (" LEFT") > 0)
+ if (s.IndexOf (" LEFT") >= 0)
flag |= 8;
- if (s.IndexOf (" VERTICAL") > 0)
+ if (s.IndexOf (" VERTICAL") >= 0)
flag |= 16;
- if (s.IndexOf (" HORIZONTAL") > 0)
+ if (s.IndexOf (" HORIZONTAL") >= 0)
flag |= 32;
int fidx = flags.IndexOf (flag);
value = fidx < 0 ? fidx : offsets [fidx];
- } else if (s.IndexOf ("BLOCK") > 0) {
- if (s.IndexOf ("ONE EIGHTH") > 0)
+ } else if (s.IndexOf ("BLOCK") >= 0) {
+ if (s.IndexOf ("ONE EIGHTH") >= 0)
value = 0x12;
- else if (s.IndexOf ("ONE QUARTER") > 0)
+ else if (s.IndexOf ("ONE QUARTER") >= 0)
value = 0x13;
- else if (s.IndexOf ("THREE EIGHTHS") > 0)
+ else if (s.IndexOf ("THREE EIGHTHS") >= 0)
value = 0x14;
- else if (s.IndexOf ("HALF") > 0)
+ else if (s.IndexOf ("HALF") >= 0)
value = 0x15;
- else if (s.IndexOf ("FIVE EIGHTHS") > 0)
+ else if (s.IndexOf ("FIVE EIGHTHS") >= 0)
value = 0x16;
- else if (s.IndexOf ("THREE QUARTERS") > 0)
+ else if (s.IndexOf ("THREE QUARTERS") >= 0)
value = 0x17;
- else if (s.IndexOf ("SEVEN EIGHTHS") > 0)
+ else if (s.IndexOf ("SEVEN EIGHTHS") >= 0)
value = 0x18;
else
value = 0x19;
- } else if (s.IndexOf ("SHADE") > 0)
+ }
+ else if (s.IndexOf ("SHADE") >= 0)
value = 0x19;
+ else if (s.IndexOf ("SQUARE") >= 0)
+ value = 0xBC - 0xE5;
+ else if (s.IndexOf ("VERTICAL RECTANGLE") >= 0)
+ value = 0xBE - 0xE5;
+ else if (s.IndexOf ("RECTANGLE") >= 0)
+ value = 0xBD - 0xE5;
+ else if (s.IndexOf ("PARALLELOGRAM") >= 0)
+ value = 0xBF - 0xE5;
+ else if (s.IndexOf ("TRIANGLE") >= 0) {
+ if (s.IndexOf ("UP-POINTING") >= 0)
+ value = 0xC0 - 0xE5;
+ else if (s.IndexOf ("RIGHT-POINTING") >= 0)
+ value = 0xC1 - 0xE5;
+ else if (s.IndexOf ("DOWN-POINTING") >= 0)
+ value = 0xC2 - 0xE5;
+ else if (s.IndexOf ("LEFT-POINTING") >= 0)
+ value = 0xC3 - 0xE5;
+ }
+ else if (s.IndexOf ("POINTER") >= 0) {
+ if (s.IndexOf ("RIGHT-POINTING") >= 0)
+ value = 0xC4 - 0xE5;
+ else if (s.IndexOf ("LEFT-POINTING") >= 0)
+ value = 0xC5 - 0xE5;
+ }
+ else if (s.IndexOf ("DIAMOND") >= 0)
+ value = 0xC6 - 0xE5;
+ else if (s.IndexOf ("FISHEYE") >= 0)
+ value = 0xC7 - 0xE5;
+ else if (s.IndexOf ("LOZENGE") >= 0)
+ value = 0xC8 - 0xE5;
+ else if (s.IndexOf ("BULLSEYE") >= 0)
+ value = 0xC9 - 0xE5;
+ else if (s.IndexOf ("CIRCLE") >= 0) {
+ if (cp == 0x25D6) // it could be IndexOf ("LEFT HALF BLACK CIRCLE")
+ value = 0xCA - 0xE5;
+ else if (cp == 0x25D7) // it could be IndexOf ("RIGHT HALF BLACK CIRCLE")
+ value = 0xCB - 0xE5;
+ else
+ value = 0xC9 - 0xE5;
+ }
+ if (0x25DA <= cp && cp <= 0x25E5)
+ value = 0xCD + cp - 0x25DA - 0xE5;
+
// SPECIAL CASE: BOX DRAWING DIAGONAL patterns
switch (cp) {
case 0x2571: value = 0xF; break;
case 0x2572: value = 0x10; break;
case 0x2573: value = 0x11; break;
}
- if (value >= 0)
+ if (value != 0)
boxValues.Add (new DictionaryEntry (
cp, value));
}
sortableCharNames.Add (new DictionaryEntry (
cp, name.Substring (7)));
+ if (Char.GetUnicodeCategory ((char) cp) ==
+ UnicodeCategory.MathSymbol) {
+ if (name.StartsWith ("CIRCLED "))
+ diacritical [cp] = 0xEE;
+ if (name.StartsWith ("SQUARED "))
+ diacritical [cp] = 0xEF;
+ }
+
// diacritical weights by character name
if (diacritics.Length != diacriticWeights.Length)
throw new Exception (String.Format ("Should not happen. weights are {0} while labels are {1}", diacriticWeights.Length, diacritics.Length));
(s.IndexOf ("DIGIT") > 0 || s.IndexOf ("NUMBER") > 0))
diacritical [cp] |= 0xF4;
- // Cyrillic letter name
- if (0x0430 <= cp && cp <= 0x0486 &&
- Char.IsLetter ((char) cp)) {
- byte value = (byte) (cyrillicNameMap.Count * 3 + 0x06);
- // Get primary letter name i.e.
- // XXX part of CYRILLIC LETTER XXX yyy
- // e.g. "IZHITSA" for "IZHITSA DOUBLE GRAVE".
- string letterName =
- name.Substring (name.IndexOf ("LETTER ") + 7);
- int tmpIdx = letterName.IndexOf (' ');
- letterName = tmpIdx < 0 ? letterName : letterName.Substring (0, tmpIdx);
-//Console.Error.WriteLine ("Arabic name for {0:X04} is {1}", cp, letterName);
- if (cyrillicNameMap.ContainsKey (letterName))
- value = (byte) cyrillicLetterPrimaryValues [cyrillicNameMap [letterName]];
- else
- cyrillicNameMap [letterName] = cp;
-
- cyrillicLetterPrimaryValues [cp] = value;
- }
-
// Arabic letter name
if (0x0621 <= cp && cp <= 0x064A &&
Char.GetUnicodeCategory ((char) cp)
void ParseScripts (string filename)
{
- ArrayList cyrillic = new ArrayList ();
ArrayList gurmukhi = new ArrayList ();
ArrayList gujarati = new ArrayList ();
ArrayList georgian = new ArrayList ();
continue;
switch (value) {
- case "Cyrillic":
- for (int x = cp; x <= cpEnd; x++)
- if (!IsIgnorable (x))
- cyrillic.Add ((char) x);
- break;
case "Gurmukhi":
for (int x = cp; x <= cpEnd; x++)
if (!IsIgnorable (x))
}
}
}
- cyrillic.Sort (UCAComparer.Instance);
gurmukhi.Sort (UCAComparer.Instance);
gujarati.Sort (UCAComparer.Instance);
georgian.Sort (UCAComparer.Instance);
thaana.Sort (UCAComparer.Instance);
- orderedCyrillic = (char []) cyrillic.ToArray (typeof (char));
orderedGurmukhi = (char []) gurmukhi.ToArray (typeof (char));
orderedGujarati = (char []) gujarati.ToArray (typeof (char));
orderedGeorgian = (char []) georgian.ToArray (typeof (char));
category = "ja";
arr = cjkJA;
offset = 0;//char.MaxValue - arr.Length;
- doc.Load (jaXML);
- s = doc.SelectSingleNode ("/ldml/collations/collation/rules/pc").InnerText;
+
+ // SPECIAL CASES
+ arr [0x4EDD] = 0x8002; // Chinese repetition mark?
+ arr [0x337B] = 0x8004; // Those 4 characters are Gengou
+ arr [0x337E] = 0x8005;
+ arr [0x337D] = 0x8006;
+ arr [0x337C] = 0x8007;
+
v = 0x8008;
- foreach (char c in s) {
+ foreach (JISCharacter jc in jisJapanese) {
+ if (jc.JIS < 0x8800)
+ continue;
+ char c = (char) jc.CP;
+
if (c < '\u4E00')
Console.Error.WriteLine ("---- warning: for {0} {1:X04} is omitted which should be {2:X04}", category, (int) c, v);
else {
arr [(int) c - offset] = (ushort) v++;
if (v % 256 == 0)
v += 2;
+
+ // SPECIAL CASES:
+ if (c == '\u662D') // U+337C
+ continue;
+ if (c == '\u5927') // U+337D
+ continue;
+ if (c == '\u5E73') // U+337B
+ continue;
+ if (c == '\u660E') // U+337E
+ continue;
+ if (c == '\u9686') // U+F9DC
+ continue;
+
+ // FIXME: there are still remaining
+ // characters after U+FA0C.
+// for (int k = 0; k < char.MaxValue; k++) {
+ for (int k = 0; k < '\uFA0D'; k++) {
+ if (decompIndex [k] == 0 || IsIgnorable (k))
+ continue;
+ if (decompValues [decompIndex [k]] == c /*&&
+ decompLength [k] == 1*/ ||
+ decompLength [k] == 3 &&
+ decompValues [decompIndex [k] + 1] == c) {
+ arr [k - offset] = (ushort) v++;
+ if (v % 256 == 0)
+ v += 2;
+ }
+ }
}
}
}
}
- void ModifyParsedValues ()
+ void ModifyUnidata ()
{
- // number, secondary weights
- byte weight = 0x38;
- int [] numarr = numberSecondaryWeightBounds;
- for (int i = 0; i < numarr.Length; i += 2, weight++)
- for (int cp = numarr [i]; cp < numarr [i + 1]; cp++)
- if (Char.IsNumber ((char) cp))
- diacritical [cp] = weight;
-
// Modify some decomposition equivalence
decompType [0xFE31] = 0;
decompIndex [0xFE31] = 0;
for (int i = 0x3260; i <= 0x327B; i++)
diacritical [i] = 0xC;
+ // LAMESPEC: these remappings should not be done.
+ // Windows has incorrect CJK compat mappings.
+ decompValues [decompIndex [0x32A9]] = 0x91AB;
+ decompLength [0x323B] = 1;
+ decompValues [decompIndex [0x323B]] = 0x5B78;
+ decompValues [decompIndex [0x32AB]] = 0x5B78;
+ decompValues [decompIndex [0x32A2]] = 0x5BEB;
+ decompLength [0x3238] = 1;
+ decompValues [decompIndex [0x3238]] = 0x52DE;
+ decompValues [decompIndex [0x3298]] = 0x52DE;
+
+ // LAMESPEC: custom remappings (not bugs, but still not fine: they are not standard-compliant)
+ decompIndex [0xFA0C] = decompIndex [0xF929]; // borrow U+F929 room (being empty)
+ decompValues [decompIndex [0xFA0C]] = 0x5140;
+ decompLength [0xFA0C] = 1;
+ decompIndex [0xF929] = decompLength [0xF929] = 0;
+
+ decompValues [decompIndex [0xF92C]] = 0x90DE;
+ }
+
+ void ModifyParsedValues ()
+ {
+ // number, secondary weights
+ byte weight = 0x38;
+ int [] numarr = numberSecondaryWeightBounds;
+ for (int i = 0; i < numarr.Length; i += 2, weight++)
+ for (int cp = numarr [i]; cp < numarr [i + 1]; cp++)
+ if (Char.IsNumber ((char) cp))
+ diacritical [cp] = weight;
+
// Update name part of named characters
for (int i = 0; i < sortableCharNames.Count; i++) {
DictionaryEntry de =
map [i] = new CharMapEntry (
0x1, 0x1, diacritical [i]);
- fillIndex [0x1] = 0xAC;
- for (int i = 0x07A6; i <= 0x07B0; i++)
+ fillIndex [0x1] = 0x94;
+ // syriac dotted nonspacing marks
+ AddCharMap ('\u0732', 0x1, 1);
+ AddCharMap ('\u0735', 0x1, 1);
+ AddCharMap ('\u0738', 0x1, 1);
+ AddCharMap ('\u0739', 0x1, 1);
+ AddCharMap ('\u073C', 0x1, 1);
+ fillIndex [0x1] = 0x9F;
+ for (int i = 0x0730; i <= 0x07B0; i++)
+ if (!IsIgnorable (i) && !map [i].Defined)
+ AddCharMap ((char) i, 0x1, 1);
+
+ fillIndex [0x1] = 0x0C;
+ for (int i = 0x0EC8; i <= 0x0ECD; i++)
if (!IsIgnorable (i))
AddCharMap ((char) i, 0x1, 1);
fillIndex [0x1] = 0xDC;
for (int i = 0x20d0; i <= 0x20e1; i++)
AddCharMap ((char) i, 0x1, 1);
+
+ // They are not part of Nonspacing marks, but have
+ // only diacritical weight.
+ for (int i = 0x3099; i <= 0x309C; i++)
+ map [i] = new CharMapEntry (1, 1, 1);
+ map [0x309D] = new CharMapEntry (0xFF, 0xFF, 1);
+ map [0x309E] = new CharMapEntry (0xFF, 0xFF, 1);
+ for (int i = 0x30FC; i <= 0x30FE; i++)
+ map [i] = new CharMapEntry (0xFF, 0xFF, 1);
+
#endregion
boxLv2 [i] = 3;
foreach (DictionaryEntry de in boxValues) {
int cp = (int) de.Key;
- int idx = (int) de.Value;
+ int off = (int) de.Value;
if (map [cp].Defined)
continue;
- fillIndex [0x9] = (byte) (0xE5 + idx);
- AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [idx]);
- boxLv2 [idx]++;
+ if (off < 0) {
+ fillIndex [0x9] = (byte) (0xE5 + off);
+ AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [-off]++);
+ }
+ else {
+ fillIndex [0x9] = (byte) (0xE5 + off);
+ AddCharMapGroup ((char) cp, 0x9, 0, boxLv2 [off]++);
+ }
}
// Some special characters (slanted)
fillIndex [0x9] = 0xF4;
uc = Char.GetUnicodeCategory ((char) cp);
if (!IsIgnorable (cp) &&
uc == UnicodeCategory.CurrencySymbol &&
- cp != '$')
+ cp != '$' ||
+ cp == 0xAC)
AddCharMapGroup ((char) cp, 0xA, 1, 0);
}
// byte other symbols
continue; // SPECIAL: skip FIXME: why?
uc = Char.GetUnicodeCategory ((char) cp);
if (!IsIgnorable (cp) &&
- uc == UnicodeCategory.OtherSymbol)
+ uc == UnicodeCategory.OtherSymbol ||
+ cp == '\u00B5' || cp == '\u00B7')
AddCharMapGroup ((char) cp, 0xA, 1, 0);
}
+ fillIndex [0xA] = 0x0F; // FIXME: it won't be needed
+ for (int cp = 0x2020; cp <= 0x2031; cp++)
+ if (Char.IsPunctuation ((char) cp))
+ AddCharMap ((char) cp, 0xA, 1, 0);
+ // SPECIAL CASES: why?
+ AddCharMap ('\u203B', 0xA, 1, 0);
+ AddCharMap ('\u2040', 0xA, 1, 0);
+ AddCharMap ('\u2041', 0xA, 1, 0);
+ AddCharMap ('\u2042', 0xA, 1, 0);
+
+ for (int cp = 0x20A0; cp <= 0x20AB; cp++)
+ AddCharMap ((char) cp, 0xA, 1, 0);
fillIndex [0xA] = 0x2F; // FIXME: it won't be needed
for (int cp = 0x2600; cp <= 0x2613; cp++)
AddCharMap ((char) cp, 0xA, 1, 0);
if (Char.IsLetter ((char) i))
AddLetterMap ((char) i, 0xF, 1);
- // Cyrillic - character name order
- fillIndex [0x10] = 0x6;
-//*
-for (int i = 0; i < orderedCyrillic.Length; i++)
-Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
+ // Cyrillic.
+ // Cyrillic letters are sorted like Latin letters, i.e.
+ // with culture-specific letters placed within the
+ // standard Cyrillic sequence.
+ //
+ // We can't use UCA here; it has different sorting.
+ char [] orderedCyrillic = new char [] {
+ '\u0430', '\u0431', '\u0432', '\u0433', '\u0434',
+ '\u0452', // DJE for Serbocroatian
+ '\u0435',
+ '\u0454', // IE for Ukrainian
+ '\u0436', '\u0437',
+ '\u0455', // DZE
+ '\u0438',
+ '\u0456', // Byelorussian-Ukrainian I
+ '\u0457', // YI
+ '\u0439',
+ '\u0458', // JE
+ '\u043A', '\u043B',
+ '\u0459', // LJE
+ '\u043C', '\u043D',
+ '\u045A', // NJE
+ '\u043E',
+ // 4E9 goes here.
+ '\u043F', '\u0440', '\u0441', '\u0442',
+ '\u045B', // TSHE for Serbocroatian
+ '\u0443',
+ '\u045E', // Short U for Byelorussian
+ '\u04B1', // Straight U w/ stroke (diacritical!)
+ '\u0444', '\u0445', '\u0446', '\u0447',
+ '\u045F', // DZHE
+ '\u0448', '\u0449', '\u044A', '\u044B', '\u044C',
+ '\u044D', '\u044E', '\u044F'};
+
+ // Some characters map to basic Cyrillic letters; see
+ // the character names in UnicodeData.txt for the
+ // sources. Here I simply declare an equivalence array.
+ // Its entries are the mappings for U+0490 (and U+0491)
+ // onward, skipping small letters.
+ char [] cymap_src = new char [] {
+ '\u0433', '\u0433', '\u0433', '\u0436',
+ '\u0437', '\u043A', '\u043A', '\u043A',
+ '\u043A', '\u043D', '\u043D', '\u043F',
+ '\u0445', '\u0441', '\u0442', '\u0443',
+ '\u0443', '\u0445', '\u0446', '\u0447',
+ '\u0447', '\u0432', '\u0435', '\u0435',
+ '\u0406', '\u0436', '\u043A', '\u043D',
+ '\u0447', '\u0435'};
+
+ fillIndex [0x10] = 0x8D;
+ for (int i = 0x0460; i < 0x0481; i++) {
+ if (Char.IsLetter ((char) i)) {
+ if (i == 0x0476)
+ // U+476/477 have the same
+ // primary weight as U+474/475.
+ fillIndex [0x10] -= 3;
+ AddLetterMap ((char) i, 0x10, 3);
+ }
+ }
- // table which is moslty from UCA DUCET.
+ fillIndex [0x10] = 0x6;
for (int i = 0; i < orderedCyrillic.Length; i++) {
char c = Char.ToUpper (orderedCyrillic [i], CultureInfo.InvariantCulture);
if (!IsIgnorable ((int) c) &&
- c <= '\u045C' &&
- Char.IsLetter (c)) {
+ Char.IsLetter (c) &&
+ !map [c].Defined) {
AddLetterMap (c, 0x10, 0);
fillIndex [0x10] += 3;
}
}
- /*
- for (int i = 0x0460; i < 0x0481; i++) {
- if (Char.IsLetter ((char) i)) {
- AddLetterMap ((char) i, 0x10, 0);
- fillIndex [0x10] += 3;
- }
- }
- */
-/*
- for (int i = 0x0400; i <= 0x0486; i++) {
- if (!Char.IsLetter ((char) i)) {
-// AddCharMap ((char) i, 0x1, 1);
- continue;
- }
- if (!cyrillicLetterPrimaryValues.ContainsKey (i)) {
- Console.Error.WriteLine ("no value for {0:x04}", i);
- continue;
- }
- fillIndex [0x10] =
- (byte) cyrillicLetterPrimaryValues [i];
- AddLetterMap ((char) i, 0x10, 0);
+
+ for (int i = 0; i < cymap_src.Length; i++) {
+ char c = cymap_src [i];
+ fillIndex [0x10] = map [c].Level1;
+ AddLetterMap ((char) (0x0490 + i * 2),
+ 0x10, 0);
}
-*/
// Armenian
fillIndex [0x11] = 0x3;
// Hebrew
// -Letters
- fillIndex [0x12] = 0x3;
+ fillIndex [0x12] = 0x2;
for (int i = 0x05D0; i < 0x05FF; i++)
if (Char.IsLetter ((char) i))
AddLetterMap ((char) i, 0x12, 1);
// -Accents
fillIndex [0x1] = 0x3;
- for (int i = 0x0591; i <= 0x05C2; i++)
+ for (int i = 0x0591; i <= 0x05C2; i++) {
+ if (i == 0x05A3 || i == 0x05BB)
+ fillIndex [0x1]++;
if (i != 0x05BE)
AddCharMap ((char) i, 0x1, 1);
+ }
// Arabic
fillIndex [0x1] = 0x8E;
// Insert 3001 after ',' and 3002 after '.'
if (i == 0x2C)
AddCharMapGroup2 ('\u3001', 0x7, 1, 0);
- else if (i == 0x2E) {
- fillIndex [0x7]--;
+ else if (i == 0x2E)
AddCharMapGroup2 ('\u3002', 0x7, 1, 0);
- }
else if (i == 0x3A)
AddCharMap ('\uFE30', 0x7, 1, 0);
}
case UnicodeCategory.FinalQuotePunctuation:
case UnicodeCategory.ModifierSymbol:
// SPECIAL CASES: // 0xA
- if (0x2020 <= i && i <= 0x2042)
+ if (0x2020 <= i && i <= 0x2031)
continue;
AddCharMapGroup ((char) i, 0x7, 1, 0);
break;
AddCharMapGroup ('\u22B0', 0x8, 1, 0); // precedes under relation
for (int cp = 0; cp < 0x2300; cp++) {
- if (cp == 0x200)
- cp = 0x2200; // skip to 2200
if (cp == 0xAC) // SPECIAL CASE: skip
continue;
+ if (cp == 0x200) {
+ cp = 0x2200; // skip to 2200
+ fillIndex [0x8] = 0x21;
+ }
+ if (cp == 0x2295)
+ fillIndex [0x8] = 0x3;
+ if (cp == 0x22B2)
+ fillIndex [0x8] = 0xB9;
if (!map [cp].Defined &&
// Char.GetUnicodeCategory ((char) cp) ==
// UnicodeCategory.MathSymbol)
Char.IsSymbol ((char) cp))
- AddCharMapGroup ((char) cp, 0x8, 1, 0);
+ AddCharMapGroup ((char) cp, 0x8, 1, diacritical [cp]);
// SPECIAL CASES: no idea why Windows sorts as such
switch (cp) {
case 0x3E: