2005-04-25 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / create-char-mapping-source.cs
1 //
2 // create-char-mapping-source.cs - creates canonical/compatibility mappings.
3 //
4
5 using System;
6 using System.Collections;
7 using System.Globalization;
8 using System.IO;
9
namespace Mono.Globalization.Unicode
{
        // Build-time tool. Reads semicolon-separated character data lines
        // from standard input (presumably UnicodeData.txt - confirm against
        // the build script), collects every non-empty decomposition field,
        // and writes C# source for the resulting tables and lookup helpers
        // to standard output.
        internal class CharMappingGenerator
        {
                // One decomposition entry: a source code point plus the start
                // offset of its zero-terminated target sequence in mappedChars.
                class CharMapping
                {
                        public CharMapping (int cp, int mapIndex, bool isCanonical)
                        {
                                MapIndex = mapIndex;
                                CodePoint = cp;
                                IsCanonical = isCanonical;
                        }

                        // Start offset into mappedChars. Rewritten by Compress ().
                        public int MapIndex;
                        public readonly int CodePoint;
                        // True when the decomposition field had no "<tag>" prefix.
                        public readonly bool IsCanonical;
                }

                // Orders CharMapping instances by their zero-terminated target
                // sequences stored in the owning generator's mappedChars.
                class CharMappingComparer : IComparer
                {
                        CharMappingGenerator parent;

                        public CharMappingComparer (CharMappingGenerator g)
                        {
                                parent = g;
                        }

                        // Note that this never considers IsCanonical
                        public int Compare (object o1, object o2)
                        {
                                CharMapping c1 = (CharMapping) o1;
                                CharMapping c2 = (CharMapping) o2;
                                return CompareArray (c1.MapIndex, c2.MapIndex);
                        }

                        // Compares the two zero-terminated sequences that start at
                        // idx1 and idx2 in mappedChars, element by element.
                        // Returns 0 only when both sequences have the same length
                        // and content; a sequence that is a strict prefix of the
                        // other sorts first. The loop must look at the terminator
                        // of BOTH sequences: stopping on the second one alone makes
                        // a prefix compare equal in one argument order and unequal
                        // in the other, which breaks sorting and lets Compress ()
                        // merge different mappings.
                        // Note that this never considers IsCanonical
                        public int CompareArray (int idx1, int idx2)
                        {
                                for (int i = 0; ; i++) {
                                        int l = parent.mappedChars [idx1 + i];
                                        int r = parent.mappedChars [idx2 + i];
                                        if (l != r)
                                                return l - r;
                                        if (l == 0)
                                                return 0; // both terminators reached
                                }
                        }
                }

                CharMappingComparer comparer;

                // Number of input lines read so far; used in error messages.
                private int lineCount = 0;
                // Number of used slots in mappedChars. Slot 0 is reserved (see
                // Compress ()), so counting starts at 1.
                int mappedCharCount = 1;
                // Concatenated zero-terminated target sequences.
                int [] mappedChars = new int [100];
                // Per-character offset into mappedChars, indexed by MapIdx (cp).
                int [] mapIndex = new int [0x5000];

                // CharMapping entries, one per decomposition found by Parse ().
                ArrayList mappings = new ArrayList ();
                // Code points whose decomposition tag is "narrow" or "wide".
                ArrayList widthSensitives = new ArrayList ();

                public CharMappingGenerator ()
                {
                        comparer = new CharMappingComparer (this);
                }

                public static void Main ()
                {
                        new CharMappingGenerator ().Run ();
                }

                // Runs the three phases in order. Any failure is rethrown as an
                // InvalidOperationException that reports the current input line.
                private void Run ()
                {
                        try {
                                Parse ();
                                Compress ();
                                Serialize ();
                        } catch (Exception ex) {
                                // Keep the original exception as InnerException so
                                // its type and stack trace stay available; the
                                // message text is unchanged.
                                throw new InvalidOperationException ("Internal error at line " + lineCount + " : " + ex, ex);
                        }
                }

                // Removes duplicate target sequences: mappings are sorted by
                // sequence, identical sequences are stored once, and every
                // mapping (and the per-character index table) is re-pointed at
                // the compacted storage.
                private void Compress ()
                {
                        mappings.Sort (comparer);

                        // mappedChars[0] = 0. This assures that value 0 of
                        // mapIndex means there is no mapping.
                        int count = 1;
                        int [] compressedMapping = new int [mappedCharCount];
                        // Update map index.
                        int [] newMapIndex = new int [mappings.Count];
                        for (int mi = 0; mi < mappings.Count; mi++) {
                                CharMapping m = (CharMapping) mappings [mi];
                                // After sorting, equal sequences are adjacent, so
                                // comparing with the predecessor finds duplicates.
                                if (mi > 0 && 0 == comparer.Compare (
                                        mappings [mi - 1], mappings [mi])) {
                                        newMapIndex [mi] = newMapIndex [mi - 1];
                                        continue;
                                }
                                newMapIndex [mi] = count;
                                for (int i = m.MapIndex; mappedChars [i] != 0; i++)
                                        compressedMapping [count++] = mappedChars [i];
                                compressedMapping [count++] = 0;
                        }
                        // MapIndex is only rewritten after the loop above because
                        // the comparer still reads the old offsets while it runs.
                        for (int mi = 0; mi < mappings.Count; mi++)
                                ((CharMapping) mappings [mi]).MapIndex = newMapIndex [mi];

                        int [] compressedMapIndex = new int [mapIndex.Length];
                        foreach (CharMapping m in mappings)
                                if (m.CodePoint <= char.MaxValue)
                                        compressedMapIndex [MapIdx (m.CodePoint)] = m.MapIndex;

                        mappedChars = compressedMapping;
                        mapIndex = compressedMapIndex;
                        mappedCharCount = count;
                }

                // Writes the generated C# source (two tables and three helper
                // methods) to standard output.
                private void Serialize ()
                {
                        // mappedChars
                        Console.WriteLine ("static readonly int [] mappedChars = new int [] {");
                        DumpArray (mappedChars, mappedCharCount, false);
                        Console.WriteLine ("};");

                        // mapIndex
                        Console.WriteLine ("static readonly short [] mapIndex= new short [] {");
                        DumpArray (mapIndex, NormalizationTableUtil.MapCount, true);
                        Console.WriteLine ("};");

                        // GetPrimaryCompositeHelperIndex ()
                        Console.WriteLine ("static short GetPrimaryCompositeHelperIndex (int head)");
                        Console.WriteLine ("{");
                        int currentHead = 0;
                        Console.WriteLine ("    switch (head) {");
                        foreach (CharMapping m in mappings) {
                                if (mappedChars [m.MapIndex] == currentHead)
                                        continue; // has the same head
                                // FIXME: should be applied
                                //      if (!m.IsCanonical)
                                //              continue;
                                currentHead = mappedChars [m.MapIndex];
                                Console.WriteLine ("    case 0x{0:X}: return 0x{1:X};", currentHead, m.MapIndex);
                        }
                        Console.WriteLine ("    }");
                        Console.WriteLine ("    return 0;");
                        Console.WriteLine ("}");

                        // GetPrimaryCompositeFromMapIndex ()
                        Console.WriteLine ("static int GetPrimaryCompositeFromMapIndex (int idx)");
                        Console.WriteLine ("{");
                        Console.WriteLine ("    switch (idx) {");
                        int currentIndex = -1;
                        foreach (CharMapping m in mappings) {
                                if (m.MapIndex == currentIndex)
                                        continue;
                                // A skipped non-canonical entry does not update
                                // currentIndex, so a later canonical entry with
                                // the same MapIndex is still emitted.
                                if (!m.IsCanonical)
                                        continue;
                                Console.WriteLine ("    case 0x{0:X}: return 0x{1:X};", m.MapIndex, m.CodePoint);
                                currentIndex = m.MapIndex;
                        }
                        Console.WriteLine ("    }");
                        Console.WriteLine ("    return 0;");
                        Console.WriteLine ("}");

                        // WidthSensitives
                        Console.WriteLine ("public static int ToWidthInsensitive (int i)");
                        Console.WriteLine ("{");
                        Console.WriteLine ("    if (i != 0x3000 && i < 0xFF00)");
                        Console.WriteLine ("            return i;");
                        // Emit the switch only when there is at least one case
                        // label; a switch section without a label would not
                        // compile in the generated source.
                        if (widthSensitives.Count > 0) {
                                Console.WriteLine ("    switch (i) {");
                                foreach (int i in widthSensitives)
                                        Console.WriteLine ("    case 0x{0:X}:", i);
                                // NOTE(review): the emitted code indexes mappedChars
                                // directly with MapIdx (i) instead of going through
                                // mapIndex - confirm this is intended.
                                Console.WriteLine ("            return mappedChars [NormalizationTableUtil.MapIdx (i)];");
                                Console.WriteLine ("    }");
                        }
                        Console.WriteLine ("    return i;");
                        Console.WriteLine ("}");
                }

                // Writes the first `count` elements of `array` as a
                // comma-separated list, 16 values per row, each full row
                // followed by a "// first-last" range comment. When getCP is
                // true the row label is derived from NormalizationTableUtil.MapCP
                // of the last index in the row (assumes the 16 slots of a row
                // map to consecutive code points - TODO confirm).
                // Throws ArgumentOutOfRangeException when count exceeds the
                // array length.
                private void DumpArray (int [] array, int count, bool getCP)
                {
                        if (array.Length < count)
                                throw new ArgumentOutOfRangeException ("count");
                        for (int i = 0; i < count; i++) {
                                if (array [i] == 0)
                                        Console.Write ("0, ");
                                else
                                        Console.Write ("0x{0:X}, ", array [i]);
                                if (i % 16 == 15) {
                                        int l = getCP ? NormalizationTableUtil.MapCP (i) : i;
                                        Console.WriteLine ("// {0:X04}-{1:X04}", l - 15, l);
                                }
                        }
                        // Terminate a trailing partial row so the caller's closing
                        // "};" starts on its own line.
                        if (count % 16 != 0)
                                Console.WriteLine ();
                }

                // Reads all lines from standard input. For each line, text after
                // '#' is dropped, the leading hexadecimal code point is parsed,
                // and the fifth field after the code point is taken as the
                // decomposition: an optional "<tag>" followed by space-separated
                // hexadecimal code points. Every non-empty decomposition is
                // appended to mappedChars (zero-terminated) and recorded in
                // mappings; "narrow"/"wide" tagged ones are also recorded in
                // widthSensitives.
                private void Parse ()
                {
                        // Console.In is not owned by this tool, so it is not closed.
                        TextReader reader = Console.In;
                        while (reader.Peek () != -1) {
                                string line = reader.ReadLine ();
                                lineCount++;
                                int idx = line.IndexOf ('#');
                                if (idx >= 0)
                                        line = line.Substring (0, idx);
                                // Also skips lines that held only whitespace
                                // before a comment.
                                if (line.Trim ().Length == 0)
                                        continue;
                                int n = 0;
                                while (n < line.Length && (Char.IsDigit (line [n]) || Char.IsLetter (line [n])))
                                        n++;
                                if (n == 0 || n == line.Length)
                                        throw new FormatException ("Unexpected line format: " + line);
                                int cp = int.Parse (line.Substring (0, n), NumberStyles.HexNumber, CultureInfo.InvariantCulture);

                                string [] values = line.Substring (n + 1).Split (';');
                                string canon = values [4];
                                // Text between '<' and '>' at the start of the field;
                                // empty when the field has no tag.
                                int tagEnd = canon.IndexOf ('>');
                                string tag = tagEnd < 0 ? "" : canon.Substring (1, tagEnd - 1);
                                string mappedCharsValue = canon;
                                if (tag.Length > 0)
                                        mappedCharsValue = canon.Substring (tag.Length + 2);
                                mappedCharsValue = mappedCharsValue.Trim ();
                                if (mappedCharsValue.Length == 0)
                                        continue;

                                switch (tag) {
                                case "narrow":
                                case "wide":
                                        widthSensitives.Add (cp);
                                        break;
                                }
                                mappings.Add (new CharMapping (cp,
                                        mappedCharCount,
                                        tag.Length == 0));
                                // The index written here is replaced wholesale by
                                // Compress (), which rebuilds mapIndex from mappings.
                                SetCanonProp (cp, -1, mappedCharCount);
                                foreach (string v in mappedCharsValue.Split (' ')) {
                                        if (v.Length == 0)
                                                continue; // tolerate repeated spaces
                                        AddMappedChars (cp,
                                                int.Parse (v, NumberStyles.HexNumber, CultureInfo.InvariantCulture));
                                }
                                AddMappedChars (cp, 0);
                        }
                }

                // Appends cv to mappedChars, doubling the array when it is full.
                // The cp argument is not used.
                private void AddMappedChars (int cp, int cv)
                {
                        if (mappedCharCount == mappedChars.Length) {
                                int [] tmp = new int [mappedCharCount * 2];
                                Array.Copy (mappedChars, tmp, mappedCharCount);
                                mappedChars = tmp;
                        }
                        mappedChars [mappedCharCount++] = cv;
                }

                // Stores flag in mapIndex for cp, or for every slot from
                // MapIdx (cp) through MapIdx (cpEnd) when cpEnd is not negative.
                private void SetCanonProp (int cp, int cpEnd, int flag)
                {
                        int idx = MapIdx (cp);
                        if (cpEnd < 0)
                                mapIndex [idx] = flag;
                        else {
                                int idxEnd = MapIdx (cpEnd);
                                for (int i = idx; i <= idxEnd; i++)
                                        mapIndex [i] = flag;
                        }
                }

                // Shorthand for NormalizationTableUtil.MapIdx ().
                private int MapIdx (int cp)
                {
                        return NormalizationTableUtil.MapIdx (cp);
                }
        }
}
272