// 2005-05-31 Atsushi Enomoto <atsushi@ximian.com>
// [mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / create-char-mapping-source.cs
//
// create-char-mapping-source.cs - creates canonical/compatibility mappings.
//
4
5 using System;
6 using System.Collections;
7 using System.Globalization;
8 using System.IO;
9
namespace Mono.Globalization.Unicode
{
        // Command-line generator. It reads character data lines from
        // standard input, collects the mapped character sequence of every
        // code point that has one, shares identical sequences, and writes
        // C# source (two tables and two lookup functions) to standard
        // output.
        internal class CharMappingGenerator
        {
                // One parsed mapping: a code point plus the offset in
                // mappedChars where its zero-terminated mapped sequence starts.
                class CharMapping
                {
                        public CharMapping (int cp, int mapIndex, bool isCanonical)
                        {
                                MapIndex = mapIndex;
                                CodePoint = cp;
                                IsCanonical = isCanonical;
                        }

                        // Start offset of the mapped sequence in mappedChars.
                        // Not readonly: Compress () rewrites it.
                        public int MapIndex;
                        public readonly int CodePoint;
                        // True when the mapping field had no "<tag>" prefix.
                        public readonly bool IsCanonical;
                }

                // Orders CharMapping objects by the contents of their
                // zero-terminated mapped sequences in parent.mappedChars.
                class CharMappingComparer : IComparer
                {
                        CharMappingGenerator parent;

                        public CharMappingComparer (CharMappingGenerator g)
                        {
                                parent = g;
                        }

                        // Note that this never considers IsCanonical
                        public int Compare (object o1, object o2)
                        {
                                CharMapping c1 = (CharMapping) o1;
                                CharMapping c2 = (CharMapping) o2;
                                return CompareArray (c1.MapIndex, c2.MapIndex);
                        }

                        // Compares the two zero-terminated sequences starting at
                        // idx1 and idx2. Returns a negative value, zero or a
                        // positive value when the first sequence sorts before,
                        // equal to or after the second one.
                        // Note that this never considers IsCanonical
                        public int CompareArray (int idx1, int idx2)
                        {
                                int [] chars = parent.mappedChars;
                                for (int i = 0; ; i++) {
                                        int l = chars [idx1 + i];
                                        int r = chars [idx2 + i];
                                        // The terminator takes part in the
                                        // comparison, so a sequence that is a
                                        // strict prefix of the other one sorts
                                        // first instead of comparing as equal.
                                        if (l != r)
                                                return l - r;
                                        // Both sequences ended at the same place.
                                        if (l == 0)
                                                return 0;
                                }
                        }
                }

                CharMappingComparer comparer;

                // Number of input lines read so far (reported on failure).
                private int lineCount = 0;
                // Number of used entries in mappedChars. Starts at 1 so that
                // entry 0 stays 0 and offset 0 can mean "no mapping".
                int mappedCharCount = 1;
                // Zero-terminated mapped sequences, stored back to back.
                int [] mappedChars = new int [100];
                // Per code point slot (see MapIdx ()): offset into mappedChars.
                int [] mapIndex = new int [0x5000];

                // All CharMapping objects collected by Parse ().
                ArrayList mappings = new ArrayList ();

                public CharMappingGenerator ()
                {
                        comparer = new CharMappingComparer (this);
                }

                public static void Main ()
                {
                        new CharMappingGenerator ().Run ();
                }

                // Runs the three phases in order. Any failure is rethrown as
                // InvalidOperationException that carries the current input
                // line number and keeps the original exception as
                // InnerException.
                private void Run ()
                {
                        try {
                                Parse ();
                                Compress ();
                                Serialize ();
                        } catch (Exception ex) {
                                throw new InvalidOperationException ("Internal error at line " + lineCount + " : " + ex.Message, ex);
                        }
                }

                // Sorts the mappings by mapped sequence, stores every distinct
                // sequence only once in a new mappedChars array, and rebuilds
                // mapIndex and every CharMapping.MapIndex to point into it.
                private void Compress ()
                {
                        mappings.Sort (comparer);

                        // mappedChars[0] = 0. This assures that value 0 of
                        // mapIndex means there is no mapping.
                        int count = 1;
                        // The compressed data can never be longer than the
                        // uncompressed data, so the old length is sufficient.
                        int [] compressedMapping = new int [mappedCharCount];
                        // Update map index.
                        int [] newMapIndex = new int [mappings.Count];
                        for (int mi = 0; mi < mappings.Count; mi++) {
                                CharMapping m = (CharMapping) mappings [mi];
                                // Equal sequences are adjacent after sorting;
                                // reuse the offset of the previous one.
                                if (mi > 0 && 0 == comparer.Compare (
                                        mappings [mi - 1], mappings [mi])) {
                                        newMapIndex [mi] = newMapIndex [mi - 1];
                                        continue;
                                }
                                newMapIndex [mi] = count;
                                for (int i = m.MapIndex; mappedChars [i] != 0; i++)
                                        compressedMapping [count++] = mappedChars [i];
                                compressedMapping [count++] = 0;
                        }
                        // The old offsets are needed by the comparer in the
                        // loop above, so they are replaced only afterwards.
                        for (int mi = 0; mi < mappings.Count; mi++)
                                ((CharMapping) mappings [mi]).MapIndex = newMapIndex [mi];

                        int [] compressedMapIndex = new int [mapIndex.Length];
                        foreach (CharMapping m in mappings)
                                if (m.CodePoint <= char.MaxValue)
                                        compressedMapIndex [MapIdx (m.CodePoint)] = m.MapIndex;

                        mappedChars = compressedMapping;
                        mapIndex = compressedMapIndex;
                        mappedCharCount = count;
                }

                // Writes the generated C# source to standard output: the
                // mappedChars and mapIndex tables followed by the functions
                // GetPrimaryCompositeHelperIndex and
                // GetPrimaryCompositeFromMapIndex.
                private void Serialize ()
                {
                        // mappedChars
                        Console.WriteLine ("static readonly int [] mappedChars = new int [] {");
                        DumpArray (mappedChars, mappedCharCount, false);
                        Console.WriteLine ("};");

                        // mapIndex
                        Console.WriteLine ("static readonly short [] mapIndex= new short [] {");
                        DumpArray (mapIndex, NormalizationTableUtil.MapCount, true);
                        Console.WriteLine ("};");

                        // GetPrimaryCompositeHelperIndex ()
                        // One case per distinct first character of a mapped
                        // sequence; it returns the offset of the first mapping
                        // (in sort order) whose sequence starts with it.
                        Console.WriteLine ("static short GetPrimaryCompositeHelperIndex (int head)");
                        Console.WriteLine ("{");
                        int currentHead = 0;
                        Console.WriteLine ("    switch (head) {");
                        foreach (CharMapping m in mappings) {
                                if (mappedChars [m.MapIndex] == currentHead)
                                        continue; // has the same head
// FIXME: should be applied
//                              if (!m.IsCanonical)
//                                      continue;
                                currentHead = mappedChars [m.MapIndex];
                                Console.WriteLine ("    case 0x{0:X}: return 0x{1:X};", currentHead, m.MapIndex);
                        }
                        Console.WriteLine ("    }");
                        Console.WriteLine ("    return 0;");
                        Console.WriteLine ("}");

                        // GetPrimaryCompositeFromMapIndex ()
                        // One case per offset that has a canonical mapping; it
                        // returns the code point of the first such mapping in
                        // sort order.
                        Console.WriteLine ("static int GetPrimaryCompositeFromMapIndex (int idx)");
                        Console.WriteLine ("{");
                        Console.WriteLine ("    switch (idx) {");
                        int currentIndex = -1;
                        foreach (CharMapping m in mappings) {
                                if (m.MapIndex == currentIndex)
                                        continue;
                                if (!m.IsCanonical)
                                        continue;
                                Console.WriteLine ("    case 0x{0:X}: return 0x{1:X};", m.MapIndex, m.CodePoint);
                                currentIndex = m.MapIndex;
                        }
                        Console.WriteLine ("    }");
                        Console.WriteLine ("    return 0;");
                        Console.WriteLine ("}");
                }

                // Writes the first count entries of array as a comma separated
                // list of literals, 16 per row. Each complete row ends with a
                // comment giving the range it covers; when getCP is true the
                // row's last index is first translated with
                // NormalizationTableUtil.MapCP ().
                // Throws ArgumentOutOfRangeException when count exceeds the
                // array length.
                private void DumpArray (int [] array, int count, bool getCP)
                {
                        if (array.Length < count)
                                throw new ArgumentOutOfRangeException ("count");
                        for (int i = 0; i < count; i++) {
                                if (array [i] == 0)
                                        Console.Write ("0, ");
                                else
                                        Console.Write ("0x{0:X}, ", array [i]);
                                if (i % 16 == 15) {
                                        int l = getCP ? NormalizationTableUtil.MapCP (i) : i;
                                        Console.WriteLine ("// {0:X04}-{1:X04}", l - 15, l);
                                }
                        }
                        // Terminate a partially filled last row so that the
                        // caller's closing "};" starts on its own line.
                        if (count % 16 != 0)
                                Console.WriteLine ();
                }

                // Reads standard input line by line. Text after '#' is
                // ignored and blank lines are skipped. Every other line must
                // start with a hexadecimal code point followed by a separator
                // character and ';'-separated fields; the fifth of those
                // fields is the mapping, optionally prefixed by "<tag>".
                // (Presumably this is the UnicodeData.txt layout - confirm.)
                // A mapping without a tag is recorded as canonical.
                // Throws FormatException on a malformed line.
                private void Parse ()
                {
                        TextReader reader = Console.In;
                        string line;
                        // ReadLine () returns null at the end of the input.
                        while ((line = reader.ReadLine ()) != null) {
                                lineCount++;
                                int idx = line.IndexOf ('#');
                                if (idx >= 0)
                                        line = line.Substring (0, idx);
                                if (line.Trim ().Length == 0)
                                        continue;
                                int n = 0;
                                while (n < line.Length && Char.IsLetterOrDigit (line [n]))
                                        n++;
                                if (n == 0)
                                        throw new FormatException ("The line does not start with a hexadecimal code point.");
                                if (n == line.Length)
                                        throw new FormatException ("The line has no field separator after the code point.");
                                int cp = int.Parse (line.Substring (0, n), NumberStyles.HexNumber, CultureInfo.InvariantCulture);

                                string [] values = line.Substring (n + 1).Split (';');
                                if (values.Length < 5)
                                        throw new FormatException ("The line has fewer than five fields after the code point.");
                                string canon = values [4];
                                // Text between '<' and '>' when the mapping is tagged.
                                int tagEnd = canon.IndexOf ('>');
                                string tag = tagEnd < 0 ? "" : canon.Substring (1, tagEnd - 1);
                                string mappedCharsValue = canon;
                                if (tag.Length > 0)
                                        mappedCharsValue = canon.Substring (tag.Length + 2).Trim ();
                                if (mappedCharsValue.Length > 0) {
                                        // The new sequence starts at the current
                                        // end of mappedChars.
                                        mappings.Add (new CharMapping (cp,
                                                mappedCharCount,
                                                tag.Length == 0));
                                        SetCanonProp (cp, -1, mappedCharCount);
                                        foreach (string v in mappedCharsValue.Split (' '))
                                                AddMappedChars (cp,
                                                        int.Parse (v, NumberStyles.HexNumber, CultureInfo.InvariantCulture));
                                        // Zero terminator of the sequence.
                                        AddMappedChars (cp, 0);
                                }
                        }
                }

                // Appends cv to mappedChars, doubling the array when it is
                // full. cp is not used.
                private void AddMappedChars (int cp, int cv)
                {
                        if (mappedCharCount == mappedChars.Length) {
                                int [] tmp = new int [mappedCharCount * 2];
                                Array.Copy (mappedChars, tmp, mappedCharCount);
                                mappedChars = tmp;
                        }
                        mappedChars [mappedCharCount++] = cv;
                }

                // Stores flag in the mapIndex slot of cp or, when cpEnd is not
                // negative, in every slot from that of cp up to and including
                // that of cpEnd.
                private void SetCanonProp (int cp, int cpEnd, int flag)
                {
                        int idx = MapIdx (cp);
                        if (cpEnd < 0)
                                mapIndex [idx] = flag;
                        else {
                                int idxEnd = MapIdx (cpEnd);
                                for (int i = idx; i <= idxEnd; i++)
                                        mapIndex [i] = flag;
                        }
                }

                // Shorthand for NormalizationTableUtil.MapIdx (), which yields
                // the mapIndex slot used for a code point.
                private int MapIdx (int cp)
                {
                        return NormalizationTableUtil.MapIdx (cp);
                }
        }
}
253