2005-07-29 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / create-normalization-source.cs
1 //
2 // create-normalization-source.cs : creates normalization information table.
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
16 // 
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
19 // 
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28
29 using System;
30 using System.Globalization;
31 using System.IO;
32
33 namespace Mono.Globalization.Unicode
34 {
35         internal class NormalizationCodeGenerator
36         {
                // Number of input lines consumed so far by Parse (); Run () puts it
                // into the error message when a phase fails.
                private int lineCount = 0;
                // propValueCount is the number of used slots in propValues. It starts
                // at 1, so slot 0 (left at the array default of 0) stands for "no
                // flags". singleCount and multiCount are referenced only from the
                // disabled singleNorm/multiNorm code.
                int singleCount = 1, multiCount = 1, propValueCount = 1;
//              int [] singleNorm = new int [550];
//              int [] multiNorm = new int [280];
                // Flag bits per table slot, addressed via NormalizationTableUtil.PropIdx ().
                // MakeIndex () later overwrites each entry with an index into propValues.
                int [] prop = new int [char.MaxValue]; // maybe it will be enough when we use CodePointIndexer
                // Distinct flag combinations found in prop; MakeIndex () doubles the
                // array when it fills up.
                int [] propValues = new int [1024];

                // Bit flags that SetProp () ORs into prop entries. Parse () chooses
                // the flag from the property name on each input line.
                public const int NoNfd = 1;    // set for "NFD_QC" lines
                public const int NoNfkd = 2;   // set for "NFKD_QC" lines
                public const int NoNfc = 4;    // set for "NFC_QC" lines whose value is not "M"
                public const int MaybeNfc = 8; // set for "NFC_QC" lines whose value is "M"
                public const int NoNfkc = 16;  // set for "NFKC_QC" lines whose value is not "M"
                public const int MaybeNfkc = 32; // set for "NFKC_QC" lines whose value is "M"
                public const int FullCompositionExclusion = 64; // set for "Full_Composition_Exclusion" lines
//              public const int ExpandOnNfd = 128;
//              public const int ExpandOnNfc = 256;
//              public const int ExpandOnNfkd = 512;
//              public const int ExpandOnNfkc = 1024;
55
56                 public static void Main ()
57                 {
58                         new NormalizationCodeGenerator ().Run ();
59                 }
60
61                 private void Run ()
62                 {
63                         try {
64                                 Parse ();
65                                 MakeIndex ();
66                                 Serialize ();
67                         } catch (Exception ex) {
68                                 throw new InvalidOperationException ("Internal error at line " + lineCount + " : " + ex);
69                         }
70                 }
71
72                 private void MakeIndex ()
73                 {
74                         for (int i = 0; i < prop.Length; i++) {
75                                 bool add = true;
76                                 for (int v = 0; v < propValueCount; v++)
77                                         if (propValues [v] == prop [i]) {
78                                                 prop [i] = v;
79                                                 add = false;
80                                                 break;
81                                         }
82                                 if (!add)
83                                         continue;
84                                 if (propValueCount == propValues.Length) {
85                                         int [] tmp = new int [propValueCount * 2];
86                                         Array.Copy (propValues, tmp, propValueCount);
87                                         propValues = tmp;
88                                 }
89                                 propValues [propValueCount] = prop [i];
90                                 prop [i] = propValueCount++;
91                         }
92                 }
93
94                 private void Serialize ()
95                 {
96                         /*
97                         Console.WriteLine ("static readonly int [] singleNorm = new int [] {");
98                         DumpArray (singleNorm, singleCount, false);
99                         Console.WriteLine ("};");
100                         Console.WriteLine ("static readonly int [] multiNorm = new int [] {");
101                         DumpArray (multiNorm, multiCount, false);
102                         Console.WriteLine ("};");
103                         */
104                         Console.WriteLine ("static readonly byte [] propIdx = new byte [] {");
105                         DumpArray (prop, NormalizationTableUtil.PropCount, true);
106                         Console.WriteLine ("};");
107                         Console.WriteLine ("static readonly uint [] propValue = new uint [] {");
108                         DumpArray (propValues, propValueCount, false);
109                         Console.WriteLine ("};");
110                 }
111
112                 private void DumpArray (int [] array, int count, bool getCP)
113                 {
114                         if (array.Length < count)
115                                 throw new ArgumentOutOfRangeException ("count");
116                         for (int i = 0; i < count; i++) {
117                                 uint value = (uint) array [i];
118                                 if (value < 10)
119                                         Console.Write ("{0}, ", value);
120                                 else
121                                         Console.Write ("0x{0:X}, ", value);
122                                 if (i % 16 == 15) {
123                                         int l = getCP ? NormalizationTableUtil.PropCP (i) : i;
124                                         Console.WriteLine ("// {0:X04}-{1:X04}", l - 15, l);
125                                 }
126                         }
127                 }
128
129                 private void Parse ()
130                 {
131                         TextReader reader = Console.In;
132                         while (reader.Peek () != -1) {
133                                 string line = reader.ReadLine ();
134                                 lineCount++;
135                                 int idx = line.IndexOf ('#');
136                                 if (idx >= 0)
137                                         line = line.Substring (0, idx);
138                                 if (line.Length == 0)
139                                         continue;
140                                 int n = 0;
141                                 while (Char.IsDigit (line [n]) || Char.IsLetter (line [n]))
142                                         n++;
143                                 int cp = int.Parse (line.Substring (0, n), NumberStyles.HexNumber);
144                                 // Windows does not handle surrogate characters.
145                                 if (cp >= 0x10000)
146                                         continue;
147
148                                 int cpEnd = -1;
149                                 if (line [n] == '.' && line [n + 1] == '.')
150                                         cpEnd = int.Parse (line.Substring (n + 2, n), NumberStyles.HexNumber);
151                                 int nameStart = line.IndexOf (';') + 1;
152                                 int valueStart = line.IndexOf (';', nameStart) + 1;
153                                 string name = valueStart == 0 ? line.Substring (nameStart) :
154                                         line.Substring (nameStart, valueStart - nameStart - 1);
155                                 name = name.Trim ();
156                                 string values = valueStart > 0 ?
157                                         line.Substring (valueStart).Trim () : "";
158                                 switch (name) {
159                                 case "Full_Composition_Exclusion":
160                                         SetProp (cp, cpEnd, FullCompositionExclusion);
161                                         break;
162                                 case "NFD_QC":
163                                         if (cp != 0xAC00) // Hangul Syllables are computed
164                                                 SetProp (cp, cpEnd, NoNfd);
165                                         break;
166                                 case "NFC_QC":
167                                         SetProp (cp, cpEnd, (values == "M") ?
168                                                 MaybeNfc :NoNfc);
169                                         break;
170                                 case "NFKD_QC":
171                                         if (cp != 0xAC00) // Hangul Syllables are computed
172                                                 SetProp (cp, cpEnd, NoNfkd);
173                                         break;
174                                 case "NFKC_QC":
175                                         SetProp (cp, cpEnd, (values == "M") ?
176                                                 MaybeNfkc :NoNfkc);
177                                         break;
178                                 /*
179                                 case "Expands_On_NFD":
180                                         if (cp != 0xAC00) // Hangul Syllables are computed
181                                                 SetProp (cp, cpEnd, ExpandOnNfd);
182                                         break;
183                                 case "Expands_On_NFC":
184                                         SetProp (cp, cpEnd, ExpandOnNfc);
185                                         break;
186                                 case "Expands_On_NFKD":
187                                         if (cp != 0xAC00) // Hangul Syllables are computed
188                                                 SetProp (cp, cpEnd, ExpandOnNfkd);
189                                         break;
190                                 case "Expands_On_NFKC":
191                                         SetProp (cp, cpEnd, ExpandOnNfkc);
192                                         break;
193                                 */
194                                 /*
195                                 case "FC_NFKC":
196                                         int v1 = 0, v2 = 0, v3 = 0, v4 = 0;
197                                         foreach (string s in values.Split (' ')) {
198                                                 if (s.Trim ().Length == 0)
199                                                         continue;
200                                                 int v = int.Parse (s, NumberStyles.HexNumber);
201                                                 if (v1 == 0)
202                                                         v1 = v;
203                                                 else if (v2 == 0)
204                                                         v2 = v;
205                                                 else if (v3 == 0)
206                                                         v3 = v;
207                                                 else if (v4 == 0)
208                                                         v4 = v;
209                                                 else
210                                                         throw new NotSupportedException (String.Format ("more than 4 values in FC_NFKC: {0:x}", cp));
211                                         }
212                                         SetNFKC (cp, cpEnd, v1, v2, v3, v4);
213                                         break;
214                                 */
215                                 }
216                         }
217                         reader.Close ();
218                 }
219
220                 private void SetProp (int cp, int cpEnd, int flag)
221                 {
222                         int idx = NormalizationTableUtil.PropIdx (cp);
223                         if (cpEnd < 0)
224                                 prop [idx] |= flag;
225                         else {
226                                 int idxEnd = NormalizationTableUtil.PropIdx (cpEnd);
227                                 for (int i = idx; i <= idxEnd; i++)
228                                         prop [i] |= flag;
229                         }
230                 }
231
232                 /*
233                 private void SetNFKC (int cp, int cpEnd, int v1, int v2, int v3, int v4)
234                 {
235                         if (v2 == 0) {
236                                 int idx = -1;
237                                 for (int i = 0; i < singleCount; i++)
238                                         if (singleNorm [i] == v1) {
239                                                 idx = i;
240                                                 break;
241                                         }
242                                 if (idx < 0) {
243                                         if (singleNorm.Length == singleCount) {
244                                                 int [] tmp = new int [singleCount << 1];
245                                                 Array.Copy (singleNorm, tmp, singleCount);
246                                                 singleNorm = tmp;
247                                                 idx = singleCount;
248                                         }
249                                         singleNorm [singleCount++] = v1;
250                                 }
251                                 SetProp (cp, cpEnd, idx << 16);
252                         } else {
253                                 if (multiNorm.Length == multiCount) {
254                                         int [] tmp = new int [multiCount << 1];
255                                         Array.Copy (multiNorm, tmp, multiCount);
256                                         multiNorm = tmp;
257                                 }
258                                 SetProp (cp, cpEnd,
259                                         (int) ((multiCount << 16) | 0xF0000000));
260                                 multiNorm [multiCount++] = v1;
261                                 multiNorm [multiCount++] = v2;
262                                 multiNorm [multiCount++] = v3;
263                                 multiNorm [multiCount++] = v4;
264                         }
265                 }
266                 */
267         }
268 }
269