2005-07-27 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / create-normalization-source.cs
1 //
2 // create-normalization-source.cs - creates normalization information table.
3 //
4
5 using System;
6 using System.Globalization;
7 using System.IO;
8
namespace Mono.Globalization.Unicode
{
	// Command-line generator: parses normalization property lines from
	// standard input and prints C# array declarations (singleNorm,
	// multiNorm, propIdx, propValue) to standard output.
	// NOTE(review): the property names handled in Parse () look like those
	// of Unicode's DerivedNormalizationProps.txt -- confirm against the
	// build script that feeds this tool.
	internal class NormalizationCodeGenerator
	{
		// Number of input lines read so far; reported when Run () fails.
		private int lineCount = 0;
		// The counts start at 1, so index 0 of each table keeps its
		// default value 0 and a stored index of 0 can mean "no entry".
		int singleCount = 1, multiCount = 1, propValueCount = 1;
		// FC_NFKC mappings that consist of exactly one value.
		int [] singleNorm = new int [550];
		// FC_NFKC mappings with two to four values, stored as groups of
		// four consecutive slots (unused slots are 0).
		int [] multiNorm = new int [280];
		// Property word per table slot, addressed through
		// NormalizationTableUtil.PropIdx (). After MakeIndex () each
		// element holds an index into propValues instead of the word.
		int [] prop = new int [char.MaxValue]; // maybe it will be enough when we use CodePointIndexer
		// Distinct property words collected by MakeIndex ().
		int [] propValues = new int [1024];

		// Flag bits stored in the low part of a property word. Bits 16
		// and above carry the singleNorm/multiNorm index set by SetNFKC ().
		public const int NoNfd = 1;
		public const int NoNfkd = 2;
		public const int NoNfc = 4;
		public const int MaybeNfc = 8;
		public const int NoNfkc = 16;
		public const int MaybeNfkc = 32;
		public const int ExpandOnNfd = 64;
		public const int ExpandOnNfc = 128;
		public const int ExpandOnNfkd = 256;
		public const int ExpandOnNfkc = 512;
		public const int FullCompositionExclusion = 1024;

		public static void Main ()
		{
			new NormalizationCodeGenerator ().Run ();
		}

		// Runs the three phases in order. Any failure is rethrown as an
		// InvalidOperationException that names the last input line read.
		private void Run ()
		{
			try {
				Parse ();
				MakeIndex ();
				Serialize ();
			} catch (Exception ex) {
				// Pass the original exception along as InnerException so
				// its type and stack trace are not lost.
				throw new InvalidOperationException ("Internal error at line " + lineCount + " : " + ex.Message, ex);
			}
		}

		// Replaces every property word in prop with the index of that
		// word inside propValues, appending unseen words (and doubling
		// propValues when it is full).
		private void MakeIndex ()
		{
			for (int i = 0; i < prop.Length; i++) {
				bool add = true;
				for (int v = 0; v < propValueCount; v++)
					if (propValues [v] == prop [i]) {
						prop [i] = v;
						add = false;
						break;
					}
				if (!add)
					continue;
				if (propValueCount == propValues.Length) {
					int [] tmp = new int [propValueCount * 2];
					Array.Copy (propValues, tmp, propValueCount);
					propValues = tmp;
				}
				propValues [propValueCount] = prop [i];
				prop [i] = propValueCount++;
			}
		}

		// Writes the four generated array declarations to standard output.
		private void Serialize ()
		{
			Console.WriteLine ("static readonly int [] singleNorm = new int [] {");
			DumpArray (singleNorm, singleCount, false);
			Console.WriteLine ("};");
			Console.WriteLine ("static readonly int [] multiNorm = new int [] {");
			DumpArray (multiNorm, multiCount, false);
			Console.WriteLine ("};");
			Console.WriteLine ("static readonly byte [] propIdx = new byte [] {");
			DumpArray (prop, NormalizationTableUtil.PropCount, true);
			Console.WriteLine ("};");
			Console.WriteLine ("static readonly uint [] propValue = new uint [] {");
			DumpArray (propValues, propValueCount, false);
			Console.WriteLine ("};");
		}

		// Prints the first 'count' elements of 'array' as a comma
		// separated list, 16 per row. Each completed row ends with a
		// comment giving the covered range; when getCP is true the range
		// is derived from NormalizationTableUtil.PropCP () instead of the
		// raw array index.
		// Throws ArgumentOutOfRangeException if count exceeds the array length.
		private void DumpArray (int [] array, int count, bool getCP)
		{
			if (array.Length < count)
				throw new ArgumentOutOfRangeException ("count");
			for (int i = 0; i < count; i++) {
				uint value = (uint) array [i];
				if (value < 10)
					Console.Write ("{0}, ", value);
				else
					Console.Write ("0x{0:X}, ", value);
				if (i % 16 == 15) {
					int l = getCP ? NormalizationTableUtil.PropCP (i) : i;
					Console.WriteLine ("// {0:X04}-{1:X04}", l - 15, l);
				}
			}
		}

		// Reads "CP[..CPEND] ; Name [; Values] # comment" lines from
		// standard input and records each recognized property through
		// SetProp () or SetNFKC (). Unrecognized property names are ignored.
		private void Parse ()
		{
			TextReader reader = Console.In;
			string line;
			// ReadLine () returning null is the end-of-input signal; Peek ()
			// may report -1 early on streams that cannot seek.
			while ((line = reader.ReadLine ()) != null) {
				lineCount++;
				int idx = line.IndexOf ('#');
				if (idx >= 0)
					line = line.Substring (0, idx);
				// Trim so that whitespace-only lines are skipped too.
				line = line.Trim ();
				if (line.Length == 0)
					continue;

				int n = SkipLettersAndDigits (line, 0);
				int cp = ParseHex (line.Substring (0, n));

				int cpEnd = -1;
				if (n + 1 < line.Length && line [n] == '.' && line [n + 1] == '.') {
					// Measure the end code point on its own: it may have
					// more hex digits than the start of the range.
					int endStart = n + 2;
					int endStop = SkipLettersAndDigits (line, endStart);
					cpEnd = ParseHex (line.Substring (endStart, endStop - endStart));
				}
				int nameStart = line.IndexOf (';') + 1;
				int valueStart = line.IndexOf (';', nameStart) + 1;
				string name = valueStart == 0 ? line.Substring (nameStart) :
					line.Substring (nameStart, valueStart - nameStart - 1);
				name = name.Trim ();
				string values = valueStart > 0 ?
					line.Substring (valueStart).Trim () : "";
				switch (name) {
				case "Full_Composition_Exclusion":
					SetProp (cp, cpEnd, FullCompositionExclusion);
					break;
				case "NFD_QC":
					SetProp (cp, cpEnd, NoNfd);
					break;
				case "NFC_QC":
					SetProp (cp, cpEnd, (values == "M") ?
						MaybeNfc :NoNfc);
					break;
				case "NFKD_QC":
					SetProp (cp, cpEnd, NoNfkd);
					break;
				case "NFKC_QC":
					SetProp (cp, cpEnd, (values == "M") ?
						MaybeNfkc :NoNfkc);
					break;
				case "Expands_On_NFD":
					SetProp (cp, cpEnd, ExpandOnNfd);
					break;
				case "Expands_On_NFC":
					SetProp (cp, cpEnd, ExpandOnNfc);
					break;
				case "Expands_On_NFKD":
					SetProp (cp, cpEnd, ExpandOnNfkd);
					break;
				case "Expands_On_NFKC":
					SetProp (cp, cpEnd, ExpandOnNfkc);
					break;
				case "FC_NFKC":
					// Up to four mapped code points; missing ones stay 0.
					int [] mapped = new int [4];
					int mappedCount = 0;
					foreach (string s in values.Split (' ')) {
						if (s.Trim ().Length == 0)
							continue;
						if (mappedCount == mapped.Length)
							throw new NotSupportedException (String.Format ("more than 4 values in FC_NFKC: {0:x}", cp));
						mapped [mappedCount++] = ParseHex (s);
					}
					SetNFKC (cp, cpEnd, mapped [0], mapped [1], mapped [2], mapped [3]);
					break;
				}
			}
			reader.Close ();
		}

		// Returns the index of the first character at or after 'start'
		// that is neither a letter nor a digit (s.Length if none).
		private static int SkipLettersAndDigits (string s, int start)
		{
			int i = start;
			while (i < s.Length && Char.IsLetterOrDigit (s [i]))
				i++;
			return i;
		}

		// Parses a hexadecimal number independently of the current culture.
		private static int ParseHex (string s)
		{
			return int.Parse (s, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
		}

		// ORs 'flag' into the property word of cp, or of every slot from
		// cp through cpEnd when cpEnd is not negative.
		private void SetProp (int cp, int cpEnd, int flag)
		{
			int idx = NormalizationTableUtil.PropIdx (cp);
			if (cpEnd < 0)
				prop [idx] |= flag;
			else {
				int idxEnd = NormalizationTableUtil.PropIdx (cpEnd);
				for (int i = idx; i <= idxEnd; i++)
					prop [i] |= flag;
			}
		}

		// Records an FC_NFKC mapping for cp (or cp..cpEnd). A mapping with
		// a single value (v2 == 0) is stored once in singleNorm and its
		// index is placed at bit 16 of the property word. A longer mapping
		// is appended to multiNorm as four slots and its start index is
		// stored the same way with the top four bits set as a marker.
		private void SetNFKC (int cp, int cpEnd, int v1, int v2, int v3, int v4)
		{
			if (v2 == 0) {
				int idx = -1;
				for (int i = 0; i < singleCount; i++)
					if (singleNorm [i] == v1) {
						idx = i;
						break;
					}
				if (idx < 0) {
					if (singleNorm.Length == singleCount) {
						int [] tmp = new int [singleCount << 1];
						Array.Copy (singleNorm, tmp, singleCount);
						singleNorm = tmp;
					}
					// Always remember the slot of the new entry, not only
					// when the array had to grow; otherwise idx stays -1
					// and -1 << 16 sets every high bit of the word.
					idx = singleCount++;
					singleNorm [idx] = v1;
				}
				SetProp (cp, cpEnd, idx << 16);
			} else {
				// Four slots are written below, so grow as soon as fewer
				// than four remain (an equality test is never hit because
				// multiCount advances in steps of four starting at 1).
				if (multiNorm.Length < multiCount + 4) {
					int [] tmp = new int [multiNorm.Length << 1];
					Array.Copy (multiNorm, tmp, multiCount);
					multiNorm = tmp;
				}
				// unchecked: setting the top nibble makes the int negative.
				int flag = unchecked ((int) (((uint) multiCount << 16) | 0xF0000000));
				SetProp (cp, cpEnd, flag);
				multiNorm [multiCount++] = v1;
				multiNorm [multiCount++] = v2;
				multiNorm [multiCount++] = v3;
				multiNorm [multiCount++] = v4;
			}
		}
	}
}
230