2 // create-normalization-source.cs : creates normalization information table.
5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 using System.Globalization;
33 namespace Mono.Globalization.Unicode
35 internal class NormalizationCodeGenerator
// Mutable state of the generator.
// lineCount is the value reported in the "Internal error at line" message.
// The three counters start at 1 rather than 0.
// NOTE(review): presumably slot 0 of each table is reserved as the "nothing" entry - confirm.
37 private int lineCount = 0;
38 int singleCount = 1, multiCount = 1, propValueCount = 1;
// NOTE(review): singleNorm and multiNorm are commented out here, but Serialize and SetNFKC
// still refer to them - confirm whether those code paths are meant to be live.
39 // int [] singleNorm = new int [550];
40 // int [] multiNorm = new int [280];
// Property word per table slot; SetProp is called with indexes obtained from
// NormalizationTableUtil.PropIdx, and MakeIndex later replaces each word by an index
// into propValues.
41 int [] prop = new int [char.MaxValue]; // maybe it will be enough when we use CodePointIndexer
// Distinct property words collected by MakeIndex (which allocates a larger copy when full).
42 int [] propValues = new int [1024];
// Flag values that Parse hands to SetProp. Each one is a distinct power of two.
// NOTE(review): presumably they are OR-ed into the same prop entry - the body of SetProp
// that would confirm this is not visible here.
44 public const int NoNfd = 1;
45 public const int NoNfkd = 2;
46 public const int NoNfc = 4;
47 public const int MaybeNfc = 8;
48 public const int NoNfkc = 16;
49 public const int MaybeNfkc = 32;
50 public const int FullCompositionExclusion = 64;
// NOTE(review): the ExpandOn* constants are disabled, yet the "Expands_On_*" cases in Parse
// still name them - confirm that those cases are disabled as well.
51 // public const int ExpandOnNfd = 128;
52 // public const int ExpandOnNfc = 256;
53 // public const int ExpandOnNfkd = 512;
54 // public const int ExpandOnNfkc = 1024;
// Program entry point: creates a generator instance and runs it.
56 public static void Main ()
58 new NormalizationCodeGenerator ().Run ();
// Failure handler: any exception is replaced by an InvalidOperationException whose message
// contains lineCount followed by the full text of the caught exception.
// NOTE(review): lineCount is an instance field, so this handler presumably belongs to an
// instance method (Run?) whose header and try block are not visible here - confirm.
// NOTE(review): the caught exception is only concatenated into the message; it is not
// passed as the inner exception.
67 } catch (Exception ex) {
68 throw new InvalidOperationException ("Internal error at line " + lineCount + " : " + ex);
// Converts prop from a table of property words into a table of indexes into propValues.
72 private void MakeIndex ()
74 for (int i = 0; i < prop.Length; i++) {
// Linear search of the values collected so far for the word stored in prop [i].
// NOTE(review): the statements executed on a match are not visible here; presumably
// prop [i] is set to the matching index v - confirm.
76 for (int v = 0; v < propValueCount; v++)
77 if (propValues [v] == prop [i]) {
// propValues is full: allocate a table of twice the size and copy the used part over.
// NOTE(review): the assignment of tmp back to propValues is not visible here.
84 if (propValueCount == propValues.Length) {
85 int [] tmp = new int [propValueCount * 2];
86 Array.Copy (propValues, tmp, propValueCount);
// Record the word as a new distinct value and store its index in place of the word.
89 propValues [propValueCount] = prop [i];
90 prop [i] = propValueCount++;
// Writes the generated tables to standard output as C# array declarations.
94 private void Serialize ()
// NOTE(review): the field declarations of singleNorm and multiNorm are commented out
// elsewhere in this class - confirm whether these two dumps are actually enabled.
97 Console.WriteLine ("static readonly int [] singleNorm = new int [] {");
98 DumpArray (singleNorm, singleCount, false);
99 Console.WriteLine ("};");
100 Console.WriteLine ("static readonly int [] multiNorm = new int [] {");
101 DumpArray (multiNorm, multiCount, false);
102 Console.WriteLine ("};");
// propIdx: the first NormalizationTableUtil.PropCount entries of prop; getCP is true so
// DumpArray labels rows through NormalizationTableUtil.PropCP.
// NOTE(review): the emitted element type is byte - this assumes every entry is below 256.
104 Console.WriteLine ("static readonly byte [] propIdx = new byte [] {");
105 DumpArray (prop, NormalizationTableUtil.PropCount, true);
106 Console.WriteLine ("};");
// propValue: the first propValueCount entries of propValues.
107 Console.WriteLine ("static readonly uint [] propValue = new uint [] {");
108 DumpArray (propValues, propValueCount, false);
109 Console.WriteLine ("};");
// Writes the first count elements of array to standard output as a comma separated list.
//   array - the table to dump
//   count - number of leading elements to write
//   getCP - when true, row labels are taken from NormalizationTableUtil.PropCP (i);
//           otherwise the plain element index is used
// Throws ArgumentOutOfRangeException when count is larger than array.Length.
112 private void DumpArray (int [] array, int count, bool getCP)
114 if (array.Length < count)
115 throw new ArgumentOutOfRangeException ("count");
116 for (int i = 0; i < count; i++) {
// Elements are reinterpreted as unsigned before being printed.
117 uint value = (uint) array [i];
// The value is written either in decimal or as 0x-prefixed hexadecimal.
// NOTE(review): the condition choosing between the two forms is not visible here.
119 Console.Write ("{0}, ", value);
121 Console.Write ("0x{0:X}, ", value);
// Ends the row with a comment giving the range l - 15 .. l in hexadecimal.
// NOTE(review): presumably emitted once every 16 elements - the guarding condition is
// not visible here.
123 int l = getCP ? NormalizationTableUtil.PropCP (i) : i;
124 Console.WriteLine ("// {0:X04}-{1:X04}", l - 15, l);
// Reads property records from standard input and records them through SetProp / SetNFKC.
// Each record is handled as "<cp>[..<cpEnd>] ; <name> [; <values>]", with '#' starting a
// trailing comment; code points and values are parsed as hexadecimal.
// NOTE(review): several statements of this method (case labels, break / continue
// statements, declarations of n and cpEnd) are not visible here.
129 private void Parse ()
131 TextReader reader = Console.In;
// Loop until the reader has no more characters.
132 while (reader.Peek () != -1) {
133 string line = reader.ReadLine ();
// Cut the line at the first '#'.
// NOTE(review): presumably guarded by a check that '#' was found - confirm.
135 int idx = line.IndexOf ('#');
137 line = line.Substring (0, idx);
138 if (line.Length == 0)
// Advance n over the leading run of letters and digits, then parse that run as the
// hexadecimal start code point.
// NOTE(review): line [n] is read without a visible bounds check; a line made up only of
// letters and digits would run past the end of the string - confirm.
141 while (Char.IsDigit (line [n]) || Char.IsLetter (line [n]))
143 int cp = int.Parse (line.Substring (0, n), NumberStyles.HexNumber);
144 // Windows does not handle surrogate characters.
// A ".." directly after the start code point introduces the end of a range.
// NOTE(review): Substring (n + 2, n) takes exactly n characters for the end code point,
// i.e. it assumes the end has as many hex digits as the start - confirm against the input.
149 if (line [n] == '.' && line [n + 1] == '.')
150 cpEnd = int.Parse (line.Substring (n + 2, n), NumberStyles.HexNumber);
// name is the text between the first and the second ';' (or everything after the first
// ';' when there is no second one); values is the trimmed text after the second ';',
// or the empty string when there is none.
151 int nameStart = line.IndexOf (';') + 1;
152 int valueStart = line.IndexOf (';', nameStart) + 1;
153 string name = valueStart == 0 ? line.Substring (nameStart) :
154 line.Substring (nameStart, valueStart - nameStart - 1);
156 string values = valueStart > 0 ?
157 line.Substring (valueStart).Trim () : "";
// Dispatch on the property name.
// NOTE(review): some case labels are not visible here; the flags passed below suggest
// they are the NFD / NFC / NFKD / NFKC quick-check properties - confirm.
159 case "Full_Composition_Exclusion":
160 SetProp (cp, cpEnd, FullCompositionExclusion);
// Records whose start code point is U+AC00 are skipped for the decomposition flags.
163 if (cp != 0xAC00) // Hangul Syllables are computed
164 SetProp (cp, cpEnd, NoNfd);
// The value "M" selects one flag and any other value the other one.
// NOTE(review): the two operands of this conditional are not visible here.
167 SetProp (cp, cpEnd, (values == "M") ?
171 if (cp != 0xAC00) // Hangul Syllables are computed
172 SetProp (cp, cpEnd, NoNfkd);
175 SetProp (cp, cpEnd, (values == "M") ?
// NOTE(review): the ExpandOn* constants used by the next four cases are commented out in
// the constant list of this class - confirm that these cases are disabled too.
179 case "Expands_On_NFD":
180 if (cp != 0xAC00) // Hangul Syllables are computed
181 SetProp (cp, cpEnd, ExpandOnNfd);
183 case "Expands_On_NFC":
184 SetProp (cp, cpEnd, ExpandOnNfc);
186 case "Expands_On_NFKD":
187 if (cp != 0xAC00) // Hangul Syllables are computed
188 SetProp (cp, cpEnd, ExpandOnNfkd);
190 case "Expands_On_NFKC":
191 SetProp (cp, cpEnd, ExpandOnNfkc);
// FC_NFKC handling (per the error message below): the value list is split on spaces,
// empty items are skipped and every remaining item is parsed as a hexadecimal number.
// At most four values (v1 .. v4) are supported; more raise NotSupportedException.
196 int v1 = 0, v2 = 0, v3 = 0, v4 = 0;
197 foreach (string s in values.Split (' ')) {
198 if (s.Trim ().Length == 0)
200 int v = int.Parse (s, NumberStyles.HexNumber);
210 throw new NotSupportedException (String.Format ("more than 4 values in FC_NFKC: {0:x}", cp));
212 SetNFKC (cp, cpEnd, v1, v2, v3, v4);
// Applies flag to every table slot from the slot of cp up to and including the slot of
// cpEnd, where slots are obtained from NormalizationTableUtil.PropIdx.
// NOTE(review): the loop body is not visible here; presumably flag is OR-ed into
// prop [i] - confirm. Handling of a single code point (no cpEnd) is not visible either.
220 private void SetProp (int cp, int cpEnd, int flag)
222 int idx = NormalizationTableUtil.PropIdx (cp);
226 int idxEnd = NormalizationTableUtil.PropIdx (cpEnd);
227 for (int i = idx; i <= idxEnd; i++)
// Records the mapping values v1 .. v4 for the range cp .. cpEnd.
// The visible code has two paths: one stores v1 alone in singleNorm, the other stores all
// four values in multiNorm. In both, the table position shifted left by 16 bits is passed
// to SetProp as the flag; the multi-value path additionally sets the bits 0xF0000000.
// NOTE(review): the condition choosing between the two paths is not visible here.
// NOTE(review): the declarations of singleNorm and multiNorm are commented out elsewhere
// in this class - confirm whether this method is live.
233 private void SetNFKC (int cp, int cpEnd, int v1, int v2, int v3, int v4)
// Look for an existing singleNorm entry equal to v1.
// NOTE(review): what happens on a match is not visible here.
237 for (int i = 0; i < singleCount; i++)
238 if (singleNorm [i] == v1) {
// singleNorm is full: allocate a table of twice the size and copy the used part over.
243 if (singleNorm.Length == singleCount) {
244 int [] tmp = new int [singleCount << 1];
245 Array.Copy (singleNorm, tmp, singleCount);
249 singleNorm [singleCount++] = v1;
251 SetProp (cp, cpEnd, idx << 16);
// NOTE(review): this capacity check tests for equality only, while four entries are
// appended below; if multiCount steps over multiNorm.Length without ever being equal to
// it, the writes would go out of range - confirm.
253 if (multiNorm.Length == multiCount) {
254 int [] tmp = new int [multiCount << 1];
255 Array.Copy (multiNorm, tmp, multiCount);
259 (int) ((multiCount << 16) | 0xF0000000));
260 multiNorm [multiCount++] = v1;
261 multiNorm [multiCount++] = v2;
262 multiNorm [multiCount++] = v3;
263 multiNorm [multiCount++] = v4;