MonoTouch specific initialization for TimeZone (devices). Fix bug #1790
[mono.git] / mcs / class / corlib / Mono.Globalization.Unicode / create-category-table.cs
1 //
2 // create-category-table.cs - Generate Unicode category tables for the
3 // Mono runtime.
4 //
5 // Author:
6 //   Damien Diederen (dd@crosstwine.com)
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining
9 // a copy of this software and associated documentation files (the
10 // "Software"), to deal in the Software without restriction, including
11 // without limitation the rights to use, copy, modify, merge, publish,
12 // distribute, sublicense, and/or sell copies of the Software, and to
13 // permit persons to whom the Software is furnished to do so, subject to
14 // the following conditions:
15 //
16 // The above copyright notice and this permission notice shall be
17 // included in all copies or substantial portions of the Software.
18 //
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 // SYNOPSIS
28 //   create-category-table.exe --dump <dump-file>
29 //   create-category-table.exe --encode <dump-file> <data-name> <h-file>
30 //
31 // DESCRIPTION
32 //   Dump, encode and generate (partially) bi-level category tables
33 //   containing variants of the Unicode category database.
34 //
35 //   With --dump <dump-file>, dump the contents of the hosting
36 //   runtime's database to <dump-file> in an easily-parseable ASCII
37 //   format.
38 //
//   With --encode <dump-file> <data-name> <h-file>, load a previously
//   generated dump and create the corresponding header file
//   containing two static C arrays: '<data-name>' and
//   '<data-name>_astral_index'.
43 //
44 //   The main table is linear for code points in the U+0000..U+FFFF
45 //   range; the 'astral_index' portion is necessary to select pages
46 //   related to code points in the astral planes:
47 //
48 //    data [(astral_index [(cp - 0x10000) >> 8] << 8) + (cp & 0xff)]
49
50 using System;
51 using System.Diagnostics;
52 using System.IO;
53 using System.Globalization;
54
// Not using System.Collections.Generic means this program can be
// compiled and run on v1.1 after updating MaxCodePoint and removing
// Char.ConvertFromUtf32 below.
58 using System.Collections;
59
60 namespace Mono.Globalization.Unicode
61 {
        // Output language of the generated table definitions.
        public enum Language
        {
                // Emit C definitions ("static const guint<N> ...").
                C,
                // Emit C# definitions ("static readonly <type> [] ...").
                CSharp
        }
67
68         public class PagedTableEncoder
69         {
                // Read-only, integer-indexed source of 16-bit values; this
                // is what Process () consumes.
                public interface IData
                {
                        // Value stored at position 'index'.
                        ushort this [int index]
                        {
                                get;
                        }

                        // Number of values available.  Process () reads the
                        // positions 0 to Count - 1.
                        int Count
                        {
                                get;
                        }
                }
82
83                 public class ArrayData : PagedTableEncoder.IData
84                 {
85                         public ArrayData (Array data)
86                         {
87                                 this.data = data;
88                         }
89
90                         public ushort this [int index]
91                         {
92                                 get {
93                                         IConvertible value = (IConvertible)data.GetValue (index);
94
95                                         return value.ToUInt16 (null);
96                                 }
97                         }
98
99                         public int Count
100                         {
101                                 get {
102                                         return data.Length;
103                                 }
104                         }
105
106                         Array data;
107                 };
108
109                 class Page
110                 {
111                         public Page (int first_base, int number, ushort [] data)
112                         {
113                                 this.first_base = first_base;
114                                 this.number = number;
115                                 this.data = data;
116                         }
117
118                         public void AddIndexEntry (IndexEntry index_entry)
119                         {
120                                 index_entries.Add (index_entry);
121                         }
122
123                         public bool Contains (ushort [] data)
124                         {
125                                 for (int i = 0; i < data.Length; i++)
126                                         if (this.data [i] != data [i])
127                                                 return false;
128                                 return true;
129                         }
130
131                         public readonly int first_base;
132
133                         public readonly int number;
134
135                         public readonly ushort [] data;
136
137                         public IList index_entries = new ArrayList (2);
138                 }
139
140                 class IndexEntry
141                 {
142                         public IndexEntry (string key, int start, int end, Page page)
143                         {
144                                 this.key = key;
145                                 this.start = start;
146                                 this.end = end;
147                                 this.page = page;
148                         }
149
150                         public readonly string key;
151
152                         public readonly int start;
153
154                         public readonly int end;
155
156                         public readonly Page page;
157                 }
158
159                 class IndexEntriesComparer : IComparer
160                 {
161                         public int Compare (object x, object y)
162                         {
163                                 return ((IndexEntry) x).start - ((IndexEntry) y).start;
164                         }
165                 }
166
167                 class Index
168                 {
169                         public Index (string name, IList entries)
170                         {
171                                 this.name = name;
172                                 this.entries = entries;
173                         }
174
175                         public readonly string name;
176
177                         public readonly IList entries;
178                 }
179
180                 public PagedTableEncoder (int page_bits,
181                                           int value_bits,
182                                           int index_bits,
183                                           bool flat_bmp,
184                                           string no_astral_symbol)
185                 {
186                         Debug.Assert (page_bits >= 4);
187                         Debug.Assert (value_bits == 8 || value_bits == 16);
188                         Debug.Assert (index_bits == 16 || index_bits == 32);
189
190                         this.page_size = 1 << page_bits;
191                         this.value_bits = value_bits;
192                         this.index_bits = index_bits;
193                         this.flat_bmp = flat_bmp;
194                         this.no_astral_symbol = no_astral_symbol;
195                 }
196
197                 public void Process (IData data, string index_name)
198                 {
199                         int end = data.Count;
200
201                         Debug.Assert (!flat_bmp || pages.Count == 0);
202                         Debug.Assert ((end & (page_size - 1)) == 0);
203
204                         IList entries = new ArrayList ();
205
206                         for (int page_base = 0; page_base < end; page_base += page_size) {
207                                 ushort [] page_data = new ushort [page_size];
208
209                                 for (int i = 0; i < page_size; i++) {
210                                         ushort v = data [page_base + i];
211
212                                         page_data[i] = v;
213                                 }
214
215                                 bool indexed = IsIndexed (page_base);
216                                 Page page = GetPageForData (page_base, page_data, indexed);
217
218                                 if (indexed) {
219                                         IndexEntry index_entry = new IndexEntry (index_name, page_base,
220                                                                                  page_base + page_size, page);
221                                         page.AddIndexEntry (index_entry);
222                                         entries.Add (index_entry);
223                                 }
224                         }
225
226                         indices.Add (new Index (index_name, entries));
227                 }
228
229                 Page GetPageForData (int page_base, ushort [] data, bool indexed)
230                 {
231                         if (indexed) {
232                                 // Are we in a hurry?
233                                 foreach (Page page in pages) {
234                                         if (page.Contains (data))
235                                                 return page;
236                                 }
237                         }
238
239                         Page new_page = new Page (page_base, pages.Count, data);
240                         pages.Add (new_page);
241                         return new_page;
242                 }
243
244                 bool IsIndexed (int page_base)
245                 {
246                         return !flat_bmp || page_base > Char.MaxValue;
247                 }
248
249                 public void WriteDefinitions (Language lang, string name, TextWriter w)
250                 {
251                         WriteHeaderComment (w);
252                         WriteDataTable (lang, name, w);
253
254                         foreach (Index index in indices) {
255                                 w.WriteLine ();
256                                 WriteIndexTable (lang, index, name + '_' + index.name, w);
257                         }
258                 }
259
260                 void WriteHeaderComment (TextWriter w)
261                 {
262                         int packed_size = pages.Count * page_size * value_bits / 8;
263                         int total_size = packed_size;
264
265                         w.WriteLine ("/*");
266                         w.WriteLine (" * Value bits: {0}, Page size: {1}", value_bits, page_size);
267                         w.WriteLine (" * Packed table: {0} bytes", packed_size);
268
269                         foreach (Index index in indices) {
270                                 int index_size = index.entries.Count * 2;
271
272                                 w.WriteLine (" * Index {0}: {1} bytes", index.name, index_size);
273
274                                 total_size += index_size;
275                         }
276
277                         w.WriteLine (" * Total: {0} bytes", total_size);
278                         w.WriteLine (" */");
279                 }
280
281                 public string CompoundKey (ArrayList keys)
282                 {
283                         string [] key_array = (string []) keys.ToArray (typeof (string));
284                         Array.Sort (key_array);
285                         return string.Join (", ", key_array);
286                 }
287
288                 public IList CollapseByIndex (IList index_entries)
289                 {
290                         if (index_entries.Count == 0)
291                                 return index_entries;
292
293                         ArrayList entries = new ArrayList (index_entries);
294                         // The comparer is required for a stable sort.
295                         entries.Sort (new IndexEntriesComparer ());
296
297                         IndexEntry first = (IndexEntry) entries [0];
298                         ArrayList keys = new ArrayList ();
299                         keys.Add (first.key);
300                         int start = first.start;
301                         int end = first.end;
302                         Page page = first.page;
303                         IList collapsed = new ArrayList ();
304
305                         for (int i = 1; i < entries.Count; i ++) {
306                                 IndexEntry ie = (IndexEntry) entries [i];
307
308                                 if (ie.start == start && ie.end == end)
309                                         keys.Add (ie.key);
310                                 else {
311                                         collapsed.Add (new IndexEntry (CompoundKey (keys), start, end, page));
312
313                                         keys = new ArrayList ();
314                                         keys.Add (ie.key);
315                                         start = ie.start;
316                                         end = ie.end;
317                                         page = ie.page;
318                                 }
319                         }
320
321                         collapsed.Add (new IndexEntry (CompoundKey (keys), start, end, page));
322                         return collapsed;
323                 }
324
325                 public IList CollapseByRange (IList entries)
326                 {
327                         if (entries.Count == 0)
328                                 return entries;
329
330                         IndexEntry first = (IndexEntry) entries [0];
331                         string key = first.key;
332                         int start = first.start;
333                         int end = first.end;
334                         Page page = first.page;
335                         IList collapsed = new ArrayList ();
336
337                         for (int i = 1; i < entries.Count; i++) {
338                                 IndexEntry ie = (IndexEntry) entries [i];
339
340                                 if (ie.start == end && ie.key == key) {
341                                         end = ie.end;
342                                 } else {
343                                         collapsed.Add (new IndexEntry (key, start, end, page));
344                                         key = ie.key;
345                                         start = ie.start;
346                                         end = ie.end;
347                                         page = ie.page;
348                                 }
349                         }
350
351                         collapsed.Add (new IndexEntry (key, start, end, page));
352                         return collapsed;
353                 }
354
355                 public IList CollapseIndexEntries (IList index_entries)
356                 {
357                         return CollapseByRange (CollapseByIndex (index_entries));
358                 }
359
360                 void WriteDataTable (Language lang, string name, TextWriter w)
361                 {
362                         int n_entries = pages.Count * page_size;
363
364                         if (lang == Language.C)
365                                 w.WriteLine ("static const guint{0} {1} [{2}] = ", value_bits, name, n_entries);
366                         else {
367                                 string type = value_bits == 8 ? "byte" : "ushort";
368
369                                 w.WriteLine ("static readonly {0} [] {1} = new {0} [{2}] ", type, name, n_entries);
370                         }
371
372                         string separator = TABLE_START;
373                         bool has_ifndef = false;
374                         foreach (Page page in pages) {
375                                 has_ifndef |= MaybeWriteIfndef (page, ref separator, w);
376                                 WritePageComment (page, ref separator, w);
377
378                                 for (int i = 0; i < page_size; i += 16) {
379                                         w.Write("{0}\t{1},{2},{3},{4},{5},{6},{7},{8}," +
380                                                 "{9},{10},{11},{12},{13},{14},{15},{16}",
381                                                 separator,
382                                                 page.data[i +  0], page.data[i +  1],
383                                                 page.data[i +  2], page.data[i +  3],
384                                                 page.data[i +  4], page.data[i +  5],
385                                                 page.data[i +  6], page.data[i +  7],
386                                                 page.data[i +  8], page.data[i +  9],
387                                                 page.data[i + 10], page.data[i + 11],
388                                                 page.data[i + 12], page.data[i + 13],
389                                                 page.data[i + 14], page.data[i + 15]);
390
391                                         separator = TABLE_CONT;
392                                 }
393                         }
394
395                         // Separator intentionally ignored.
396                         if (has_ifndef)
397                                 w.Write ("{0}#endif", Environment.NewLine);
398                         w.WriteLine (TABLE_END);
399                 }
400
401                 bool MaybeWriteIfndef (Page page, ref string separator, TextWriter w)
402                 {
403                         if (no_astral_symbol == null || page.first_base != Char.MaxValue + 1)
404                                 return false;
405
406                         w.WriteLine ("{0}#ifndef {1}", Environment.NewLine, no_astral_symbol);
407                         // Previous separator, but indented on the new line following the directive.
408                         separator = "\t" + separator;
409                         return true;
410                 }
411
412                 void WritePageComment (Page page, ref string separator, TextWriter w)
413                 {
414                         int uses = page.index_entries.Count;
415                         IList index_entries = CollapseIndexEntries (page.index_entries);
416
417                         if (uses == 0 || index_entries.Count == 1) {
418                                 w.Write ("{0}\t/* Page {1}, {2} {3}use{4}",
419                                          separator, page.number, uses,
420                                          flat_bmp ? "indirect " : "",
421                                          uses != 1 ? "s" : "");
422
423                                 if (index_entries.Count == 1) {
424                                         IndexEntry ie = (IndexEntry) index_entries [0];
425
426                                         w.WriteLine (": {0:X4}-{1:X4} ({2}) */", ie.start, ie.end - 1, ie.key);
427                                 } else
428                                         w.WriteLine (" */");
429                         } else {
430                                 w.Write ("{0}\t/*{1}\t * Page {2}, {3} indirect use{4}",
431                                          separator, Environment.NewLine, page.number, uses, uses != 1 ? "s" : "");
432
433                                 separator = ":" + Environment.NewLine + "\t *\t";
434                                 string next_separator = "," + Environment.NewLine + "\t *\t";
435
436                                 foreach (IndexEntry ie in index_entries) {
437                                         w.Write ("{0}{1:X4}-{2:X4} ({3})", separator, ie.start, ie.end - 1, ie.key);
438                                         separator = next_separator;
439                                 }
440
441                                 // Separator intentionally ignored.
442                                 w.WriteLine (Environment.NewLine + "\t */");
443                         }
444
445                         separator = "";
446                 }
447
448                 void WriteIndexTable (Language lang, Index index, string name, TextWriter w)
449                 {
450                         bool ifndef_around = flat_bmp && no_astral_symbol != null;
451                         if (ifndef_around)
452                                 w.WriteLine ("#ifndef {0}", no_astral_symbol);
453
454                         if (lang == Language.C)
455                                 w.WriteLine ("static const guint{0} {1} [{2}] = ", index_bits, name, index.entries.Count);
456                         else {
457                                 string type = value_bits == 16 ? "ushort" : "uint";
458
459                                 w.WriteLine ("static readonly {0} [] {1} = new {0} [{2}] ", type, name, index.entries.Count);
460                         }
461
462                         string separator = TABLE_START;
463                         bool ifndef_inside = false;
464                         foreach (IndexEntry ie in index.entries) {
465                                 int index_value = ie.page.number /* * page_size */;
466
467                                 Debug.Assert (index_value < (1 << index_bits));
468
469                                 if (!ifndef_around)
470                                         ifndef_inside |= MaybeWriteIfndef (ie.page, ref separator, w);
471
472                                 w.WriteLine ("{0}\t/* {1:X4}-{2:X4}: page {3} */",
473                                              separator, ie.start, ie.end - 1, ie.page.number);
474                                 w.Write ("\t0x{0:X}", index_value);
475
476                                 separator = TABLE_CONT;
477                         }
478
479                         // Separator intentionally ignored.
480                         if (ifndef_inside)
481                                 w.Write ("{0}#endif", Environment.NewLine);
482                         w.WriteLine (TABLE_END);
483
484                         if (ifndef_around)
485                                 w.WriteLine ("#endif");
486                 }
487
                // Number of values per page (1 << page_bits).
                readonly int page_size;

                // Width, in bits, of a data table element (8 or 16).
                readonly int value_bits;

                // Width, in bits, of an index table element (16 or 32).
                readonly int index_bits;

                // When true, slices starting at or below Char.MaxValue get
                // a page of their own and no index entry.
                readonly bool flat_bmp;

                // Symbol named in the emitted "#ifndef" directives; null
                // disables them.
                readonly string no_astral_symbol;

                // Every Page created so far, in creation order.
                IList pages = new ArrayList ();

                // One Index per Process () call.
                IList indices = new ArrayList ();

                // Opening, item separator and closing of the generated
                // brace-delimited initializers.
                static readonly string TABLE_START = "{" + Environment.NewLine;
                static readonly string TABLE_CONT = "," + Environment.NewLine;
                static readonly string TABLE_END = Environment.NewLine + "};";
505         }
506
507         class CategoryTableGenerator {
                // Highest code point covered by the tables (U+10FFFF).
                const int MaxCodePoint = 0x10ffff;
509
510                 public class HostUCData : PagedTableEncoder.IData
511                 {
512                         public ushort this [int index]
513                         {
514                                 get {
515                                         if (index <= 0xffff)
516                                                 return (ushort) Char.GetUnicodeCategory ((char) index);
517                                         else {
518                                                 string s = Char.ConvertFromUtf32 (index);
519
520                                                 return (ushort) Char.GetUnicodeCategory (s, 0);
521                                         }
522                                 }
523                         }
524
525                         public int Count
526                         {
527                                 get {
528                                         return MaxCodePoint + 1;
529                                 }
530                         }
531                 }
532
533                 public static void Dump (PagedTableEncoder.IData source, TextWriter w)
534                 {
535                         w.WriteLine ("{0}", source.Count);
536
537                         for (int cp = 0; cp <= MaxCodePoint; cp++) {
538                                 byte cc = (byte) source [cp];
539
540                                 if (cc != 0)
541                                         w.WriteLine ("{0} {1}", cp, cc);
542                         }
543                 }
544
545                 public static PagedTableEncoder.IData ParseDump (TextReader r)
546                 {
547                         string line = r.ReadLine ();
548                         int count = int.Parse (line);
549                         byte [] data = new byte [count];
550
551                         while ((line = r.ReadLine ()) != null) {
552                                 int n = line.IndexOf (' ');
553                                 int cp = int.Parse (line.Substring (0, n));
554                                 int cc = int.Parse (line.Substring (n + 1));
555
556                                 if (cp < 0 || cp >= data.Length)
557                                         throw new Exception (String.Format ("Invalid code point {0:X4}", cp));
558
559                                 if (cc < 0 || cc > (int)UnicodeCategory.OtherNotAssigned)
560                                         throw new Exception (String.Format ("Invalid category code {0}", cc));
561
562                                 if (data [cp] != 0)
563                                         throw new Exception (String.Format ("Duplicate code point {0:X4}", cp));
564
565                                 data [cp] = (byte)cc;
566                         }
567
568                         return new PagedTableEncoder.ArrayData (data);
569                 }
570
571                 public static void Encode (string dump_file, string data_name, string h_file)
572                 {
573                         PagedTableEncoder.IData data;
574
575                         using (TextReader r = new StreamReader (dump_file))
576                                 data = ParseDump (r);
577
578                         PagedTableEncoder pte = new PagedTableEncoder (8, 8, 16, true, "DISABLE_ASTRAL");
579                         pte.Process (data, "astral_index");
580
581                         using (TextWriter w = new StreamWriter (h_file)) {
582                                 w.WriteLine ("/*");
583                                 w.WriteLine (" * The {0}* tables below are automatically generated", data_name);
584                                 w.WriteLine (" * by create-category-table(.cs), available in the mcs");
585                                 w.WriteLine (" * sources.  DO NOT EDIT!");
586                                 w.WriteLine (" */");
587                                 w.WriteLine ();
588
589                                 pte.WriteDefinitions (Language.C, data_name, w);
590                         }
591                 }
592
593                 public static void Main (string [] args)
594                 {
595                         for (int i = 0; i < args.Length; ) {
596                                 if (args [i] == "--dump") {
597                                         PagedTableEncoder.IData data = new HostUCData ();
598
599                                         using (TextWriter w = new StreamWriter (args [i + 1]))
600                                                 Dump (data, w);
601
602                                         i += 2;
603                                 } else if (args [i] == "--encode") {
604                                         Encode (args [i + 1], args [i + 2], args [i + 3]);
605
606                                         i += 4;
607                                 } else
608                                         throw new Exception ("Unrecognized argument: " + args [i]);
609                         }
610                 }
611         }
612 }