2005-05-06 Gonzalo Paniagua Javier <gonzalo@ximian.com>
[mono.git] / mcs / class / corlib / System.Globalization / TextInfo.cs
1 //
2 // System.Globalization.TextInfo.cs
3 //
4 // Author:
5 //      Dick Porter (dick@ximian.com)
6 //      Duncan Mak (duncan@ximian.com)
7 //      Atsushi Enomoto (atsushi@ximian.com)
8 //
9 // (C) 2002 Ximian, Inc.
10 // (C) 2005 Novell, Inc.
11 //
12 // TODO:
13 //   Missing the various code page mappings.
14 //   Missing the OnDeserialization implementation.
15 //
16 // Copyright (C) 2004, 2005 Novell, Inc (http://www.novell.com)
17 //
18 // Permission is hereby granted, free of charge, to any person obtaining
19 // a copy of this software and associated documentation files (the
20 // "Software"), to deal in the Software without restriction, including
21 // without limitation the rights to use, copy, modify, merge, publish,
22 // distribute, sublicense, and/or sell copies of the Software, and to
23 // permit persons to whom the Software is furnished to do so, subject to
24 // the following conditions:
25 // 
26 // The above copyright notice and this permission notice shall be
27 // included in all copies or substantial portions of the Software.
28 // 
29 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
36 //
37 using System;
38 using System.Globalization;
39 using System.Runtime.Serialization;
40 using System.Runtime.InteropServices;
41 using System.Text;
42
43 namespace System.Globalization {
44
45         [Serializable]
46         public class TextInfo: IDeserializationCallback
47         {
48                 [StructLayout (LayoutKind.Sequential)]
49                 struct Data {
50                         public int ansi;
51                         public int ebcdic;
52                         public int mac;
53                         public int oem;
54                         public byte list_sep;
55                 }
56
57                 int m_win32LangID;
58                 int m_nDataItem;
59                 bool m_useUserOverride;
60
61                 [NonSerialized]
62                 readonly CultureInfo ci;
63
64                 [NonSerialized]
65                 readonly CultureInfo parentCulture;
66
67                 [NonSerialized]
68                 readonly bool handleDotI;
69
70                 [NonSerialized]
71                 readonly Data data;
72
73                 internal unsafe TextInfo (CultureInfo ci, int lcid, void* data)
74                 {
75                         this.m_win32LangID = lcid;
76                         this.ci = ci;
77                         if (data != null)
78                                 this.data = *(Data*) data;
79                         else {
80                                 this.data = new Data ();
81                                 this.data.list_sep = (byte) '.';
82                         }
83
84                         CultureInfo tmp = ci;
85                         while (tmp.Parent != null && tmp.Parent != tmp && tmp.Parent.LCID != 0x7F)
86                                 tmp = tmp.Parent;
87                         parentCulture = tmp;
88
89                         if (tmp != null) {
90                                 switch (tmp.LCID) {
91                                 case 44: // Azeri (az)
92                                 case 31: // Turkish (tr)
93                                         handleDotI = true;
94                                         break;
95                                 }
96                         }
97                 }
98
99                 public virtual int ANSICodePage
100                 {
101                         get {
102                                 return data.ansi;
103                         }
104                 }
105
106                 public virtual int EBCDICCodePage
107                 {
108                         get {
109                                 return data.ebcdic;
110                         }
111                 }
112
113                 public virtual string ListSeparator 
114                 {
115                         get {
116                                 
117                                 return ((char) data.list_sep).ToString ();
118                         }
119                 }
120
121                 public virtual int MacCodePage
122                 {
123                         get {
124                                 return data.mac;
125                         }
126                 }
127
128                 public virtual int OEMCodePage
129                 {
130                         get {
131                                 return data.oem;
132                         }
133                 }
134
135                 public override bool Equals (object obj)
136                 {
137                         if (obj == null)
138                                 return false;
139                         TextInfo other = obj as TextInfo;
140                         if (other == null)
141                                 return false;
142                         if (other.m_win32LangID != m_win32LangID)
143                                 return false;
144                         if (other.ci != ci)
145                                 return false;
146                         return true;
147                 }
148
149                 public override int GetHashCode()
150                 {
151                         return (m_win32LangID);
152                 }
153                 
154                 public override string ToString()
155                 {
156                         return "TextInfo - " + m_win32LangID;
157                 }
158
159                 public string ToTitleCase (string str)
160                 {
161                         if(str == null)
162                                 throw new ArgumentNullException("string is null");
163
164                         StringBuilder sb = null;
165                         int i = 0;
166                         int start = 0;
167                         while (i < str.Length) {
168                                 if (!Char.IsLetter (str [i++]))
169                                         continue;
170                                 i--;
171                                 char t = ToTitleCase (str [i]);
172                                 bool capitalize = true;
173                                 if (t == str [i]) {
174                                         capitalize = false;
175                                         bool allTitle = true;
176                                         // if the word is all titlecase,
177                                         // then don't capitalize it.
178                                         int saved = i;
179                                         while (++i < str.Length) {
180                                                 if (Char.IsWhiteSpace (str [i]))
181                                                         break;
182                                                 t = ToTitleCase (str [i]);
183                                                 if (t != str [i]) {
184                                                         allTitle = false;
185                                                         break;
186                                                 }
187                                         }
188                                         if (allTitle)
189                                                 continue;
190                                         i = saved;
191
192                                         // still check if all remaining
193                                         // characters are lowercase,
194                                         // where we don't have to modify
195                                         // the source word.
196                                         while (++i < str.Length) {
197                                                 if (Char.IsWhiteSpace (str [i]))
198                                                         break;
199                                                 if (ToLower (str [i]) != str [i]) {
200                                                         capitalize = true;
201                                                         i = saved;
202                                                         break;
203                                                 }
204                                         }
205                                 }
206
207                                 if (capitalize) {
208                                         if (sb == null)
209                                                 sb = new StringBuilder (str.Length);
210                                         sb.Append (str, start, i - start);
211                                         sb.Append (ToTitleCase (str [i]));
212                                         start = i + 1;
213                                         while (++i < str.Length) {
214                                                 if (Char.IsWhiteSpace (str [i]))
215                                                         break;
216                                                 sb.Append (ToLower (str [i]));
217                                         }
218                                         start = i;
219                                 }
220                         }
221                         if (sb != null)
222                                 sb.Append (str, start, str.Length - start);
223
224                         return sb != null ? sb.ToString () : str;
225                 }
226
227                 // Only Azeri and Turkish have their own special cases.
228                 // Other than them, all languages have common special case
229                 // (enumerable enough).
230                 public virtual char ToLower (char c)
231                 {
232                         // quick ASCII range check
233                         if (c < 0x40 || 0x60 < c && c < 128)
234                                 return c;
235                         else if ('A' <= c && c <= 'Z' && (!handleDotI || c != 'I'))
236                                 return (char) (c + 0x20);
237
238                         if (ci == null || ci.LCID == 0x7F)
239                                 return Char.ToLowerInvariant (c);
240
241                         switch (c) {
242                         case '\u0049': // Latin uppercase I
243                                 if (handleDotI)
244                                         return '\u0131'; // I becomes dotless i
245                                 break;
246                         case '\u0130': // I-dotted
247                                 return '\u0069'; // i
248
249                         case '\u01c5': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
250                                 return '\u01c6';
251                         // \u01c7 -> \u01c9 (LJ) : invariant
252                         case '\u01c8': // LATIN CAPITAL LETTER L WITH SMALL LETTER J
253                                 return '\u01c9';
254                         // \u01ca -> \u01cc (NJ) : invariant
255                         case '\u01cb': // LATIN CAPITAL LETTER N WITH SMALL LETTER J
256                                 return '\u01cc';
257                         // WITH CARON : invariant
258                         // WITH DIAERESIS AND * : invariant
259
260                         case '\u01f2': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
261                                 return '\u01f3';
262                         case '\u03d2':  // ? it is not in ICU
263                                 return '\u03c5';
264                         case '\u03d3':  // ? it is not in ICU
265                                 return '\u03cd';
266                         case '\u03d4':  // ? it is not in ICU
267                                 return '\u03cb';
268                         }
269                         return Char.ToLowerInvariant (c);
270                 }
271
272                 public virtual char ToUpper (char c)
273                 {
274                         // quick ASCII range check
275                         if (c < 0x60)
276                                 return c;
277                         else if ('a' <= c && c <= 'z' && (!handleDotI || c != 'i'))
278                                 return (char) (c - 0x20);
279
280                         if (ci == null || ci.LCID == 0x7F)
281                                 return Char.ToUpperInvariant (c);
282
283                         switch (c) {
284                         case '\u0069': // Latin lowercase i
285                                 if (handleDotI)
286                                         return '\u0130'; // dotted capital I
287                                 break;
288                         case '\u0131': // dotless i
289                                 return '\u0049'; // I
290
291                         case '\u01c5': // see ToLower()
292                                 return '\u01c4';
293                         case '\u01c8': // see ToLower()
294                                 return '\u01c7';
295                         case '\u01cb': // see ToLower()
296                                 return '\u01ca';
297                         case '\u01f2': // see ToLower()
298                                 return '\u01f1';
299                         case '\u0390': // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
300                                 return '\u03aa'; // it is not in ICU
301                         case '\u03b0': // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
302                                 return '\u03ab'; // it is not in ICU
303                         case '\u03d0': // GREEK BETA
304                                 return '\u0392';
305                         case '\u03d1': // GREEK THETA
306                                 return '\u0398';
307                         case '\u03d5': // GREEK PHI
308                                 return '\u03a6';
309                         case '\u03d6': // GREEK PI
310                                 return '\u03a0';
311                         case '\u03f0': // GREEK KAPPA
312                                 return '\u039a';
313                         case '\u03f1': // GREEK RHO
314                                 return '\u03a1';
315                         // am not sure why miscellaneous GREEK symbols are 
316                         // not handled here.
317                         }
318
319                         return Char.ToUpperInvariant (c);
320                 }
321
322                 private char ToTitleCase (char c)
323                 {
324                         // Handle some Latin characters.
325                         switch (c) {
326                         case '\u01c4':
327                         case '\u01c5':
328                         case '\u01c6':
329                                 return '\u01c5';
330                         case '\u01c7':
331                         case '\u01c8':
332                         case '\u01c9':
333                                 return '\u01c8';
334                         case '\u01ca':
335                         case '\u01cb':
336                         case '\u01cc':
337                                 return '\u01cb';
338                         case '\u01f1':
339                         case '\u01f2':
340                         case '\u01f3':
341                                 return '\u01f2';
342                         }
343                         if ('\u2170' <= c && c <= '\u217f' || // Roman numbers
344                                 '\u24d0' <= c && c <= '\u24e9')
345                                 return c;
346                         return ToUpper (c);
347                 }
348
349                 public virtual string ToLower (string s)
350                 {
351                         // In ICU (3.2) there are a few cases that one single
352                         // character results in multiple characters in e.g.
353                         // tr-TR culture. So I tried brute force conversion
354                         // test with single character as a string input, but 
355                         // there was no such conversion. So I think it just
356                         // invokes ToLower(char).
357                         if (s == null)
358                                 throw new ArgumentNullException ("string is null");
359                         StringBuilder sb = null;
360                         int start = 0;
361
362                         for (int i = 0; i < s.Length; i++) {
363                                 if (s [i] != ToLower (s [i])) {
364                                         if (sb == null)
365                                                 sb = new StringBuilder (s.Length);
366                                         sb.Append (s, start, i - start);
367                                         sb.Append (ToLower (s [i]));
368                                         start = i + 1;
369                                 }
370                         }
371
372                         if (sb != null && start < s.Length)
373                                 sb.Append (s, start, s.Length - start);
374                         return sb == null ? s : sb.ToString ();
375                 }
376
377                 public virtual string ToUpper (string s)
378                 {
379                         // In ICU (3.2) there is a case that string
380                         // is handled beyond per-character conversion, but
381                         // it is only lt-LT culture where MS.NET does not
382                         // handle any special transliteration. So I keep
383                         // ToUpper() just as character conversion.
384                         if (s == null)
385                                 throw new ArgumentNullException ("string is null");
386                         StringBuilder sb = null;
387                         int start = 0;
388                         for (int i = 0; i < s.Length; i++) {
389                                 if (s [i] != ToUpper (s [i])) {
390                                         if (sb == null)
391                                                 sb = new StringBuilder (s.Length);
392                                         sb.Append (s, start, i - start);
393                                         sb.Append (ToUpper (s [i]));
394                                         start = i + 1;
395                                 }
396                         }
397                         if (sb != null && start < s.Length)
398                                 sb.Append (s, start, s.Length - start);
399                         return sb == null ? s : sb.ToString ();
400                 }
401
402                 /* IDeserialization interface */
                    // Explicit IDeserializationCallback implementation.  The body is
                    // empty and the sender argument is ignored; the file header lists
                    // the real implementation as a TODO.
                    // NOTE(review): ci, parentCulture, handleDotI and data are marked
                    // [NonSerialized] and nothing here restores them, so a deserialized
                    // instance presumably keeps their default values -- confirm.
403                 [MonoTODO]
404                 void IDeserializationCallback.OnDeserialization(object sender)
405                 {
406                 }
407         }
408 }