Merge pull request #819 from brendanzagaeski/patch-1
[mono.git] / mcs / class / corlib / System.Globalization / TextInfo.cs
1 //
2 // System.Globalization.TextInfo.cs
3 //
4 // Authors:
5 //      Dick Porter (dick@ximian.com)
6 //      Duncan Mak (duncan@ximian.com)
7 //      Atsushi Enomoto (atsushi@ximian.com)
8 //      Sebastien Pouliot  <sebastien@ximian.com>
9 //
10 // (C) 2002 Ximian, Inc.
11 // (C) 2005 Novell, Inc.
12 //
13 // TODO:
14 //   Missing the various code page mappings.
15 //   Missing the OnDeserialization implementation.
16 //
17 // Copyright (C) 2004, 2005 Novell, Inc (http://www.novell.com)
18 //
19 // Permission is hereby granted, free of charge, to any person obtaining
20 // a copy of this software and associated documentation files (the
21 // "Software"), to deal in the Software without restriction, including
22 // without limitation the rights to use, copy, modify, merge, publish,
23 // distribute, sublicense, and/or sell copies of the Software, and to
24 // permit persons to whom the Software is furnished to do so, subject to
25 // the following conditions:
26 // 
27 // The above copyright notice and this permission notice shall be
28 // included in all copies or substantial portions of the Software.
29 // 
30 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
34 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
35 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
36 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37 //
38
39 using System.Runtime.Serialization;
40 using System.Runtime.InteropServices;
41 using System.Text;
42
43 namespace System.Globalization {
44
45         [Serializable]
46         [ComVisible (true)]
47         [MonoTODO ("IDeserializationCallback isn't implemented.")]
48         public class TextInfo: IDeserializationCallback, ICloneable
49         {
50                 [StructLayout (LayoutKind.Sequential)]
51                 struct Data {
52                         public int ansi;
53                         public int ebcdic;
54                         public int mac;
55                         public int oem;
56                         public bool right_to_left;
57                         public byte list_sep;
58                 }
59
60                 string m_listSeparator;
61                 bool m_isReadOnly;
62                 string customCultureName;
63
64 #pragma warning disable 169
65                 [NonSerialized]
66                 int m_nDataItem;
67                 bool m_useUserOverride;
68 #pragma warning restore 169             
69
70                 int m_win32LangID;
71
72                 [NonSerialized]
73                 readonly CultureInfo ci;
74
75                 [NonSerialized]
76                 readonly bool handleDotI;
77
78                 [NonSerialized]
79                 readonly Data data;
80
81                 internal unsafe TextInfo (CultureInfo ci, int lcid, void* data, bool read_only)
82                 {
83                         this.m_isReadOnly = read_only;
84                         this.m_win32LangID = lcid;
85                         this.ci = ci;
86                         if (data != null)
87                                 this.data = *(Data*) data;
88                         else {
89                                 this.data = new Data ();
90                                 this.data.list_sep = (byte) ',';
91                         }
92
93                         CultureInfo tmp = ci;
94                         while (tmp.Parent != null && tmp.Parent.LCID != 0x7F && tmp.Parent != tmp)
95                                 tmp = tmp.Parent;
96
97                         if (tmp != null) {
98                                 switch (tmp.LCID) {
99                                 case 44: // Azeri (az)
100                                 case 31: // Turkish (tr)
101                                         handleDotI = true;
102                                         break;
103                                 }
104                         }
105                 }
106
107                 private TextInfo (TextInfo textInfo)
108                 {
109                         m_win32LangID = textInfo.m_win32LangID;
110                         m_nDataItem = textInfo.m_nDataItem;
111                         m_useUserOverride = textInfo.m_useUserOverride;
112                         m_listSeparator = textInfo.ListSeparator;
113                         customCultureName = textInfo.CultureName;
114                         ci = textInfo.ci;
115                         handleDotI = textInfo.handleDotI;
116                         data = textInfo.data;
117                 }
118
119                 public virtual int ANSICodePage
120                 {
121                         get {
122                                 return data.ansi;
123                         }
124                 }
125
126                 public virtual int EBCDICCodePage
127                 {
128                         get {
129                                 return data.ebcdic;
130                         }
131                 }
132
133                 [ComVisible (false)]
134                 public int LCID {
135                         get { return m_win32LangID; }
136                 }
137
138                 public virtual string ListSeparator {
139                         get {
140                                 if (m_listSeparator == null)
141                                         m_listSeparator = ((char) data.list_sep).ToString ();
142                                 return m_listSeparator;
143                         }
144                         [ComVisible (false)]
145                         set { m_listSeparator = value; }
146                 }
147
148                 public virtual int MacCodePage
149                 {
150                         get {
151                                 return data.mac;
152                         }
153                 }
154
155                 public virtual int OEMCodePage
156                 {
157                         get {
158                                 return data.oem;
159                         }
160                 }
161
162                 [ComVisible (false)]
163                 public string CultureName {
164                         get {
165                                 if (customCultureName == null)
166                                         customCultureName = ci == null ? String.Empty : ci.Name;
167                                 return customCultureName;
168                         }
169                 }
170
171                 [ComVisible (false)]
172                 public bool IsReadOnly {
173                         get { return m_isReadOnly; }
174                 }
175
176                 [ComVisible (false)]
177                 public bool IsRightToLeft {
178                         get {
179                                 return data.right_to_left;
180                         }
181                 }
182
183                 public override bool Equals (object obj)
184                 {
185                         if (obj == null)
186                                 return false;
187                         TextInfo other = obj as TextInfo;
188                         if (other == null)
189                                 return false;
190                         if (other.m_win32LangID != m_win32LangID)
191                                 return false;
192                         if (other.ci != ci)
193                                 return false;
194                         return true;
195                 }
196
197                 public override int GetHashCode()
198                 {
199                         return (m_win32LangID);
200                 }
201                 
202                 public override string ToString()
203                 {
204                         return "TextInfo - " + m_win32LangID;
205                 }
206
207                 public string ToTitleCase (string str)
208                 {
209                         if(str == null)
210                                 throw new ArgumentNullException ("str");
211
212                         StringBuilder sb = null;
213                         int i = 0;
214                         int start = 0;
215                         while (i < str.Length) {
216                                 if (!Char.IsLetter (str [i++]))
217                                         continue;
218                                 i--;
219                                 char t = ToTitleCase (str [i]);
220                                 bool capitalize = true;
221                                 if (t == str [i]) {
222                                         capitalize = false;
223                                         bool allTitle = true;
224                                         // if the word is all titlecase,
225                                         // then don't capitalize it.
226                                         int saved = i;
227                                         while (++i < str.Length) {
228                                                 var ch = str [i];
229                                                 var category = char.GetUnicodeCategory (ch);
230                                                 if (IsSeparator (category))
231                                                         break;
232                                                 t = ToTitleCase (ch);
233                                                 if (t != ch) {
234                                                         allTitle = false;
235                                                         break;
236                                                 }
237                                         }
238                                         if (allTitle)
239                                                 continue;
240                                         i = saved;
241
242                                         // still check if all remaining
243                                         // characters are lowercase,
244                                         // where we don't have to modify
245                                         // the source word.
246                                         while (++i < str.Length) {
247                                                 var ch = str [i];
248                                                 var category = char.GetUnicodeCategory (ch);
249                                                 if (IsSeparator (category))
250                                                         break;
251                                                 if (ToLower (ch) != ch) {
252                                                         capitalize = true;
253                                                         i = saved;
254                                                         break;
255                                                 }
256                                         }
257                                 }
258
259                                 if (capitalize) {
260                                         if (sb == null)
261                                                 sb = new StringBuilder (str.Length);
262                                         sb.Append (str, start, i - start);
263                                         sb.Append (ToTitleCase (str [i]));
264                                         start = i + 1;
265                                         while (++i < str.Length) {
266                                                 var ch = str [i];
267                                                 var category = char.GetUnicodeCategory (ch);
268                                                 if (IsSeparator (category))
269                                                         break;
270                                                 sb.Append (ToLower (ch));
271                                         }
272                                         start = i;
273                                 }
274                         }
275                         if (sb != null)
276                                 sb.Append (str, start, str.Length - start);
277
278                         return sb != null ? sb.ToString () : str;
279                 }
280
281                 static bool IsSeparator (UnicodeCategory category)
282                 {
283                         switch (category) {
284                         case UnicodeCategory.SpaceSeparator:
285                         case UnicodeCategory.LineSeparator:
286                         case UnicodeCategory.ParagraphSeparator:
287                         case UnicodeCategory.Control:
288                         case UnicodeCategory.Format:
289                         case UnicodeCategory.ConnectorPunctuation:
290                         case UnicodeCategory.DashPunctuation:
291                         case UnicodeCategory.OpenPunctuation:
292                         case UnicodeCategory.ClosePunctuation:
293                         case UnicodeCategory.InitialQuotePunctuation:
294                         case UnicodeCategory.FinalQuotePunctuation:
295                         case UnicodeCategory.OtherPunctuation:
296                                 return true;
297                         }
298
299                         return false;
300                 }
301
302                 // Only Azeri and Turkish have their own special cases.
303                 // Other than them, all languages have common special case
304                 // (enumerable enough).
305                 public virtual char ToLower (char c)
306                 {
307                         // quick ASCII range check
308                         if (c < 0x40 || 0x60 < c && c < 128)
309                                 return c;
310                         else if ('A' <= c && c <= 'Z' && (!handleDotI || c != 'I'))
311                                 return (char) (c + 0x20);
312
313                         if (ci == null || ci.LCID == 0x7F)
314                                 return Char.ToLowerInvariant (c);
315
316                         switch (c) {
317                         case '\u0049': // Latin uppercase I
318                                 if (handleDotI)
319                                         return '\u0131'; // I becomes dotless i
320                                 break;
321                         case '\u0130': // I-dotted
322                                 return '\u0069'; // i
323
324                         case '\u01c5': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
325                                 return '\u01c6';
326                         // \u01c7 -> \u01c9 (LJ) : invariant
327                         case '\u01c8': // LATIN CAPITAL LETTER L WITH SMALL LETTER J
328                                 return '\u01c9';
329                         // \u01ca -> \u01cc (NJ) : invariant
330                         case '\u01cb': // LATIN CAPITAL LETTER N WITH SMALL LETTER J
331                                 return '\u01cc';
332                         // WITH CARON : invariant
333                         // WITH DIAERESIS AND * : invariant
334
335                         case '\u01f2': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
336                                 return '\u01f3';
337                         case '\u03d2':  // ? it is not in ICU
338                                 return '\u03c5';
339                         case '\u03d3':  // ? it is not in ICU
340                                 return '\u03cd';
341                         case '\u03d4':  // ? it is not in ICU
342                                 return '\u03cb';
343                         }
344                         return Char.ToLowerInvariant (c);
345                 }
346
347                 public virtual char ToUpper (char c)
348                 {
349                         // quick ASCII range check
350                         if (c < 0x60)
351                                 return c;
352                         else if ('a' <= c && c <= 'z' && (!handleDotI || c != 'i'))
353                                 return (char) (c - 0x20);
354
355                         if (ci == null || ci.LCID == 0x7F)
356                                 return Char.ToUpperInvariant (c);
357
358                         switch (c) {
359                         case '\u0069': // Latin lowercase i
360                                 if (handleDotI)
361                                         return '\u0130'; // dotted capital I
362                                 break;
363                         case '\u0131': // dotless i
364                                 return '\u0049'; // I
365
366                         case '\u01c5': // see ToLower()
367                                 return '\u01c4';
368                         case '\u01c8': // see ToLower()
369                                 return '\u01c7';
370                         case '\u01cb': // see ToLower()
371                                 return '\u01ca';
372                         case '\u01f2': // see ToLower()
373                                 return '\u01f1';
374                         case '\u0390': // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
375                                 return '\u03aa'; // it is not in ICU
376                         case '\u03b0': // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
377                                 return '\u03ab'; // it is not in ICU
378                         case '\u03d0': // GREEK BETA
379                                 return '\u0392';
380                         case '\u03d1': // GREEK THETA
381                                 return '\u0398';
382                         case '\u03d5': // GREEK PHI
383                                 return '\u03a6';
384                         case '\u03d6': // GREEK PI
385                                 return '\u03a0';
386                         case '\u03f0': // GREEK KAPPA
387                                 return '\u039a';
388                         case '\u03f1': // GREEK RHO
389                                 return '\u03a1';
390                         // am not sure why miscellaneous GREEK symbols are 
391                         // not handled here.
392                         }
393
394                         return Char.ToUpperInvariant (c);
395                 }
396
397                 private char ToTitleCase (char c)
398                 {
399                         // Handle some Latin characters.
400                         switch (c) {
401                         case '\u01c4':
402                         case '\u01c5':
403                         case '\u01c6':
404                                 return '\u01c5';
405                         case '\u01c7':
406                         case '\u01c8':
407                         case '\u01c9':
408                                 return '\u01c8';
409                         case '\u01ca':
410                         case '\u01cb':
411                         case '\u01cc':
412                                 return '\u01cb';
413                         case '\u01f1':
414                         case '\u01f2':
415                         case '\u01f3':
416                                 return '\u01f2';
417                         }
418                         if ('\u2170' <= c && c <= '\u217f' || // Roman numbers
419                                 '\u24d0' <= c && c <= '\u24e9')
420                                 return c;
421                         return ToUpper (c);
422                 }
423
424                 public unsafe virtual string ToLower (string str)
425                 {
426                         // In ICU (3.2) there are a few cases that one single
427                         // character results in multiple characters in e.g.
428                         // tr-TR culture. So I tried brute force conversion
429                         // test with single character as a string input, but 
430                         // there was no such conversion. So I think it just
431                         // invokes ToLower(char).
432                         if (str == null)
433                                 throw new ArgumentNullException ("str");
434
435                         if (str.Length == 0)
436                                 return String.Empty;
437
438                         string tmp = String.InternalAllocateStr (str.Length);
439                         fixed (char* source = str, dest = tmp) {
440
441                                 char* destPtr = (char*)dest;
442                                 char* sourcePtr = (char*)source;
443
444                                 for (int n = 0; n < str.Length; n++) {
445                                         *destPtr = ToLower (*sourcePtr);
446                                         sourcePtr++;
447                                         destPtr++;
448                                 }
449                         }
450                         return tmp;
451                 }
452
453                 public unsafe virtual string ToUpper (string str)
454                 {
455                         // In ICU (3.2) there is a case that string
456                         // is handled beyond per-character conversion, but
457                         // it is only lt-LT culture where MS.NET does not
458                         // handle any special transliteration. So I keep
459                         // ToUpper() just as character conversion.
460                         if (str == null)
461                                 throw new ArgumentNullException ("str");
462
463                         if (str.Length == 0)
464                                 return String.Empty;
465
466                         string tmp = String.InternalAllocateStr (str.Length);
467                         fixed (char* source = str, dest = tmp) {
468
469                                 char* destPtr = (char*)dest;
470                                 char* sourcePtr = (char*)source;
471
472                                 for (int n = 0; n < str.Length; n++) {
473                                         *destPtr = ToUpper (*sourcePtr);
474                                         sourcePtr++;
475                                         destPtr++;
476                                 }
477                         }
478                         return tmp;
479                 }
480
481                 [ComVisible (false)]
482                 public static TextInfo ReadOnly (TextInfo textInfo)
483                 {
484                         if (textInfo == null)
485                                 throw new ArgumentNullException ("textInfo");
486
487                         TextInfo ti = new TextInfo (textInfo);
488                         ti.m_isReadOnly = true;
489                         return ti;
490                 }
491
492                 /* IDeserialization interface */
493                 [MonoTODO]
494                 void IDeserializationCallback.OnDeserialization(object sender)
495                 {
496                         // FIXME: we need to re-create "data" in order to get most properties working
497                 }
498
499                 /* IClonable */
500                 [ComVisible (false)]
501                 public virtual object Clone ()
502                 {
503                         return new TextInfo (this);
504                 }
505         }
506 }