Merge pull request #1542 from ninjarobot/UriTemplateMatchException
[mono.git] / mcs / class / corlib / System.Globalization / TextInfo.cs
1 //
2 // System.Globalization.TextInfo.cs
3 //
4 // Authors:
5 //      Dick Porter (dick@ximian.com)
6 //      Duncan Mak (duncan@ximian.com)
7 //      Atsushi Enomoto (atsushi@ximian.com)
8 //      Sebastien Pouliot  <sebastien@ximian.com>
9 //
10 // (C) 2002 Ximian, Inc.
11 // (C) 2005 Novell, Inc.
12 //
13 // TODO:
14 //   Missing the various code page mappings.
15 //   Missing the OnDeserialization implementation.
16 //
17 // Copyright (C) 2004, 2005 Novell, Inc (http://www.novell.com)
18 //
19 // Permission is hereby granted, free of charge, to any person obtaining
20 // a copy of this software and associated documentation files (the
21 // "Software"), to deal in the Software without restriction, including
22 // without limitation the rights to use, copy, modify, merge, publish,
23 // distribute, sublicense, and/or sell copies of the Software, and to
24 // permit persons to whom the Software is furnished to do so, subject to
25 // the following conditions:
26 // 
27 // The above copyright notice and this permission notice shall be
28 // included in all copies or substantial portions of the Software.
29 // 
30 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
34 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
35 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
36 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37 //
38
39 using System.Runtime.CompilerServices;
40 using System.Runtime.Serialization;
41 using System.Runtime.InteropServices;
42 using System.Text;
43
44 namespace System.Globalization {
45
46         [Serializable]
47         [ComVisible (true)]
48         [MonoTODO ("IDeserializationCallback isn't implemented.")]
49         public class TextInfo: IDeserializationCallback, ICloneable
50         {
// Case-mapping tables obtained from the runtime. The "low" tables are
// indexed directly by the character value; the "high" tables are
// indexed by (character - 0xff21). See ToLowerInvariant and
// ToUpperInvariant for the exact ranges that are consulted.
private readonly unsafe static ushort *to_lower_data_low;
private readonly unsafe static ushort *to_lower_data_high;
private readonly unsafe static ushort *to_upper_data_low;
private readonly unsafe static ushort *to_upper_data_high;

// Internal call implemented by the runtime; fills in the four table
// pointers declared above.
[MethodImplAttribute (MethodImplOptions.InternalCall)]
private unsafe static extern void GetDataTablePointersLite (
	out ushort *to_lower_data_low,
	out ushort *to_lower_data_high,
	out ushort *to_upper_data_low,
	out ushort *to_upper_data_high);

// Type initializer: fetches the table pointers once.
unsafe static TextInfo ()
{
	GetDataTablePointersLite (
		out to_lower_data_low,
		out to_lower_data_high,
		out to_upper_data_low,
		out to_upper_data_high);
}
65
// Culture-independent lowercase mapping driven by the runtime tables.
// Characters up to U+24CF are looked up in the low table, characters
// from U+FF21 upwards in the high table; anything in between is
// returned unchanged.
static unsafe char ToLowerInvariant (char c)
{
	if (c <= '\u24cf')
		return (char) to_lower_data_low [c];
	if (c < '\uff21')
		return c;
	return (char) to_lower_data_high [c - 0xff21];
}
76
// Culture-independent uppercase mapping driven by the runtime tables.
// Characters up to U+24E9 are looked up in the low table, characters
// from U+FF21 upwards in the high table; anything in between is
// returned unchanged.
static unsafe char ToUpperInvariant (char c)
{
	if (c <= '\u24e9')
		return (char) to_upper_data_low [c];
	if (c < '\uff21')
		return c;
	return (char) to_upper_data_high [c - 0xff21];
}
87                 
// Per-culture text data record. The constructor copies it straight
// out of a raw pointer, so the field order and types below must not
// be changed.
[StructLayout (LayoutKind.Sequential)]
struct Data
{
	public int ansi;		// returned by ANSICodePage
	public int ebcdic;		// returned by EBCDICCodePage
	public int mac;			// returned by MacCodePage
	public int oem;			// returned by OEMCodePage
	public bool right_to_left;	// returned by IsRightToLeft
	public byte list_sep;		// cast to char to build ListSeparator
}
97
// Cached list separator; null until ListSeparator is first read or set.
string m_listSeparator;
// Backing flag for IsReadOnly.
bool m_isReadOnly;
// Cached culture name; null until CultureName is first read.
string customCultureName;

// The two fields below are only carried from one instance to the next
// by the copy constructor; warning 169 is disabled around them.
#pragma warning disable 169
[NonSerialized]
int m_nDataItem;
bool m_useUserOverride;
#pragma warning restore 169

// Backing value for LCID; also used by Equals and GetHashCode.
int m_win32LangID;

// Culture this instance was created for. Not serialized, and other
// members test it against null before use.
[NonSerialized]
readonly CultureInfo ci;

// True when the Turkish/Azeri dotted/dotless "i" rules apply
// (decided once in the constructor).
[NonSerialized]
readonly bool handleDotI;

// Code page / separator / direction record for this culture.
[NonSerialized]
readonly Data data;
118
// Creates a TextInfo for the given culture.
//
//   ci        - owning culture; may be null, in which case no
//               Turkish/Azeri special casing is enabled.
//   lcid      - value reported by the LCID property.
//   data      - pointer to a Data record to copy, or null to fall
//               back to an empty record with ',' as list separator.
//   read_only - initial value of IsReadOnly.
internal unsafe TextInfo (CultureInfo ci, int lcid, void* data, bool read_only)
{
	this.m_isReadOnly = read_only;
	this.m_win32LangID = lcid;
	this.ci = ci;
	if (data != null)
		this.data = *(Data*) data;
	else {
		this.data = new Data ();
		this.data.list_sep = (byte) ',';
	}

	// Walk up to the top-most specific/neutral culture, stopping
	// before the invariant culture (LCID 0x7F) and guarding against
	// a culture that is its own parent. The null test is part of the
	// loop condition so that a null 'ci' no longer causes a
	// NullReferenceException here (other members of this class
	// already tolerate ci == null).
	CultureInfo tmp = ci;
	while (tmp != null) {
		CultureInfo parent = tmp.Parent;
		if (parent == null || parent.LCID == 0x7F || parent == tmp)
			break;
		tmp = parent;
	}

	if (tmp != null) {
		switch (tmp.LCID) {
		case 44: // Azeri (az)
		case 31: // Turkish (tr)
			handleDotI = true;
			break;
		}
	}
}
144
// Copy constructor used by Clone and ReadOnly. The list separator and
// culture name are taken through the source's properties, so their
// lazily computed values are materialised. m_isReadOnly is not copied:
// the new instance starts out writable.
private TextInfo (TextInfo textInfo)
{
	this.ci = textInfo.ci;
	this.data = textInfo.data;
	this.handleDotI = textInfo.handleDotI;

	this.m_win32LangID = textInfo.m_win32LangID;
	this.m_nDataItem = textInfo.m_nDataItem;
	this.m_useUserOverride = textInfo.m_useUserOverride;

	this.m_listSeparator = textInfo.ListSeparator;
	this.customCultureName = textInfo.CultureName;
}
156
// ANSI code page number stored in this instance's data record.
public virtual int ANSICodePage {
	get { return data.ansi; }
}
163
// EBCDIC code page number stored in this instance's data record.
public virtual int EBCDICCodePage {
	get { return data.ebcdic; }
}
170
// Locale identifier that was passed to the constructor.
[ComVisible (false)]
public int LCID {
	get {
		return m_win32LangID;
	}
}
175
// String used to separate list items.
//
// get: returns the explicitly assigned value if there is one;
//      otherwise builds (and caches) a one-character string from the
//      list_sep byte of the data record.
// set: stores the new separator.
//      Throws ArgumentNullException when the value is null and
//      InvalidOperationException when this instance is read-only,
//      as the .NET TextInfo.ListSeparator contract specifies.
public virtual string ListSeparator {
	get {
		if (m_listSeparator == null)
			m_listSeparator = ((char) data.list_sep).ToString ();
		return m_listSeparator;
	}
	[ComVisible (false)]
	set {
		if (value == null)
			throw new ArgumentNullException ("value");
		// IsReadOnly must actually protect the instance.
		if (m_isReadOnly)
			throw new InvalidOperationException ("Instance is read-only.");
		m_listSeparator = value;
	}
}
185
// Macintosh code page number stored in this instance's data record.
public virtual int MacCodePage {
	get { return data.mac; }
}
192
// OEM code page number stored in this instance's data record.
public virtual int OEMCodePage {
	get { return data.oem; }
}
199
// Name of the associated culture. Computed on first access and cached;
// an instance without a culture reports the empty string.
[ComVisible (false)]
public string CultureName {
	get {
		if (customCultureName != null)
			return customCultureName;

		if (ci == null)
			customCultureName = String.Empty;
		else
			customCultureName = ci.Name;
		return customCultureName;
	}
}
208
// Whether this instance has been flagged as read-only.
[ComVisible (false)]
public bool IsReadOnly {
	get {
		return m_isReadOnly;
	}
}
213
// Right-to-left flag stored in this instance's data record.
[ComVisible (false)]
public bool IsRightToLeft {
	get { return data.right_to_left; }
}
220
// Two TextInfo objects are equal when they carry the same LCID and
// refer to the very same CultureInfo instance (reference comparison).
// Null and objects of any other type compare unequal.
public override bool Equals (object obj)
{
	TextInfo that = obj as TextInfo;
	if (that == null)
		return false;

	return that.m_win32LangID == m_win32LangID && that.ci == ci;
}
234
// The LCID doubles as the hash code; it is one of the two values
// compared by Equals.
public override int GetHashCode ()
{
	return m_win32LangID;
}
239                 
// Renders as "TextInfo - " followed by the LCID.
public override string ToString ()
{
	string lcidText = m_win32LangID.ToString ();
	return "TextInfo - " + lcidText;
}
244
// Converts each word of 'str' to title case and returns the result.
//
// A word starts at a letter and runs up to the next separator (see
// IsSeparator). For each word:
//   * if its first character is already title case and every other
//     character is too, the word is left alone;
//   * if its first character is already title case and the rest is
//     all lowercase, the word is left alone;
//   * otherwise the first character is title-cased and the rest is
//     lowercased.
// When no word needs changing the original string instance is
// returned and no StringBuilder is allocated.
//
// Throws ArgumentNullException when 'str' is null.
public string ToTitleCase (string str)
{
	if (str == null)
		throw new ArgumentNullException ("str");

	int length = str.Length;
	StringBuilder result = null;
	int copiedUpTo = 0;	// first source index not yet written to 'result'
	int pos = 0;

	while (pos < length) {
		// Skip ahead to the first letter of the next word.
		if (!Char.IsLetter (str [pos])) {
			pos++;
			continue;
		}

		int wordStart = pos;
		bool rewrite = ToTitleCase (str [wordStart]) != str [wordStart];

		if (!rewrite) {
			// First character is fine. If the whole word is
			// title case, keep it as is.
			bool allTitle = true;
			for (pos = wordStart + 1; pos < length; pos++) {
				char ch = str [pos];
				if (IsSeparator (char.GetUnicodeCategory (ch)))
					break;
				if (ToTitleCase (ch) != ch) {
					allTitle = false;
					break;
				}
			}
			if (allTitle)
				continue;

			// Otherwise the word may still be left untouched,
			// provided the remaining characters are lowercase.
			for (pos = wordStart + 1; pos < length; pos++) {
				char ch = str [pos];
				if (IsSeparator (char.GetUnicodeCategory (ch)))
					break;
				if (ToLower (ch) != ch) {
					rewrite = true;
					break;
				}
			}
		}

		if (!rewrite)
			continue;

		if (result == null)
			result = new StringBuilder (length);

		// Copy everything that precedes the word verbatim, then
		// emit the re-cased word.
		result.Append (str, copiedUpTo, wordStart - copiedUpTo);
		result.Append (ToTitleCase (str [wordStart]));
		for (pos = wordStart + 1; pos < length; pos++) {
			char ch = str [pos];
			if (IsSeparator (char.GetUnicodeCategory (ch)))
				break;
			result.Append (ToLower (ch));
		}
		copiedUpTo = pos;
	}

	if (result == null)
		return str;

	result.Append (str, copiedUpTo, length - copiedUpTo);
	return result.ToString ();
}
318
// Tells whether a character of the given Unicode category ends a word
// for ToTitleCase: separators, control and format characters, and
// every punctuation category count as word boundaries.
static bool IsSeparator (UnicodeCategory category)
{
	switch (category) {
	// separators
	case UnicodeCategory.SpaceSeparator:
	case UnicodeCategory.LineSeparator:
	case UnicodeCategory.ParagraphSeparator:
	// control / format
	case UnicodeCategory.Control:
	case UnicodeCategory.Format:
	// punctuation
	case UnicodeCategory.ConnectorPunctuation:
	case UnicodeCategory.DashPunctuation:
	case UnicodeCategory.OpenPunctuation:
	case UnicodeCategory.ClosePunctuation:
	case UnicodeCategory.InitialQuotePunctuation:
	case UnicodeCategory.FinalQuotePunctuation:
	case UnicodeCategory.OtherPunctuation:
		return true;
	default:
		return false;
	}
}
339
// Lowercases a single character.
//
// Azeri and Turkish are the only cultures with rules of their own
// (the dotted / dotless "i"); every other culture shares one short
// list of special cases, enumerated in the switch below. Everything
// else is delegated to the invariant mapping tables.
public virtual char ToLower (char c)
{
	// ASCII fast path: characters below '@', and those between '`'
	// and DEL, have no lowercase form different from themselves.
	bool belowAt = c < 0x40;
	bool asciiAfterBacktick = c > 0x60 && c < 128;
	if (belowAt || asciiAfterBacktick)
		return c;

	// Plain A-Z, except 'I' when the dotless-i rule is active.
	bool asciiUpper = c >= 'A' && c <= 'Z';
	if (asciiUpper && !(handleDotI && c == 'I'))
		return (char) (c + 0x20);

	if (ci == null || ci.LCID == 0x7F)
		return ToLowerInvariant (c);

	switch (c) {
	case '\u0049': // Latin uppercase I
		// I becomes dotless i for Turkish/Azeri
		return handleDotI ? '\u0131' : ToLowerInvariant (c);
	case '\u0130': // I-dotted
		return '\u0069'; // i

	// Titlecase digraphs. Their uppercase counterparts (\u01c4,
	// \u01c7, \u01ca, \u01f1) are handled by the invariant tables.
	case '\u01c5': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
		return '\u01c6';
	case '\u01c8': // LATIN CAPITAL LETTER L WITH SMALL LETTER J
		return '\u01c9';
	case '\u01cb': // LATIN CAPITAL LETTER N WITH SMALL LETTER J
		return '\u01cc';
	case '\u01f2': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
		return '\u01f3';

	// The following three mappings are not in ICU.
	case '\u03d2':
		return '\u03c5';
	case '\u03d3':
		return '\u03cd';
	case '\u03d4':
		return '\u03cb';

	default:
		return ToLowerInvariant (c);
	}
}
384
// Uppercases a single character. Mirrors ToLower (char): an ASCII
// fast path, the Turkish/Azeri dotted-i rule, a short list of special
// cases, and the invariant mapping tables for everything else.
public virtual char ToUpper (char c)
{
	// ASCII fast path: nothing below '`' changes when uppercased.
	if (c < 0x60)
		return c;

	// Plain a-z, except 'i' when the dotted-I rule is active.
	bool asciiLower = c >= 'a' && c <= 'z';
	if (asciiLower && !(handleDotI && c == 'i'))
		return (char) (c - 0x20);

	if (ci == null || ci.LCID == 0x7F)
		return ToUpperInvariant (c);

	switch (c) {
	case '\u0069': // Latin lowercase i
		// i becomes dotted capital I for Turkish/Azeri
		return handleDotI ? '\u0130' : ToUpperInvariant (c);
	case '\u0131': // dotless i
		return '\u0049'; // I

	// Titlecase digraphs, the counterpart of the cases in ToLower (char).
	case '\u01c5':
		return '\u01c4';
	case '\u01c8':
		return '\u01c7';
	case '\u01cb':
		return '\u01ca';
	case '\u01f2':
		return '\u01f1';

	// The following two mappings are not in ICU.
	case '\u0390': // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
		return '\u03aa';
	case '\u03b0': // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
		return '\u03ab';

	// Greek symbol variants map to the regular capital letter.
	// (It is unclear why the other miscellaneous Greek symbols
	// are not handled here as well.)
	case '\u03d0': // GREEK BETA
		return '\u0392';
	case '\u03d1': // GREEK THETA
		return '\u0398';
	case '\u03d5': // GREEK PHI
		return '\u03a6';
	case '\u03d6': // GREEK PI
		return '\u03a0';
	case '\u03f0': // GREEK KAPPA
		return '\u039a';
	case '\u03f1': // GREEK RHO
		return '\u03a1';

	default:
		return ToUpperInvariant (c);
	}
}
434
// Title-cases a single character. Only the four Latin digraph
// triples have a title-case form of their own; two ranges are
// returned untouched; everything else is simply uppercased.
private char ToTitleCase (char c)
{
	// Each digraph triple (upper, title, lower) maps to its
	// middle, title-case member.
	if (c >= '\u01c4' && c <= '\u01c6')
		return '\u01c5';
	if (c >= '\u01c7' && c <= '\u01c9')
		return '\u01c8';
	if (c >= '\u01ca' && c <= '\u01cc')
		return '\u01cb';
	if (c >= '\u01f1' && c <= '\u01f3')
		return '\u01f2';

	// Left unchanged: \u2170-\u217f (Roman numbers) and
	// \u24d0-\u24e9.
	bool romanNumber = c >= '\u2170' && c <= '\u217f';
	bool keptRange = c >= '\u24d0' && c <= '\u24e9';
	if (romanNumber || keptRange)
		return c;

	return ToUpper (c);
}
461
// Lowercases a whole string, one character at a time.
//
// ICU (3.2) has a few cases in which one character lowercases to
// several (e.g. for tr-TR), but a brute-force test feeding every
// single character as a one-character string showed no such
// conversion, so this simply applies ToLower (char) to each position.
//
// Throws ArgumentNullException when 'str' is null. An empty input
// yields String.Empty.
public unsafe virtual string ToLower (string str)
{
	if (str == null)
		throw new ArgumentNullException ("str");

	int length = str.Length;
	if (length == 0)
		return String.Empty;

	// Allocate the result up front and fill it in place.
	string result = String.InternalAllocateStr (length);
	fixed (char* src = str, dst = result) {
		for (int i = 0; i < length; i++)
			dst [i] = ToLower (src [i]);
	}
	return result;
}
490
// Uppercases a whole string, one character at a time.
//
// ICU (3.2) handles one case beyond per-character conversion, but
// only for the lt-LT culture, where MS.NET performs no special
// transliteration either; hence this stays a plain per-character
// conversion through ToUpper (char).
//
// Throws ArgumentNullException when 'str' is null. An empty input
// yields String.Empty.
public unsafe virtual string ToUpper (string str)
{
	if (str == null)
		throw new ArgumentNullException ("str");

	int length = str.Length;
	if (length == 0)
		return String.Empty;

	// Allocate the result up front and fill it in place.
	string result = String.InternalAllocateStr (length);
	fixed (char* src = str, dst = result) {
		for (int i = 0; i < length; i++)
			dst [i] = ToUpper (src [i]);
	}
	return result;
}
518
// Returns a read-only copy of 'textInfo'. A new instance is always
// created, even when the argument is already read-only.
//
// Throws ArgumentNullException when 'textInfo' is null.
[ComVisible (false)]
public static TextInfo ReadOnly (TextInfo textInfo)
{
	if (textInfo == null)
		throw new ArgumentNullException ("textInfo");

	TextInfo copy = new TextInfo (textInfo) { m_isReadOnly = true };
	return copy;
}
529
/* IDeserializationCallback */
// Not implemented: this callback currently does nothing.
[MonoTODO]
void IDeserializationCallback.OnDeserialization (object sender)
{
	// FIXME: "data" has to be re-created here before most of the
	// properties can work on a deserialized instance.
}
536
/* ICloneable */
// Returns a new TextInfo built by the copy constructor. The copy is
// writable, because that constructor does not carry over the
// read-only flag.
[ComVisible (false)]
public virtual object Clone ()
{
	TextInfo copy = new TextInfo (this);
	return copy;
}
543
// Case-insensitive hash of 'str'. Note that the comparer used is the
// one for the *current* culture, not for this instance's culture.
internal int GetCaseInsensitiveHashCode (string str)
{
	StringComparer comparer = StringComparer.CurrentCultureIgnoreCase;
	return comparer.GetHashCode (str);
}
548
// Forwards to the string's own GetCaseInsensitiveHashCode ().
internal static int GetHashCodeOrdinalIgnoreCase (string s)
{
	int hash = s.GetCaseInsensitiveHashCode ();
	return hash;
}
553         }
554 }