mcs/class/corlib/System.Globalization/TextInfo.cs

   1 //
   2 // System.Globalization.TextInfo.cs
   3 //
   4 // Authors:
   5 //      Dick Porter (dick@ximian.com)
   6 //      Duncan Mak (duncan@ximian.com)
   7 //      Atsushi Enomoto (atsushi@ximian.com)
   8 //      Sebastien Pouliot  <sebastien@ximian.com>
   9 //
  10 // (C) 2002 Ximian, Inc.
  11 // (C) 2005 Novell, Inc.
  12 //
  13 // TODO:
  14 //   Missing the various code page mappings.
  15 //   Missing the OnDeserialization implementation.
  16 //
  17 // Copyright (C) 2004, 2005 Novell, Inc (http://www.novell.com)
  18 //
  19 // Permission is hereby granted, free of charge, to any person obtaining
  20 // a copy of this software and associated documentation files (the
  21 // "Software"), to deal in the Software without restriction, including
  22 // without limitation the rights to use, copy, modify, merge, publish,
  23 // distribute, sublicense, and/or sell copies of the Software, and to
  24 // permit persons to whom the Software is furnished to do so, subject to
  25 // the following conditions:
  26 //
  27 // The above copyright notice and this permission notice shall be
  28 // included in all copies or substantial portions of the Software.
  29 //
  30 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  31 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  32 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  33 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  34 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  35 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  36 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  37 //
  38
  39 using System.Runtime.Serialization;
  40 using System.Runtime.InteropServices;
  41 using System.Text;
  42
  43 namespace System.Globalization {
  44
  45         [Serializable]
  46         [ComVisible (true)]
  47         [MonoTODO ("IDeserializationCallback isn't implemented.")]
  48         public class TextInfo: IDeserializationCallback, ICloneable
  49         {
  50                 [StructLayout (LayoutKind.Sequential)]
                     // Plain per-culture text data. The constructor fills it by copying
                     // straight out of the raw pointer it receives.
                     // NOTE(review): the sequential layout presumably has to match the
                     // producer of that pointer - confirm before reordering fields.
  51                 struct Data {
  52                         public int ansi; // returned by ANSICodePage
  53                         public int ebcdic; // returned by EBCDICCodePage
  54                         public int mac; // returned by MacCodePage
  55                         public int oem; // returned by OEMCodePage
  56                         public bool right_to_left; // returned by IsRightToLeft
  57                         public byte list_sep; // cast to char for the default ListSeparator; ',' when no data pointer is given
  58                 }
  59
  60                 string m_listSeparator; // filled lazily by the ListSeparator getter, or assigned by its setter
  61                 bool m_isReadOnly; // set by the constructor and by ReadOnly (); exposed as IsReadOnly
  62                 string customCultureName; // cache for the CultureName getter
  63
                     // These two fields are only copied from instance to instance in this file.
                     // NOTE(review): warning 169 is presumably silenced because nothing else
                     // reads them - confirm whether they exist only for serialization layout.
  64 #pragma warning disable 169
  65                 [NonSerialized]
  66                 int m_nDataItem;
  67                 bool m_useUserOverride;
  68 #pragma warning restore 169
  69
  70                 int m_win32LangID; // LCID given to the constructor; used by LCID, Equals, GetHashCode and ToString
  71
                     // Culture this instance was created for. Not serialized; CultureName
                     // and the char casing methods check it for null.
  72                 [NonSerialized]
  73                 readonly CultureInfo ci;
  74
                     // True when the constructor found LCID 31 (Turkish) or 44 (Azeri) at the
                     // top of the culture's parent chain; enables dotted/dotless I casing.
  75                 [NonSerialized]
  76                 readonly bool handleDotI;
  77
                     // Code pages, text direction and default list separator for the culture.
  78                 [NonSerialized]
  79                 readonly Data data;
  80
  81                 internal unsafe TextInfo (CultureInfo ci, int lcid, void* data, bool read_only)
  82                 {
  83                         this.m_isReadOnly = read_only;
  84                         this.m_win32LangID = lcid;
  85                         this.ci = ci;
  86                         if (data != null)
  87                                 this.data = *(Data*) data;
  88                         else {
  89                                 this.data = new Data ();
  90                                 this.data.list_sep = (byte) ',';
  91                         }
  92
  93                         CultureInfo tmp = ci;
  94                         while (tmp.Parent != null && tmp.Parent.LCID != 0x7F && tmp.Parent != tmp)
  95                                 tmp = tmp.Parent;
  96
  97                         if (tmp != null) {
  98                                 switch (tmp.LCID) {
  99                                 case 44: // Azeri (az)
 100                                 case 31: // Turkish (tr)
 101                                         handleDotI = true;
 102                                         break;
 103                                 }
 104                         }
 105                 }
 106
 107                 private TextInfo (TextInfo textInfo)
 108                 {
 109                         m_win32LangID = textInfo.m_win32LangID;
 110                         m_nDataItem = textInfo.m_nDataItem;
 111                         m_useUserOverride = textInfo.m_useUserOverride;
 112                         m_listSeparator = textInfo.ListSeparator;
 113                         customCultureName = textInfo.CultureName;
 114                         ci = textInfo.ci;
 115                         handleDotI = textInfo.handleDotI;
 116                         data = textInfo.data;
 117                 }
 118
 119                 public virtual int ANSICodePage
 120                 {
 121                         get {
 122                                 return data.ansi;
 123                         }
 124                 }
 125
 126                 public virtual int EBCDICCodePage
 127                 {
 128                         get {
 129                                 return data.ebcdic;
 130                         }
 131                 }
 132
 133                 [ComVisible (false)]
 134                 public int LCID {
 135                         get { return m_win32LangID; }
 136                 }
 137
 138                 public virtual string ListSeparator {
 139                         get {
 140                                 if (m_listSeparator == null)
 141                                         m_listSeparator = ((char) data.list_sep).ToString ();
 142                                 return m_listSeparator;
 143                         }
 144                         [ComVisible (false)]
 145                         set { m_listSeparator = value; }
 146                 }
 147
 148                 public virtual int MacCodePage
 149                 {
 150                         get {
 151                                 return data.mac;
 152                         }
 153                 }
 154
 155                 public virtual int OEMCodePage
 156                 {
 157                         get {
 158                                 return data.oem;
 159                         }
 160                 }
 161
 162                 [ComVisible (false)]
 163                 public string CultureName {
 164                         get {
 165                                 if (customCultureName == null)
 166                                         customCultureName = ci == null ? String.Empty : ci.Name;
 167                                 return customCultureName;
 168                         }
 169                 }
 170
 171                 [ComVisible (false)]
 172                 public bool IsReadOnly {
 173                         get { return m_isReadOnly; }
 174                 }
 175
 176                 [ComVisible (false)]
 177                 public bool IsRightToLeft {
 178                         get {
 179                                 return data.right_to_left;
 180                         }
 181                 }
 182
 183                 public override bool Equals (object obj)
 184                 {
 185                         if (obj == null)
 186                                 return false;
 187                         TextInfo other = obj as TextInfo;
 188                         if (other == null)
 189                                 return false;
 190                         if (other.m_win32LangID != m_win32LangID)
 191                                 return false;
 192                         if (other.ci != ci)
 193                                 return false;
 194                         return true;
 195                 }
 196
 197                 public override int GetHashCode()
 198                 {
 199                         return (m_win32LangID);
 200                 }
 201
 202                 public override string ToString()
 203                 {
 204                         return "TextInfo - " + m_win32LangID;
 205                 }
 206
 207                 public string ToTitleCase (string str)
 208                 {
 209                         if(str == null)
 210                                 throw new ArgumentNullException ("str");
 211
 212                         StringBuilder sb = null;
 213                         int i = 0;
 214                         int start = 0;
 215                         while (i < str.Length) {
 216                                 if (!Char.IsLetter (str [i++]))
 217                                         continue;
 218                                 i--;
 219                                 char t = ToTitleCase (str [i]);
 220                                 bool capitalize = true;
 221                                 if (t == str [i]) {
 222                                         capitalize = false;
 223                                         bool allTitle = true;
 224                                         // if the word is all titlecase,
 225                                         // then don't capitalize it.
 226                                         int saved = i;
 227                                         while (++i < str.Length) {
 228                                                 var ch = str [i];
 229                                                 var category = char.GetUnicodeCategory (ch);
 230                                                 if (IsSeparator (category))
 231                                                         break;
 232                                                 t = ToTitleCase (ch);
 233                                                 if (t != ch) {
 234                                                         allTitle = false;
 235                                                         break;
 236                                                 }
 237                                         }
 238                                         if (allTitle)
 239                                                 continue;
 240                                         i = saved;
 241
 242                                         // still check if all remaining
 243                                         // characters are lowercase,
 244                                         // where we don't have to modify
 245                                         // the source word.
 246                                         while (++i < str.Length) {
 247                                                 var ch = str [i];
 248                                                 var category = char.GetUnicodeCategory (ch);
 249                                                 if (IsSeparator (category))
 250                                                         break;
 251                                                 if (ToLower (ch) != ch) {
 252                                                         capitalize = true;
 253                                                         i = saved;
 254                                                         break;
 255                                                 }
 256                                         }
 257                                 }
 258
 259                                 if (capitalize) {
 260                                         if (sb == null)
 261                                                 sb = new StringBuilder (str.Length);
 262                                         sb.Append (str, start, i - start);
 263                                         sb.Append (ToTitleCase (str [i]));
 264                                         start = i + 1;
 265                                         while (++i < str.Length) {
 266                                                 var ch = str [i];
 267                                                 var category = char.GetUnicodeCategory (ch);
 268                                                 if (IsSeparator (category))
 269                                                         break;
 270                                                 sb.Append (ToLower (ch));
 271                                         }
 272                                         start = i;
 273                                 }
 274                         }
 275                         if (sb != null)
 276                                 sb.Append (str, start, str.Length - start);
 277
 278                         return sb != null ? sb.ToString () : str;
 279                 }
 280
 281                 static bool IsSeparator (UnicodeCategory category)
 282                 {
 283                         switch (category) {
 284                         case UnicodeCategory.SpaceSeparator:
 285                         case UnicodeCategory.LineSeparator:
 286                         case UnicodeCategory.ParagraphSeparator:
 287                         case UnicodeCategory.Control:
 288                         case UnicodeCategory.Format:
 289                         case UnicodeCategory.ConnectorPunctuation:
 290                         case UnicodeCategory.DashPunctuation:
 291                         case UnicodeCategory.OpenPunctuation:
 292                         case UnicodeCategory.ClosePunctuation:
 293                         case UnicodeCategory.InitialQuotePunctuation:
 294                         case UnicodeCategory.FinalQuotePunctuation:
 295                         case UnicodeCategory.OtherPunctuation:
 296                                 return true;
 297                         }
 298
 299                         return false;
 300                 }
 301
 302                 // Only Azeri and Turkish have their own special cases.
 303                 // Other than them, all languages have common special case
 304                 // (enumerable enough).
 305                 public virtual char ToLower (char c)
 306                 {
 307                         // quick ASCII range check
 308                         if (c < 0x40 || 0x60 < c && c < 128)
 309                                 return c;
 310                         else if ('A' <= c && c <= 'Z' && (!handleDotI || c != 'I'))
 311                                 return (char) (c + 0x20);
 312
 313                         if (ci == null || ci.LCID == 0x7F)
 314                                 return Char.ToLowerInvariant (c);
 315
 316                         switch (c) {
 317                         case '\u0049': // Latin uppercase I
 318                                 if (handleDotI)
 319                                         return '\u0131'; // I becomes dotless i
 320                                 break;
 321                         case '\u0130': // I-dotted
 322                                 return '\u0069'; // i
 323
 324                         case '\u01c5': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
 325                                 return '\u01c6';
 326                         // \u01c7 -> \u01c9 (LJ) : invariant
 327                         case '\u01c8': // LATIN CAPITAL LETTER L WITH SMALL LETTER J
 328                                 return '\u01c9';
 329                         // \u01ca -> \u01cc (NJ) : invariant
 330                         case '\u01cb': // LATIN CAPITAL LETTER N WITH SMALL LETTER J
 331                                 return '\u01cc';
 332                         // WITH CARON : invariant
 333                         // WITH DIAERESIS AND * : invariant
 334
 335                         case '\u01f2': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
 336                                 return '\u01f3';
 337                         case '\u03d2':  // ? it is not in ICU
 338                                 return '\u03c5';
 339                         case '\u03d3':  // ? it is not in ICU
 340                                 return '\u03cd';
 341                         case '\u03d4':  // ? it is not in ICU
 342                                 return '\u03cb';
 343                         }
 344                         return Char.ToLowerInvariant (c);
 345                 }
 346
 347                 public virtual char ToUpper (char c)
 348                 {
 349                         // quick ASCII range check
 350                         if (c < 0x60)
 351                                 return c;
 352                         else if ('a' <= c && c <= 'z' && (!handleDotI || c != 'i'))
 353                                 return (char) (c - 0x20);
 354
 355                         if (ci == null || ci.LCID == 0x7F)
 356                                 return Char.ToUpperInvariant (c);
 357
 358                         switch (c) {
 359                         case '\u0069': // Latin lowercase i
 360                                 if (handleDotI)
 361                                         return '\u0130'; // dotted capital I
 362                                 break;
 363                         case '\u0131': // dotless i
 364                                 return '\u0049'; // I
 365
 366                         case '\u01c5': // see ToLower()
 367                                 return '\u01c4';
 368                         case '\u01c8': // see ToLower()
 369                                 return '\u01c7';
 370                         case '\u01cb': // see ToLower()
 371                                 return '\u01ca';
 372                         case '\u01f2': // see ToLower()
 373                                 return '\u01f1';
 374                         case '\u0390': // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
 375                                 return '\u03aa'; // it is not in ICU
 376                         case '\u03b0': // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
 377                                 return '\u03ab'; // it is not in ICU
 378                         case '\u03d0': // GREEK BETA
 379                                 return '\u0392';
 380                         case '\u03d1': // GREEK THETA
 381                                 return '\u0398';
 382                         case '\u03d5': // GREEK PHI
 383                                 return '\u03a6';
 384                         case '\u03d6': // GREEK PI
 385                                 return '\u03a0';
 386                         case '\u03f0': // GREEK KAPPA
 387                                 return '\u039a';
 388                         case '\u03f1': // GREEK RHO
 389                                 return '\u03a1';
 390                         // am not sure why miscellaneous GREEK symbols are
 391                         // not handled here.
 392                         }
 393
 394                         return Char.ToUpperInvariant (c);
 395                 }
 396
 397                 private char ToTitleCase (char c)
 398                 {
 399                         // Handle some Latin characters.
 400                         switch (c) {
 401                         case '\u01c4':
 402                         case '\u01c5':
 403                         case '\u01c6':
 404                                 return '\u01c5';
 405                         case '\u01c7':
 406                         case '\u01c8':
 407                         case '\u01c9':
 408                                 return '\u01c8';
 409                         case '\u01ca':
 410                         case '\u01cb':
 411                         case '\u01cc':
 412                                 return '\u01cb';
 413                         case '\u01f1':
 414                         case '\u01f2':
 415                         case '\u01f3':
 416                                 return '\u01f2';
 417                         }
 418                         if ('\u2170' <= c && c <= '\u217f' || // Roman numbers
 419                                 '\u24d0' <= c && c <= '\u24e9')
 420                                 return c;
 421                         return ToUpper (c);
 422                 }
 423
 424                 public unsafe virtual string ToLower (string str)
 425                 {
 426                         // In ICU (3.2) there are a few cases that one single
 427                         // character results in multiple characters in e.g.
 428                         // tr-TR culture. So I tried brute force conversion
 429                         // test with single character as a string input, but
 430                         // there was no such conversion. So I think it just
 431                         // invokes ToLower(char).
 432                         if (str == null)
 433                                 throw new ArgumentNullException ("str");
 434
 435                         if (str.Length == 0)
 436                                 return String.Empty;
 437
 438                         string tmp = String.InternalAllocateStr (str.Length);
 439                         fixed (char* source = str, dest = tmp) {
 440
 441                                 char* destPtr = (char*)dest;
 442                                 char* sourcePtr = (char*)source;
 443
 444                                 for (int n = 0; n < str.Length; n++) {
 445                                         *destPtr = ToLower (*sourcePtr);
 446                                         sourcePtr++;
 447                                         destPtr++;
 448                                 }
 449                         }
 450                         return tmp;
 451                 }
 452
 453                 public unsafe virtual string ToUpper (string str)
 454                 {
 455                         // In ICU (3.2) there is a case that string
 456                         // is handled beyond per-character conversion, but
 457                         // it is only lt-LT culture where MS.NET does not
 458                         // handle any special transliteration. So I keep
 459                         // ToUpper() just as character conversion.
 460                         if (str == null)
 461                                 throw new ArgumentNullException ("str");
 462
 463                         if (str.Length == 0)
 464                                 return String.Empty;
 465
 466                         string tmp = String.InternalAllocateStr (str.Length);
 467                         fixed (char* source = str, dest = tmp) {
 468
 469                                 char* destPtr = (char*)dest;
 470                                 char* sourcePtr = (char*)source;
 471
 472                                 for (int n = 0; n < str.Length; n++) {
 473                                         *destPtr = ToUpper (*sourcePtr);
 474                                         sourcePtr++;
 475                                         destPtr++;
 476                                 }
 477                         }
 478                         return tmp;
 479                 }
 480
 481                 [ComVisible (false)]
 482                 public static TextInfo ReadOnly (TextInfo textInfo)
 483                 {
 484                         if (textInfo == null)
 485                                 throw new ArgumentNullException ("textInfo");
 486
 487                         TextInfo ti = new TextInfo (textInfo);
 488                         ti.m_isReadOnly = true;
 489                         return ti;
 490                 }
 491
 492                 /* IDeserializationCallback interface */
 493                 [MonoTODO]
                     // Deserialization callback; currently does nothing.
                     // The ci, handleDotI and data fields are marked [NonSerialized] and
                     // nothing here restores them.
 494                 void IDeserializationCallback.OnDeserialization(object sender)
 495                 {
 496                         // FIXME: we need to re-create "data" in order to get most properties working
 497                 }
 498
 499                 /* ICloneable */
 500                 [ComVisible (false)]
 501                 public virtual object Clone ()
 502                 {
 503                         return new TextInfo (this);
 504                 }
 505         }
 506 }