2 // System.Globalization.TextInfo.cs
5 // Dick Porter (dick@ximian.com)
6 // Duncan Mak (duncan@ximian.com)
7 // Atsushi Enomoto (atsushi@ximian.com)
8 // Sebastien Pouliot <sebastien@ximian.com>
10 // (C) 2002 Ximian, Inc.
11 // (C) 2005 Novell, Inc.
14 // Missing the various code page mappings.
15 // Missing the OnDeserialization implementation.
17 // Copyright (C) 2004, 2005 Novell, Inc (http://www.novell.com)
19 // Permission is hereby granted, free of charge, to any person obtaining
20 // a copy of this software and associated documentation files (the
21 // "Software"), to deal in the Software without restriction, including
22 // without limitation the rights to use, copy, modify, merge, publish,
23 // distribute, sublicense, and/or sell copies of the Software, and to
24 // permit persons to whom the Software is furnished to do so, subject to
25 // the following conditions:
27 // The above copyright notice and this permission notice shall be
28 // included in all copies or substantial portions of the Software.
30 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
34 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
35 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
36 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
39 using System.Runtime.CompilerServices;
40 using System.Runtime.Serialization;
41 using System.Runtime.InteropServices;
44 namespace System.Globalization {
48 [MonoTODO ("IDeserializationCallback isn't implemented.")]
49 public class TextInfo: IDeserializationCallback, ICloneable
// Fills the four static table pointers declared below through the out parameters.
// NOTE(review): the enclosing member header is elided in this excerpt - presumably the static constructor.
54 GetDataTablePointersLite (out to_lower_data_low, out to_lower_data_high, out to_upper_data_low, out to_upper_data_high);
// Case-mapping lookup tables of ushort values, read by ToLowerInvariant / ToUpperInvariant.
// The "low" tables are indexed directly by the character; the "high" tables are indexed
// by (character - 0xff21).
58 private readonly unsafe static ushort *to_lower_data_low;
59 private readonly unsafe static ushort *to_lower_data_high;
60 private readonly unsafe static ushort *to_upper_data_low;
61 private readonly unsafe static ushort *to_upper_data_high;
// Declared extern with InternalCall: there is no managed body, the implementation is
// supplied by the runtime and hands back the four table pointers.
62 [MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.InternalCall)]
63 private unsafe static extern void GetDataTablePointersLite (out ushort *to_lower_data_low, out ushort *to_lower_data_high,
64 out ushort *to_upper_data_low, out ushort *to_upper_data_high);
// Culture-independent lower-casing backed by the static lookup tables.
// NOTE(review): what happens to characters between the two ranges (and the final
// return) is elided in this excerpt - presumably the character is returned unchanged.
66 static char ToLowerInvariant (char c)
// Characters up to U+24CF: direct lookup in the low table.
69 if (c <= ((char)0x24cf))
70 return (char) to_lower_data_low [c];
// Characters from U+FF21 upward: lookup in the high table, rebased to U+FF21.
71 if (c >= ((char)0xff21))
72 return (char) to_lower_data_high[c - 0xff21];
// Culture-independent upper-casing backed by the static lookup tables.
// NOTE(review): what happens to characters between the two ranges (and the final
// return) is elided in this excerpt - presumably the character is returned unchanged.
77 static char ToUpperInvariant (char c)
// Characters up to U+24E9: direct lookup in the low table (note the bound differs from ToLowerInvariant).
80 if (c <= ((char)0x24e9))
81 return (char) to_upper_data_low [c];
// Characters from U+FF21 upward: lookup in the high table, rebased to U+FF21.
82 if (c >= ((char)0xff21))
83 return (char) to_upper_data_high [c - 0xff21];
// Culture data record (struct header elided in this excerpt). The constructor copies it
// out of a raw pointer via *(Data*), which is presumably why the layout is pinned to Sequential.
88 [StructLayout (LayoutKind.Sequential)]
// Writing-direction flag; exposed through the IsRightToLeft property.
94 public bool right_to_left;
// Backing store for ListSeparator; null until first read or until the setter assigns it.
98 string m_listSeparator;
// Backing store for CultureName; null until first read.
100 string customCultureName;
// Compiler warning 169 is silenced around the next declaration.
102 #pragma warning disable 169
// In the visible code this flag is only copied from one instance to another by the copy constructor.
105 bool m_useUserOverride;
106 #pragma warning restore 169
// Culture this instance belongs to; may be null (CultureName, ToLower and ToUpper all check for that).
111 readonly CultureInfo ci;
// When true, ASCII 'I' / 'i' skip the fast ASCII path in ToLower / ToUpper so the
// dotted / dotless I special cases can be applied instead.
114 readonly bool handleDotI;
// Creates a TextInfo for culture `ci` with language identifier `lcid`.
// `data` is a raw pointer to a Data record and `read_only` seeds IsReadOnly.
// NOTE(review): several lines (the condition choosing between the two `data` assignments,
// the loop body and the switch header) are elided in this excerpt.
119 internal unsafe TextInfo (CultureInfo ci, int lcid, void* data, bool read_only)
121 this.m_isReadOnly = read_only;
122 this.m_win32LangID = lcid;
// Copy the Data record by value out of the supplied pointer.
125 this.data = *(Data*) data;
// Alternative path: start from an empty Data record whose list separator is ','.
127 this.data = new Data ();
128 this.data.list_sep = (byte) ',';
// Walk up the parent chain; stop when the parent is null, has LCID 0x7F, or is the culture itself.
131 CultureInfo tmp = ci;
132 while (tmp.Parent != null && tmp.Parent.LCID != 0x7F && tmp.Parent != tmp)
// Azeri and Turkish are singled out here - presumably this is where handleDotI gets enabled (switch body elided).
137 case 44: // Azeri (az)
138 case 31: // Turkish (tr)
// Copy constructor used by Clone() and ReadOnly(): duplicates the state of `textInfo`.
// NOTE(review): original line 152 is elided in this excerpt (presumably the copy of `ci`).
145 private TextInfo (TextInfo textInfo)
147 m_win32LangID = textInfo.m_win32LangID;
148 m_nDataItem = textInfo.m_nDataItem;
149 m_useUserOverride = textInfo.m_useUserOverride;
// These two are read through the properties, not the fields, so a lazily computed
// value on the source instance is materialised before it is copied.
150 m_listSeparator = textInfo.ListSeparator;
151 customCultureName = textInfo.CultureName;
153 handleDotI = textInfo.handleDotI;
// Data is assigned by value.
154 data = textInfo.data;
// Code page properties. Their bodies are elided in this excerpt; the file header notes
// that the code page mappings are still missing.
157 public virtual int ANSICodePage
164 public virtual int EBCDICCodePage
// Getter returning the stored Win32 language identifier.
// NOTE(review): the owning property header is elided here - presumably LCID.
173 get { return m_win32LangID; }
// Gets or sets the string that separates items in a list.
//
// get: the value is derived lazily from the culture data (a single character taken
//      from data.list_sep) the first time it is read, then cached.
// set: throws ArgumentNullException when the new value is null and
//      InvalidOperationException when this instance is read-only, matching the
//      documented contract of TextInfo.ListSeparator. Previously both cases were
//      silently accepted, so a read-only TextInfo could still be modified.
public virtual string ListSeparator {
	get {
		if (m_listSeparator == null)
			m_listSeparator = ((char) data.list_sep).ToString ();
		return m_listSeparator;
	}
	set {
		if (value == null)
			throw new ArgumentNullException ("value");
		// The copy constructor assigns the field directly, so ReadOnly() copies are unaffected.
		if (m_isReadOnly)
			throw new InvalidOperationException ("Instance is read-only.");
		m_listSeparator = value;
	}
}
// Remaining code page properties. Their bodies are elided in this excerpt; the file
// header notes that the code page mappings are still missing.
186 public virtual int MacCodePage
193 public virtual int OEMCodePage
// Name of the culture this TextInfo belongs to.
// The value is resolved on first access and cached in customCultureName; when no
// CultureInfo is attached the empty string is used.
public string CultureName
{
	get
	{
		if (customCultureName != null)
			return customCultureName;

		if (ci != null)
			customCultureName = ci.Name;
		else
			customCultureName = String.Empty;

		return customCultureName;
	}
}
// Reports whether this instance is flagged read-only. The flag is set from the
// constructor argument and forced to true on copies produced by ReadOnly().
public bool IsReadOnly
{
	get
	{
		return this.m_isReadOnly;
	}
}
// Reports the right_to_left flag stored in the culture data record.
public bool IsRightToLeft
{
	get
	{
		bool rightToLeft = data.right_to_left;
		return rightToLeft;
	}
}
// Equality is based on the Win32 language identifier, the same value GetHashCode returns.
// NOTE(review): the null checks and return statements are elided in this excerpt;
// `other` is dereferenced below, so confirm it is null-checked after the `as` cast.
221 public override bool Equals (object obj)
// `as` yields null when obj is not a TextInfo.
225 TextInfo other = obj as TextInfo;
228 if (other.m_win32LangID != m_win32LangID)
// The hash code is the Win32 language identifier itself, i.e. the same field
// that Equals compares.
public override int GetHashCode ()
{
	int languageId = m_win32LangID;
	return languageId;
}
// Textual form: the fixed prefix "TextInfo - " followed by the Win32 language identifier.
public override string ToString ()
{
	string languageId = m_win32LangID.ToString ();
	return String.Concat ("TextInfo - ", languageId);
}
// Converts `str` to title case word by word. From the visible lines: the first letter of
// a word is mapped with ToTitleCase(char), following characters with ToLower(char), and
// words end at characters whose Unicode category IsSeparator accepts.
// A StringBuilder is allocated only once a change is actually needed; if nothing changes
// the original string instance is returned.
// Throws ArgumentNullException ("str") - the guarding condition is elided in this excerpt.
// NOTE(review): many short statements (declarations of i / start / ch, break / continue,
// the updates of `capitalize` and `allTitle`) are elided here.
245 public string ToTitleCase (string str)
248 throw new ArgumentNullException ("str");
// Stays null until the first modification is required.
250 StringBuilder sb = null;
253 while (i < str.Length) {
// Advance past characters that are not letters (i is incremented as part of the test).
254 if (!Char.IsLetter (str [i++]))
// Title-case form of the current character.
257 char t = ToTitleCase (str [i]);
258 bool capitalize = true;
261 bool allTitle = true;
262 // if the word is all titlecase,
263 // then don't capitalize it.
// Scan the rest of the word, up to the next separator category.
265 while (++i < str.Length) {
267 var category = char.GetUnicodeCategory (ch);
268 if (IsSeparator (category))
270 t = ToTitleCase (ch);
280 // still check if all remaining
281 // characters are lowercase,
282 // where we don't have to modify
// Scan the rest of the word for a character that ToLower would change.
284 while (++i < str.Length) {
286 var category = char.GetUnicodeCategory (ch);
287 if (IsSeparator (category))
289 if (ToLower (ch) != ch) {
// Builder setup: copy the untouched text [start, i) and then append the
// title-cased character at position i.
299 sb = new StringBuilder (str.Length);
300 sb.Append (str, start, i - start);
301 sb.Append (ToTitleCase (str [i]));
// Lower-case the remaining characters of the word, up to the next separator category.
303 while (++i < str.Length) {
305 var category = char.GetUnicodeCategory (ch);
306 if (IsSeparator (category))
308 sb.Append (ToLower (ch));
// Append the text from `start` to the end of the input (guarding condition elided).
314 sb.Append (str, start, str.Length - start);
// No builder means nothing was modified: hand back the input string itself.
316 return sb != null ? sb.ToString () : str;
// Classifies a Unicode category as a word boundary for ToTitleCase(string).
// The categories listed are spaces, line/paragraph separators, control and format
// characters, and every punctuation category.
// NOTE(review): the switch header and the return statements are elided in this
// excerpt - presumably these cases return true and everything else false.
319 static bool IsSeparator (UnicodeCategory category)
322 case UnicodeCategory.SpaceSeparator:
323 case UnicodeCategory.LineSeparator:
324 case UnicodeCategory.ParagraphSeparator:
325 case UnicodeCategory.Control:
326 case UnicodeCategory.Format:
327 case UnicodeCategory.ConnectorPunctuation:
328 case UnicodeCategory.DashPunctuation:
329 case UnicodeCategory.OpenPunctuation:
330 case UnicodeCategory.ClosePunctuation:
331 case UnicodeCategory.InitialQuotePunctuation:
332 case UnicodeCategory.FinalQuotePunctuation:
333 case UnicodeCategory.OtherPunctuation:
340 // Only Azeri and Turkish have their own special cases.
341 // Other than them, all languages have common special case
342 // (enumerable enough).
// Lower-cases a single character: ASCII fast path first, then the invariant tables when
// there is no culture or the culture has LCID 0x7F, then a handful of special cases,
// and finally the invariant tables again as the fallback.
// NOTE(review): the switch header, several return statements and the condition around
// the dotted/dotless I cases are elided in this excerpt.
343 public virtual char ToLower (char c)
345 // quick ASCII range check
// && binds tighter than ||: matches c < 0x40, or 0x60 < c < 128.
346 if (c < 0x40 || 0x60 < c && c < 128)
// ASCII capital letter: add 0x20. 'I' is excluded when handleDotI is set so it
// reaches the special cases below.
348 else if ('A' <= c && c <= 'Z' && (!handleDotI || c != 'I'))
349 return (char) (c + 0x20);
// No culture, or LCID 0x7F: use the invariant tables directly.
351 if (ci == null || ci.LCID == 0x7F)
352 return ToLowerInvariant (c);
355 case '\u0049': // Latin uppercase I
357 return '\u0131'; // I becomes dotless i
359 case '\u0130': // I-dotted
360 return '\u0069'; // i
// Title-case digraph letters (results elided in this excerpt).
362 case '\u01c5': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
364 // \u01c7 -> \u01c9 (LJ) : invariant
365 case '\u01c8': // LATIN CAPITAL LETTER L WITH SMALL LETTER J
367 // \u01ca -> \u01cc (NJ) : invariant
368 case '\u01cb': // LATIN CAPITAL LETTER N WITH SMALL LETTER J
370 // WITH CARON : invariant
371 // WITH DIAERESIS AND * : invariant
373 case '\u01f2': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
375 case '\u03d2': // ? it is not in ICU
377 case '\u03d3': // ? it is not in ICU
379 case '\u03d4': // ? it is not in ICU
// Everything not handled above falls back to the invariant tables.
382 return ToLowerInvariant (c);
// Upper-cases a single character; mirrors ToLower(char): ASCII fast path, invariant
// tables when there is no culture or the culture has LCID 0x7F, a handful of special
// cases, then the invariant tables as the fallback.
// NOTE(review): the first ASCII range test, the switch header and several return
// statements are elided in this excerpt.
385 public virtual char ToUpper (char c)
387 // quick ASCII range check
// ASCII small letter: subtract 0x20. 'i' is excluded when handleDotI is set so it
// reaches the special cases below.
390 else if ('a' <= c && c <= 'z' && (!handleDotI || c != 'i'))
391 return (char) (c - 0x20);
// No culture, or LCID 0x7F: use the invariant tables directly.
393 if (ci == null || ci.LCID == 0x7F)
394 return ToUpperInvariant (c);
397 case '\u0069': // Latin lowercase i
399 return '\u0130'; // dotted capital I
401 case '\u0131': // dotless i
402 return '\u0049'; // I
404 case '\u01c5': // see ToLower()
406 case '\u01c8': // see ToLower()
408 case '\u01cb': // see ToLower()
410 case '\u01f2': // see ToLower()
412 case '\u0390': // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
413 return '\u03aa'; // it is not in ICU
414 case '\u03b0': // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
415 return '\u03ab'; // it is not in ICU
// Greek symbol variants (results elided in this excerpt).
416 case '\u03d0': // GREEK BETA
418 case '\u03d1': // GREEK THETA
420 case '\u03d5': // GREEK PHI
422 case '\u03d6': // GREEK PI
424 case '\u03f0': // GREEK KAPPA
426 case '\u03f1': // GREEK RHO
428 // am not sure why miscellaneous GREEK symbols are
// Everything not handled above falls back to the invariant tables.
432 return ToUpperInvariant (c);
// Title-case mapping for a single character; used by ToTitleCase(string).
// NOTE(review): most of the body (the Latin special cases and every return statement)
// is elided in this excerpt, so only the range test below can be described.
435 private char ToTitleCase (char c)
437 // Handle some Latin characters.
// Two ranges are singled out: U+2170..U+217F and U+24D0..U+24E9
// (what is returned for them is not visible here).
456 if ('\u2170' <= c && c <= '\u217f' || // Roman numbers
457 '\u24d0' <= c && c <= '\u24e9')
// Lower-cases a whole string by applying ToLower(char) to each character.
// Throws ArgumentNullException ("str") - the guarding condition is elided in this excerpt.
// NOTE(review): the pointer increments and the return statement are also elided.
462 public unsafe virtual string ToLower (string str)
464 // In ICU (3.2) there are a few cases that one single
465 // character results in multiple characters in e.g.
466 // tr-TR culture. So I tried brute force conversion
467 // test with single character as a string input, but
468 // there was no such conversion. So I think it just
469 // invokes ToLower(char).
471 throw new ArgumentNullException ("str");
// Result string of the same length as the input; it is filled in place below.
476 string tmp = String.InternalAllocateStr (str.Length);
// Pin both strings so they can be accessed through raw char pointers.
477 fixed (char* source = str, dest = tmp) {
479 char* destPtr = (char*)dest;
480 char* sourcePtr = (char*)source;
// One output character per input character, via the virtual ToLower(char).
482 for (int n = 0; n < str.Length; n++) {
483 *destPtr = ToLower (*sourcePtr);
// Upper-cases a whole string by applying ToUpper(char) to each character.
// Throws ArgumentNullException ("str") - the guarding condition is elided in this excerpt.
// NOTE(review): the pointer increments and the return statement are also elided.
491 public unsafe virtual string ToUpper (string str)
493 // In ICU (3.2) there is a case that string
494 // is handled beyond per-character conversion, but
495 // it is only lt-LT culture where MS.NET does not
496 // handle any special transliteration. So I keep
497 // ToUpper() just as character conversion.
499 throw new ArgumentNullException ("str");
// Result string of the same length as the input; it is filled in place below.
504 string tmp = String.InternalAllocateStr (str.Length);
// Pin both strings so they can be accessed through raw char pointers.
505 fixed (char* source = str, dest = tmp) {
507 char* destPtr = (char*)dest;
508 char* sourcePtr = (char*)source;
// One output character per input character, via the virtual ToUpper(char).
510 for (int n = 0; n < str.Length; n++) {
511 *destPtr = ToUpper (*sourcePtr);
// Produces a copy of `textInfo` flagged read-only.
// Throws ArgumentNullException ("textInfo") when the argument is null.
// A new instance is always created through the copy constructor, even when the
// argument is already read-only.
// NOTE(review): the return statement is elided in this excerpt - presumably `return ti;`.
520 public static TextInfo ReadOnly (TextInfo textInfo)
522 if (textInfo == null)
523 throw new ArgumentNullException ("textInfo");
525 TextInfo ti = new TextInfo (textInfo);
526 ti.m_isReadOnly = true;
530 /* IDeserializationCallback interface */
// Explicit interface implementation of the deserialization callback.
// Per the FIXME below and the MonoTODO on the class it is not implemented yet
// (the body is elided in this excerpt).
532 void IDeserializationCallback.OnDeserialization(object sender)
534 // FIXME: we need to re-create "data" in order to get most properties working
// ICloneable implementation: returns a new TextInfo built with the private copy
// constructor from this instance.
public virtual object Clone ()
{
	TextInfo copy = new TextInfo (this);
	return copy;
}
// Case-insensitive hash code for `str`, computed with the current culture's
// ignore-case comparer.
// NOTE(review): the comparer is taken from the current culture, not from this
// instance's culture - confirm that this is intended.
internal int GetCaseInsensitiveHashCode (string str)
{
	StringComparer comparer = StringComparer.CurrentCultureIgnoreCase;
	return comparer.GetHashCode (str);
}
// Case-insensitive hash code for `s`; simply forwards to the string's own
// GetCaseInsensitiveHashCode (). `s` is dereferenced without a null check.
internal static int GetHashCodeOrdinalIgnoreCase (string s)
{
	int hash = s.GetCaseInsensitiveHashCode ();
	return hash;
}