2 // System.Globalization.TextInfo.cs
5 // Dick Porter (dick@ximian.com)
6 // Duncan Mak (duncan@ximian.com)
7 // Atsushi Enomoto (atsushi@ximian.com)
8 // Sebastien Pouliot <sebastien@ximian.com>
10 // (C) 2002 Ximian, Inc.
11 // (C) 2005 Novell, Inc.
14 // Missing the various code page mappings.
15 // Missing the OnDeserialization implementation.
17 // Copyright (C) 2004, 2005 Novell, Inc (http://www.novell.com)
19 // Permission is hereby granted, free of charge, to any person obtaining
20 // a copy of this software and associated documentation files (the
21 // "Software"), to deal in the Software without restriction, including
22 // without limitation the rights to use, copy, modify, merge, publish,
23 // distribute, sublicense, and/or sell copies of the Software, and to
24 // permit persons to whom the Software is furnished to do so, subject to
25 // the following conditions:
27 // The above copyright notice and this permission notice shall be
28 // included in all copies or substantial portions of the Software.
30 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
34 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
35 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
36 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
39 using System.Runtime.CompilerServices;
40 using System.Runtime.Serialization;
41 using System.Runtime.InteropServices;
43 using System.Diagnostics.Contracts;
45 namespace System.Globalization {
49 [MonoTODO ("IDeserializationCallback isn't implemented.")]
50 public class TextInfo: IDeserializationCallback, ICloneable
55 GetDataTablePointersLite (out to_lower_data_low, out to_lower_data_high, out to_upper_data_low, out to_upper_data_high);
59 private readonly unsafe static ushort *to_lower_data_low;
60 private readonly unsafe static ushort *to_lower_data_high;
61 private readonly unsafe static ushort *to_upper_data_low;
62 private readonly unsafe static ushort *to_upper_data_high;
// Declared extern with MethodImplOptions.InternalCall, so there is no managed body here.
// Through its four out parameters it hands back the raw ushort tables that
// ToLowerInvariant / ToUpperInvariant index: one "low" and one "high" table per direction.
63 [MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.InternalCall)]
64 private unsafe static extern void GetDataTablePointersLite (out ushort *to_lower_data_low, out ushort *to_lower_data_high,
65 out ushort *to_upper_data_low, out ushort *to_upper_data_high);
// Culture-independent lowercase mapping for a single UTF-16 code unit.
// Two lookup tables are consulted: the "low" table covers U+0000..U+24CF and the
// "high" table starts at U+FF21. Anything in between is returned unchanged.
static char ToLowerInvariant (char c)
{
	const int lowTableLast = 0x24cf;
	const int highTableFirst = 0xff21;
	int code = c;

	unsafe {
		if (code >= highTableFirst)
			return (char) to_lower_data_high [code - highTableFirst];
		if (code <= lowTableLast)
			return (char) to_lower_data_low [code];
	}

	// Not covered by either table.
	return c;
}
// Culture-independent uppercase mapping for a single UTF-16 code unit.
// Two lookup tables are consulted: the "low" table covers U+0000..U+24E9 and the
// "high" table starts at U+FF21. Anything in between is returned unchanged.
static char ToUpperInvariant (char c)
{
	const int lowTableLast = 0x24e9;
	const int highTableFirst = 0xff21;
	int code = c;

	unsafe {
		if (code >= highTableFirst)
			return (char) to_upper_data_high [code - highTableFirst];
		if (code <= lowTableLast)
			return (char) to_upper_data_low [code];
	}

	// Not covered by either table.
	return c;
}
89 [StructLayout (LayoutKind.Sequential)]
95 public bool right_to_left;
99 string m_listSeparator;
101 string customCultureName;
103 #pragma warning disable 169
106 bool m_useUserOverride;
107 #pragma warning restore 169
112 readonly CultureInfo ci;
115 readonly bool handleDotI;
// Builds the TextInfo that belongs to a culture.
//   ci        - owning culture; may be null, in which case no Turkic special-casing is enabled.
//   lcid      - stored as m_win32LangID.
//   data      - optional pointer to a native Data record; when null a default record
//               with ',' as the list separator is used instead.
//   read_only - initial value of IsReadOnly.
internal unsafe TextInfo (CultureInfo ci, int lcid, void* data, bool read_only)
{
	this.m_isReadOnly = read_only;
	this.m_win32LangID = lcid;
	this.ci = ci;

	if (data != null)
		this.data = *(Data*) data;
	else {
		this.data = new Data ();
		this.data.list_sep = (byte) ',';
	}

	// CultureName and ToLower/ToUpper already allow for ci == null; with no culture
	// there is no parent chain to walk, so handleDotI simply stays false.
	if (ci == null)
		return;

	// Climb to the top-most specific/neutral ancestor (stop before the invariant
	// culture, LCID 0x7F, and guard against a culture that is its own parent).
	CultureInfo tmp = ci;
	while (tmp.Parent != null && tmp.Parent.LCID != 0x7F && tmp.Parent != tmp)
		tmp = tmp.Parent;

	switch (tmp.LCID) {
	case 44: // Azeri (az)
	case 31: // Turkish (tr)
		// These languages distinguish dotted and dotless I when casing.
		handleDotI = true;
		break;
	}
}
// Copy constructor used by Clone () and ReadOnly ().
// The read-only flag is not copied here; ReadOnly () sets it on the copy afterwards.
private TextInfo (TextInfo textInfo)
{
	// Straight field copies.
	this.m_win32LangID = textInfo.m_win32LangID;
	this.m_nDataItem = textInfo.m_nDataItem;
	this.m_useUserOverride = textInfo.m_useUserOverride;
	this.ci = textInfo.ci;
	this.handleDotI = textInfo.handleDotI;
	this.data = textInfo.data;

	// Read through the properties so that lazily computed values are materialised.
	this.m_listSeparator = textInfo.ListSeparator;
	this.customCultureName = textInfo.CultureName;
}
158 public virtual int ANSICodePage
165 public virtual int EBCDICCodePage
174 get { return m_win32LangID; }
// Gets or sets the string that separates items in a list.
// get: lazily derives the default from the single-byte separator in the culture data.
// set: throws ArgumentNullException for a null value and InvalidOperationException
//      when this instance is read-only, so copies returned by ReadOnly () cannot be
//      modified through this property.
public virtual string ListSeparator {
	get {
		if (m_listSeparator == null)
			m_listSeparator = ((char) data.list_sep).ToString ();
		return m_listSeparator;
	}
	[ComVisible (false)]
	set {
		if (value == null)
			throw new ArgumentNullException ("value");
		if (m_isReadOnly)
			throw new InvalidOperationException ("Instance is read-only.");
		m_listSeparator = value;
	}
}
187 public virtual int MacCodePage
194 public virtual int OEMCodePage
// Name of the culture this TextInfo belongs to. Computed on first access and cached;
// an instance without a culture reports the empty string.
public string CultureName {
	get {
		if (customCultureName != null)
			return customCultureName;

		if (ci == null)
			customCultureName = String.Empty;
		else
			customCultureName = ci.Name;
		return customCultureName;
	}
}
// Reports the read-only flag given to the constructor or set by ReadOnly ().
public bool IsReadOnly {
	get {
		return m_isReadOnly;
	}
}
// Exposes the right_to_left flag of the culture data record.
public bool IsRightToLeft {
	get { return data.right_to_left; }
}
// Two TextInfo objects are equal when they carry the same language id and
// refer to the very same CultureInfo instance.
public override bool Equals (object obj)
{
	TextInfo that = obj as TextInfo;
	if (that == null)
		return false; // covers both null and foreign types

	return that.m_win32LangID == m_win32LangID && that.ci == ci;
}
// The language id doubles as the hash code (it is also part of the Equals test).
public override int GetHashCode()
{
	int langId = m_win32LangID;
	return langId;
}
// Renders as "TextInfo - " followed by the numeric language id.
public override string ToString()
{
	return String.Concat ("TextInfo - ", m_win32LangID.ToString ());
}
// Title-cases str word by word. A word begins at a letter (Char.IsLetter) and ends at the
// next character whose Unicode category IsSeparator accepts. A StringBuilder is only
// created once some word really has to change; if none does, the original string
// instance is returned (see the final return statement).
// NOTE(review): several short lines of this method are not visible in this view, so the
// comments below describe only what the visible statements do.
246 public string ToTitleCase (string str)
// Rejects a missing input; the parameter name is reported to the caller.
249 throw new ArgumentNullException ("str");
// Stays null until the first word that needs modification is found.
251 StringBuilder sb = null;
254 while (i < str.Length) {
// Non-letters cannot start a word; i has already been advanced by the post-increment.
255 if (!Char.IsLetter (str [i++]))
// Title-case form of the word's first character (per-character overload).
258 char t = ToTitleCase (str [i]);
259 bool capitalize = true;
262 bool allTitle = true;
263 // if the word is all titlecase,
264 // then don't capitalize it.
266 while (++i < str.Length) {
268 var category = char.GetUnicodeCategory (ch);
269 if (IsSeparator (category))
271 t = ToTitleCase (ch);
281 // still check if all remaining
282 // characters are lowercase,
283 // where we don't have to modify the source word.
285 while (++i < str.Length) {
287 var category = char.GetUnicodeCategory (ch);
288 if (IsSeparator (category))
// A character that is not already lowercase means the word must be rewritten.
290 if (ToLower (ch) != ch) {
// Sized to the input: every conversion here is one character in, one character out.
300 sb = new StringBuilder (str.Length);
// Copy the untouched run [start, i) verbatim, then the title-cased first letter.
301 sb.Append (str, start, i - start);
302 sb.Append (ToTitleCase (str [i]));
// Lower-case the rest of the word, stopping at the next separator.
304 while (++i < str.Length) {
306 var category = char.GetUnicodeCategory (ch);
307 if (IsSeparator (category))
309 sb.Append (ToLower (ch));
// Flush whatever follows the last rewritten word.
315 sb.Append (str, start, str.Length - start);
// No builder means nothing changed: hand back the caller's own string.
317 return sb != null ? sb.ToString () : str;
// Decides whether a character of the given Unicode category ends a word for
// ToTitleCase: all separator categories, control and format characters, and
// every punctuation category count; everything else does not.
static bool IsSeparator (UnicodeCategory category)
{
	bool isSpacing =
		category == UnicodeCategory.SpaceSeparator ||
		category == UnicodeCategory.LineSeparator ||
		category == UnicodeCategory.ParagraphSeparator;

	bool isControlOrFormat =
		category == UnicodeCategory.Control ||
		category == UnicodeCategory.Format;

	bool isPunctuation =
		category == UnicodeCategory.ConnectorPunctuation ||
		category == UnicodeCategory.DashPunctuation ||
		category == UnicodeCategory.OpenPunctuation ||
		category == UnicodeCategory.ClosePunctuation ||
		category == UnicodeCategory.InitialQuotePunctuation ||
		category == UnicodeCategory.FinalQuotePunctuation ||
		category == UnicodeCategory.OtherPunctuation;

	return isSpacing || isControlOrFormat || isPunctuation;
}
341 // Only Azeri and Turkish have their own special cases.
342 // Other than them, all languages have common special case
343 // (few enough to enumerate explicitly below).
// Lower-cases a single character: an ASCII fast path first, then the invariant table when
// there is no culture (or its LCID is 0x7F), then a short list of explicit exceptions,
// and finally the invariant table again as the default.
// NOTE(review): the return values of several cases are on lines not visible in this view.
344 public virtual char ToLower (char c)
346 // quick ASCII range check
// && binds tighter than ||: matches chars below 0x40, or strictly between 0x60 and 128.
347 if (c < 0x40 || 0x60 < c && c < 128)
// Plain A-Z, except 'I' when dotted/dotless I handling is active (handleDotI).
349 else if ('A' <= c && c <= 'Z' && (!handleDotI || c != 'I'))
350 return (char) (c + 0x20);
// No culture, or LCID 0x7F (presumably the invariant culture): use the shared table.
352 if (ci == null || ci.LCID == 0x7F)
353 return ToLowerInvariant (c);
356 case '\u0049': // Latin uppercase I
358 return '\u0131'; // I becomes dotless i
360 case '\u0130': // I-dotted
361 return '\u0069'; // i
363 case '\u01c5': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
365 // \u01c7 -> \u01c9 (LJ) : invariant
366 case '\u01c8': // LATIN CAPITAL LETTER L WITH SMALL LETTER J
368 // \u01ca -> \u01cc (NJ) : invariant
369 case '\u01cb': // LATIN CAPITAL LETTER N WITH SMALL LETTER J
371 // WITH CARON : invariant
372 // WITH DIAERESIS AND * : invariant
374 case '\u01f2': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
376 case '\u03d2': // ? it is not in ICU
378 case '\u03d3': // ? it is not in ICU
380 case '\u03d4': // ? it is not in ICU
// Everything not special-cased above goes through the invariant table.
383 return ToLowerInvariant (c);
// Upper-cases a single character; mirrors ToLower (char): an ASCII fast path, the invariant
// table when there is no culture (or its LCID is 0x7F), a list of explicit exceptions,
// and the invariant table as the default.
// NOTE(review): the first fast-path test and several case results are on lines not
// visible in this view.
386 public virtual char ToUpper (char c)
388 // quick ASCII range check
// Plain a-z, except 'i' when dotted/dotless I handling is active (handleDotI).
391 else if ('a' <= c && c <= 'z' && (!handleDotI || c != 'i'))
392 return (char) (c - 0x20);
// No culture, or LCID 0x7F (presumably the invariant culture): use the shared table.
394 if (ci == null || ci.LCID == 0x7F)
395 return ToUpperInvariant (c);
398 case '\u0069': // Latin lowercase i
400 return '\u0130'; // dotted capital I
402 case '\u0131': // dotless i
403 return '\u0049'; // I
405 case '\u01c5': // see ToLower()
407 case '\u01c8': // see ToLower()
409 case '\u01cb': // see ToLower()
411 case '\u01f2': // see ToLower()
413 case '\u0390': // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
414 return '\u03aa'; // it is not in ICU
415 case '\u03b0': // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
416 return '\u03ab'; // it is not in ICU
417 case '\u03d0': // GREEK BETA
419 case '\u03d1': // GREEK THETA
421 case '\u03d5': // GREEK PHI
423 case '\u03d6': // GREEK PI
425 case '\u03f0': // GREEK KAPPA
427 case '\u03f1': // GREEK RHO
429 // am not sure why miscellaneous GREEK symbols are
// Everything not special-cased above goes through the invariant table.
433 return ToUpperInvariant (c);
436 private char ToTitleCase (char c)
438 // Handle some Latin characters.
457 if ('\u2170' <= c && c <= '\u217f' || // Roman numbers
458 '\u24d0' <= c && c <= '\u24e9')
// Lower-cases a whole string by applying ToLower (char) to every UTF-16 code unit.
// ICU (3.2) knows a few mappings (e.g. in tr-TR) where one character expands to several,
// but a brute-force single-character test showed no such conversion in the reference
// behaviour, so the result always has exactly the input's length.
// Throws ArgumentNullException when str is null.
public unsafe virtual string ToLower (string str)
{
	if (str == null)
		throw new ArgumentNullException ("str");

	int length = str.Length;
	if (length == 0)
		return String.Empty;

	// Allocate the result up front and fill it in place.
	string result = String.FastAllocateString (length);
	fixed (char* input = str, output = result) {
		for (int n = 0; n < length; n++)
			output [n] = ToLower (input [n]);
	}
	return result;
}
// Upper-cases a whole string by applying ToUpper (char) to every UTF-16 code unit.
// ICU (3.2) goes beyond per-character conversion only for lt-LT, where MS.NET performs
// no special transliteration, so plain character conversion is kept here and the result
// always has exactly the input's length.
// Throws ArgumentNullException when str is null.
public unsafe virtual string ToUpper (string str)
{
	if (str == null)
		throw new ArgumentNullException ("str");

	int length = str.Length;
	if (length == 0)
		return String.Empty;

	// Allocate the result up front and fill it in place.
	string result = String.FastAllocateString (length);
	fixed (char* input = str, output = result) {
		for (int n = 0; n < length; n++)
			output [n] = ToUpper (input [n]);
	}
	return result;
}
// Returns a read-only copy of textInfo (always a fresh instance made by the copy
// constructor). Throws ArgumentNullException when textInfo is null.
public static TextInfo ReadOnly (TextInfo textInfo)
{
	if (textInfo != null) {
		TextInfo copy = new TextInfo (textInfo);
		copy.m_isReadOnly = true;
		return copy;
	}

	throw new ArgumentNullException ("textInfo");
}
531 /* IDeserializationCallback interface */
// Explicit interface implementation. The only visible content of the body is the FIXME
// below, and the class itself is tagged MonoTODO for this callback.
533 void IDeserializationCallback.OnDeserialization(object sender)
535 // FIXME: we need to re-create "data" in order to get most properties working
// ICloneable implementation: a new TextInfo produced by the private copy constructor.
public virtual object Clone ()
{
	TextInfo copy = new TextInfo (this);
	return copy;
}
// Hash code of str under the current culture's case-insensitive string comparer.
internal int GetCaseInsensitiveHashCode (string str)
{
	StringComparer comparer = StringComparer.CurrentCultureIgnoreCase;
	return comparer.GetHashCode (str);
}
// Ordinal, case-insensitive hash: folds every character (upper-cased with the invariant
// rules) into h = 31 * h + ch, written as (h << 5) - h. An empty string hashes to 0.
internal static unsafe int GetHashCodeOrdinalIgnoreCase (string s)
{
	int hash = 0;
	for (int i = 0; i < s.Length; i++)
		hash = (hash << 5) - hash + Char.ToUpperInvariant (s [i]);
	return hash;
}
// Whole-string ordinal, case-insensitive comparison: both strings are compared from
// index 0 over their full lengths.
internal static unsafe int CompareOrdinalIgnoreCase(String str1, String str2)
{
	int len1 = str1.Length;
	int len2 = str2.Length;
	return CompareOrdinalIgnoreCaseEx (str1, 0, str2, 0, len1, len2);
}
// Substring variant of the ordinal, case-insensitive comparison. Only the argument order
// differs between this entry point and the worker it forwards to.
internal static int CompareOrdinalIgnoreCaseEx (String strA, int indexA, String strB, int indexB, int lenA, int lenB)
{
	int result = CompareOrdinalCaseInsensitiveUnchecked (strA, indexA, lenA, strB, indexB, lenB);
	return result;
}
// Ordinal, case-insensitive comparison of strA[indexA .. indexA+lenA) with
// strB[indexB .. indexB+lenB); characters are folded with Char.ToUpperInvariant.
// A null string sorts before any non-null string; two nulls are equal.
// lenA/lenB are clamped to the characters actually available after the start index.
// Returns a negative value, zero, or a positive value. Indices are not validated.
static unsafe int CompareOrdinalCaseInsensitiveUnchecked (String strA, int indexA, int lenA, String strB, int indexB, int lenB)
{
	if (strA == null)
		return strB == null ? 0 : -1;
	if (strB == null)
		return 1;

	int lengthA = Math.Min (lenA, strA.Length - indexA);
	int lengthB = Math.Min (lenB, strB.Length - indexB);

	// Shortcut for comparing a window with itself. The start offsets have to match as
	// well: two different windows of the same string are not necessarily equal.
	if (lengthA == lengthB && indexA == indexB && Object.ReferenceEquals (strA, strB))
		return 0;

	fixed (char* aptr = strA, bptr = strB) {
		char* ap = aptr + indexA;
		char* end = ap + Math.Min (lengthA, lengthB);
		char* bp = bptr + indexB;
		while (ap < end) {
			// Only pay for case folding when the raw characters differ.
			if (*ap != *bp) {
				char c1 = Char.ToUpperInvariant (*ap);
				char c2 = Char.ToUpperInvariant (*bp);
				if (c1 != c2)
					return c1 - c2;
			}
			ap++;
			bp++;
		}
		// Common prefix is equal: the shorter window sorts first.
		return lengthA - lengthB;
	}
}
// Searches backwards for value inside source, ignoring case (invariant upper-casing).
// The search window consists of the count characters that end at startIndex.
// Returns the index of the last match, startIndex for an empty value, or -1 when the
// value is longer than the window or does not occur. Arguments are not validated.
internal static unsafe int LastIndexOfStringOrdinalIgnoreCase(String source, String value, int startIndex, int count)
{
	int needleLength = value.Length;
	if (needleLength > count)
		return -1;
	if (needleLength == 0)
		return startIndex;

	// Candidate start positions run from `first` downwards to, but excluding, `stop`.
	int first = startIndex - needleLength + 1;
	int stop = first - count + needleLength - 1;

	fixed (char* haystack = source, needle = value) {
		for (int pos = first; pos != stop; pos--) {
			int matched = 0;
			while (matched < needleLength &&
			       Char.ToUpperInvariant (haystack [pos + matched]) == Char.ToUpperInvariant (needle [matched]))
				matched++;
			if (matched == needleLength)
				return pos;
		}
	}
	return -1;
}
635 internal static int IndexOfStringOrdinalIgnoreCase(String source, String value, int startIndex, int count)
637 Contract.Assert(source != null, "[TextInfo.IndexOfStringOrdinalIgnoreCase] Caller should've validated source != null");
638 Contract.Assert(value != null, "[TextInfo.IndexOfStringOrdinalIgnoreCase] Caller should've validated value != null");
639 Contract.Assert(startIndex + count <= source.Length, "[TextInfo.IndexOfStringOrdinalIgnoreCase] Caller should've validated startIndex + count <= source.Length");
641 // We return 0 if both inputs are empty strings
642 if (source.Length == 0 && value.Length == 0)
647 // the search space within [source] starts at offset [startIndex] inclusive and includes
648 // [count] characters (thus the last included character is at index [startIndex + count -1]
649 // [end] is the index of the next character after the search space
650 // (it points past the end of the search space)
651 int end = startIndex + count;
653 // maxStartIndex is the index beyond which we never *start* searching, inclusive; in other words;
654 // a search could include characters beyond maxStartIndex, but we'd never begin a search at an
655 // index strictly greater than maxStartIndex.
656 int maxStartIndex = end - value.Length;
658 for (; startIndex <= maxStartIndex; startIndex++)
660 // We should always have the same or more characters left to search than our actual pattern
661 Contract.Assert(end - startIndex >= value.Length);
662 // since this is an ordinal comparison, we can assume that the lengths must match
663 if (CompareOrdinalIgnoreCaseEx(source, startIndex, value, 0, value.Length, value.Length) == 0)