2 // System.Globalization.TextInfo.cs
5 // Dick Porter (dick@ximian.com)
6 // Duncan Mak (duncan@ximian.com)
7 // Atsushi Enomoto (atsushi@ximian.com)
8 // Sebastien Pouliot <sebastien@ximian.com>
10 // (C) 2002 Ximian, Inc.
11 // (C) 2005 Novell, Inc.
14 // Missing the various code page mappings.
15 // Missing the OnDeserialization implementation.
17 // Copyright (C) 2004, 2005 Novell, Inc (http://www.novell.com)
19 // Permission is hereby granted, free of charge, to any person obtaining
20 // a copy of this software and associated documentation files (the
21 // "Software"), to deal in the Software without restriction, including
22 // without limitation the rights to use, copy, modify, merge, publish,
23 // distribute, sublicense, and/or sell copies of the Software, and to
24 // permit persons to whom the Software is furnished to do so, subject to
25 // the following conditions:
27 // The above copyright notice and this permission notice shall be
28 // included in all copies or substantial portions of the Software.
30 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
34 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
35 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
36 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
39 using System.Runtime.Serialization;
40 using System.Runtime.InteropServices;
43 namespace System.Globalization {
// TextInfo: exposes a culture's list separator, culture name and the
// ToLower/ToUpper/ToTitleCase conversions. It declares
// IDeserializationCallback, but per the MonoTODO text that callback is
// not implemented.
47 [MonoTODO ("IDeserializationCallback isn't implemented.")]
48 public class TextInfo: IDeserializationCallback, ICloneable
// NOTE(review): the constructor fills its Data value by dereferencing a
// raw pointer (*(Data*) data), so this sequential layout presumably has
// to match the native table the pointer refers to — confirm.
50 [StructLayout (LayoutKind.Sequential)]
// Surfaced publicly through the IsRightToLeft property (data.right_to_left).
56 public bool right_to_left;
// Cache for ListSeparator: built lazily by the getter, replaced by the setter.
60 string m_listSeparator;
// Cache for CultureName: filled on first read from ci.Name (or String.Empty).
62 string customCultureName;
// Warning 169 is suppressed for the field declared inside this pragma pair.
64 #pragma warning disable 169
// Copied from the source instance by the private copy constructor.
67 bool m_useUserOverride;
68 #pragma warning restore 169
// Owning culture; CultureName, ToLower(char) and ToUpper(char) all tolerate null here.
73 readonly CultureInfo ci;
// When true, ASCII 'I' and 'i' skip the fast +/-0x20 path in ToLower/ToUpper,
// presumably so the dotted/dotless I cases further down apply — confirm.
76 readonly bool handleDotI;
// Builds a TextInfo for a culture: records read_only and lcid, then takes
// its Data either from the raw pointer argument or from a fresh Data whose
// list separator is ','.
// NOTE(review): the test that selects between the two data assignments is
// presumably a null check on the pointer — confirm.
81 internal unsafe TextInfo (CultureInfo ci, int lcid, void* data, bool read_only)
83 this.m_isReadOnly = read_only;
84 this.m_win32LangID = lcid;
// Copy the Data value out of the memory the caller points at.
87 this.data = *(Data*) data;
// Fallback: default Data with ',' as the list separator.
89 this.data = new Data ();
90 this.data.list_sep = (byte) ',';
// Walk up the Parent chain; stop at a null parent, a parent whose LCID is
// 0x7F, or a culture that is its own parent.
94 while (tmp.Parent != null && tmp.Parent.LCID != 0x7F && tmp.Parent != tmp)
// Azeri and Turkish are the two cultures singled out; presumably this is
// where handleDotI gets enabled — confirm.
99 case 44: // Azeri (az)
100 case 31: // Turkish (tr)
// Copy constructor; ReadOnly() and Clone() both create their result through it.
107 private TextInfo (TextInfo textInfo)
109 m_win32LangID = textInfo.m_win32LangID;
110 m_nDataItem = textInfo.m_nDataItem;
111 m_useUserOverride = textInfo.m_useUserOverride;
// These two go through the properties rather than the fields, so the
// source's lazily computed values are materialised before being copied.
112 m_listSeparator = textInfo.ListSeparator;
113 customCultureName = textInfo.CultureName;
115 handleDotI = textInfo.handleDotI;
116 data = textInfo.data;
// NOTE(review): the file header lists the code page mappings as missing —
// confirm what the code page properties actually return.
119 public virtual int ANSICodePage
126 public virtual int EBCDICCodePage
// Returns the locale identifier that the constructors stored in m_win32LangID.
135 get { return m_win32LangID; }
// List separator string for this culture. The getter builds it on first
// use from the single byte data.list_sep and caches it; the setter simply
// replaces the cached value (assigning null makes the getter rebuild it).
// NOTE(review): the setter neither rejects null nor consults m_isReadOnly,
// so an instance flagged read-only can still be changed — confirm whether
// that is intended.
138 public virtual string ListSeparator {
140 if (m_listSeparator == null)
141 m_listSeparator = ((char) data.list_sep).ToString ();
142 return m_listSeparator;
145 set { m_listSeparator = value; }
// NOTE(review): the file header lists the code page mappings as missing —
// confirm what these two properties actually return.
148 public virtual int MacCodePage
155 public virtual int OEMCodePage
// Name of the owning culture, computed once and cached in customCultureName:
// ci.Name when a culture is attached, String.Empty when ci is null.
163 public string CultureName {
165 if (customCultureName == null)
166 customCultureName = ci == null ? String.Empty : ci.Name;
167 return customCultureName;
// Reports the m_isReadOnly flag, which is set by the internal constructor
// (from read_only) and forced to true by ReadOnly().
172 public bool IsReadOnly {
173 get { return m_isReadOnly; }
// Reports the right_to_left flag carried in this instance's Data value.
177 public bool IsRightToLeft {
179 return data.right_to_left;
// Equality check against another TextInfo; a differing m_win32LangID is
// one of the conditions tested.
183 public override bool Equals (object obj)
// 'as' yields null when obj is not a TextInfo.
// NOTE(review): other is dereferenced below — confirm a null check sits
// between these two statements.
187 TextInfo other = obj as TextInfo;
190 if (other.m_win32LangID != m_win32LangID)
// Uses the locale identifier as the hash code, the same field that Equals compares.
197 public override int GetHashCode()
199 return (m_win32LangID);
// Returns the text "TextInfo - " followed by the locale identifier.
202 public override string ToString()
204 return "TextInfo - " + m_win32LangID;
// Converts str to title case one word at a time. A StringBuilder is only
// allocated once a character actually has to change; if none does, the
// original string instance is returned unchanged.
// Throws ArgumentNullException ("str") from the guard at the top.
207 public string ToTitleCase (string str)
210 throw new ArgumentNullException ("str");
// Stays null until the first modification is needed.
212 StringBuilder sb = null;
215 while (i < str.Length) {
// Note that i is advanced as part of this letter test.
216 if (!Char.IsLetter (str [i++]))
// Title-case form of the current character, via the private char overload.
219 char t = ToTitleCase (str [i]);
220 bool capitalize = true;
223 bool allTitle = true;
224 // If the word is entirely title case,
225 // do not capitalize it.
// Scan forward; IsSeparator on the character's Unicode category is the
// word-boundary test used by every inner loop in this method.
227 while (++i < str.Length) {
229 var category = char.GetUnicodeCategory (ch);
230 if (IsSeparator (category))
232 t = ToTitleCase (ch);
242 // still check if all remaining
243 // characters are lowercase,
244 // where we don't have to modify
246 while (++i < str.Length) {
248 var category = char.GetUnicodeCategory (ch);
249 if (IsSeparator (category))
// A character that differs from its lowercase form was found.
251 if (ToLower (ch) != ch) {
// Rewrite path: size the builder for the whole input, copy the text from
// 'start' up to the current index verbatim, then append the title-cased
// character at the current index.
261 sb = new StringBuilder (str.Length);
262 sb.Append (str, start, i - start);
263 sb.Append (ToTitleCase (str [i]));
// Append following characters in lowercase; separators are tested for first.
265 while (++i < str.Length) {
267 var category = char.GetUnicodeCategory (ch);
268 if (IsSeparator (category))
270 sb.Append (ToLower (ch));
// Copy everything from 'start' to the end of the input unchanged.
276 sb.Append (str, start, str.Length - start);
// No builder means nothing was modified: hand back the caller's string.
278 return sb != null ? sb.ToString () : str;
// Classifies a Unicode category for ToTitleCase (string), which calls this
// on every character it scans to find word boundaries. The space/line/
// paragraph separators, control and format characters, and the punctuation
// categories below all share one branch.
// NOTE(review): presumably that branch returns true and everything else
// returns false — confirm.
281 static bool IsSeparator (UnicodeCategory category)
284 case UnicodeCategory.SpaceSeparator:
285 case UnicodeCategory.LineSeparator:
286 case UnicodeCategory.ParagraphSeparator:
287 case UnicodeCategory.Control:
288 case UnicodeCategory.Format:
289 case UnicodeCategory.ConnectorPunctuation:
290 case UnicodeCategory.DashPunctuation:
291 case UnicodeCategory.OpenPunctuation:
292 case UnicodeCategory.ClosePunctuation:
293 case UnicodeCategory.InitialQuotePunctuation:
294 case UnicodeCategory.FinalQuotePunctuation:
295 case UnicodeCategory.OtherPunctuation:
302 // Only Azeri and Turkish have culture-specific special cases.
303 // Apart from those, all languages share a common set of special cases
304 // (few enough to enumerate).
// Lowercases a single character: ASCII is handled inline, a fixed list of
// special characters is handled by the switch, and everything else is
// delegated to Char.ToLowerInvariant.
305 public virtual char ToLower (char c)
307 // quick ASCII range check
// && binds tighter than ||: matches c below 0x40 ('@'), or c strictly
// between 0x60 ('`') and 128. None of these is an ASCII uppercase letter;
// presumably they are returned unchanged — confirm.
308 if (c < 0x40 || 0x60 < c && c < 128)
// ASCII 'A'..'Z' become lowercase by adding 0x20, except 'I' when
// handleDotI is set, which falls through to the handling below.
310 else if ('A' <= c && c <= 'Z' && (!handleDotI || c != 'I'))
311 return (char) (c + 0x20);
// No culture attached, or the culture's LCID is 0x7F: use the invariant mapping.
313 if (ci == null || ci.LCID == 0x7F)
314 return Char.ToLowerInvariant (c);
317 case '\u0049': // Latin uppercase I
319 return '\u0131'; // I becomes dotless i
321 case '\u0130': // I-dotted
322 return '\u0069'; // i
324 case '\u01c5': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
326 // \u01c7 -> \u01c9 (LJ) : invariant
327 case '\u01c8': // LATIN CAPITAL LETTER L WITH SMALL LETTER J
329 // \u01ca -> \u01cc (NJ) : invariant
330 case '\u01cb': // LATIN CAPITAL LETTER N WITH SMALL LETTER J
332 // WITH CARON : invariant
333 // WITH DIAERESIS AND * : invariant
335 case '\u01f2': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
337 case '\u03d2': // ? it is not in ICU
339 case '\u03d3': // ? it is not in ICU
341 case '\u03d4': // ? it is not in ICU
// Anything without a special case above uses the invariant mapping.
344 return Char.ToLowerInvariant (c);
// Uppercases a single character: ASCII is handled inline, a fixed list of
// special characters is handled by the switch, and everything else is
// delegated to Char.ToUpperInvariant.
347 public virtual char ToUpper (char c)
349 // quick ASCII range check
// ASCII 'a'..'z' become uppercase by subtracting 0x20, except 'i' when
// handleDotI is set, which falls through to the handling below.
352 else if ('a' <= c && c <= 'z' && (!handleDotI || c != 'i'))
353 return (char) (c - 0x20);
// No culture attached, or the culture's LCID is 0x7F: use the invariant mapping.
355 if (ci == null || ci.LCID == 0x7F)
356 return Char.ToUpperInvariant (c);
359 case '\u0069': // Latin lowercase i
361 return '\u0130'; // dotted capital I
363 case '\u0131': // dotless i
364 return '\u0049'; // I
366 case '\u01c5': // see ToLower()
368 case '\u01c8': // see ToLower()
370 case '\u01cb': // see ToLower()
372 case '\u01f2': // see ToLower()
374 case '\u0390': // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
375 return '\u03aa'; // it is not in ICU
376 case '\u03b0': // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
377 return '\u03ab'; // it is not in ICU
378 case '\u03d0': // GREEK BETA
380 case '\u03d1': // GREEK THETA
382 case '\u03d5': // GREEK PHI
384 case '\u03d6': // GREEK PI
386 case '\u03f0': // GREEK KAPPA
388 case '\u03f1': // GREEK RHO
390 // am not sure why miscellaneous GREEK symbols are
// Anything without a special case above uses the invariant mapping.
394 return Char.ToUpperInvariant (c);
// Title-case form of a single character; ToTitleCase (string) calls this
// for the characters it capitalizes.
397 private char ToTitleCase (char c)
399 // Handle some Latin characters.
// Two ranges get dedicated handling: U+2170..U+217F and U+24D0..U+24E9.
// NOTE(review): presumably characters in these ranges are returned as-is
// instead of being uppercased — confirm.
418 if ('\u2170' <= c && c <= '\u217f' || // Roman numbers
419 '\u24d0' <= c && c <= '\u24e9')
// Lowercases a whole string by applying ToLower (char) to each character
// and writing the results into a newly allocated string of equal length.
// Throws ArgumentNullException ("str").
424 public unsafe virtual string ToLower (string str)
426 // In ICU (3.2) there are a few cases where one single
427 // character results in multiple characters, e.g. in the
428 // tr-TR culture. So I tried a brute-force conversion
429 // test with a single character as the string input, but
430 // there was no such conversion. So I think it just
431 // invokes ToLower(char).
433 throw new ArgumentNullException ("str");
// Result buffer: same length as the input, filled in place below.
438 string tmp = String.InternalAllocateStr (str.Length);
// Pin both strings so they can be walked with raw char pointers.
439 fixed (char* source = str, dest = tmp) {
441 char* destPtr = (char*)dest;
442 char* sourcePtr = (char*)source;
444 for (int n = 0; n < str.Length; n++) {
445 *destPtr = ToLower (*sourcePtr);
// Uppercases a whole string by applying ToUpper (char) to each character
// and writing the results into a newly allocated string of equal length.
// Throws ArgumentNullException ("str").
453 public unsafe virtual string ToUpper (string str)
455 // In ICU (3.2) there is a case where a string
456 // is handled beyond per-character conversion, but
457 // that is only the lt-LT culture, where MS.NET does not
458 // handle any special transliteration. So I keep
459 // ToUpper() as plain per-character conversion.
461 throw new ArgumentNullException ("str");
// Result buffer: same length as the input, filled in place below.
466 string tmp = String.InternalAllocateStr (str.Length);
// Pin both strings so they can be walked with raw char pointers.
467 fixed (char* source = str, dest = tmp) {
469 char* destPtr = (char*)dest;
470 char* sourcePtr = (char*)source;
472 for (int n = 0; n < str.Length; n++) {
473 *destPtr = ToUpper (*sourcePtr);
// Produces a read-only TextInfo from textInfo: a copy is made through the
// private copy constructor and its m_isReadOnly flag is set to true.
// Throws ArgumentNullException ("textInfo") when the argument is null.
482 public static TextInfo ReadOnly (TextInfo textInfo)
484 if (textInfo == null)
485 throw new ArgumentNullException ("textInfo");
487 TextInfo ti = new TextInfo (textInfo);
488 ti.m_isReadOnly = true;
492 /* IDeserialization interface */
// Explicit IDeserializationCallback implementation. Per the FIXME below
// and the MonoTODO on the class, restoring "data" after deserialization is
// still outstanding.
494 void IDeserializationCallback.OnDeserialization(object sender)
496 // FIXME: we need to re-create "data" in order to get most properties working
// ICloneable implementation: returns a new TextInfo built from this one
// through the private copy constructor.
501 public virtual object Clone ()
503 return new TextInfo (this);