2 // System.Globalization.TextInfo.cs
5 // Dick Porter (dick@ximian.com)
6 // Duncan Mak (duncan@ximian.com)
7 // Atsushi Enomoto (atsushi@ximian.com)
9 // (C) 2002 Ximian, Inc.
10 // (C) 2005 Novell, Inc.
13 // Missing the various code page mappings.
14 // Missing the OnDeserialization implementation.
16 // Copyright (C) 2004, 2005 Novell, Inc (http://www.novell.com)
18 // Permission is hereby granted, free of charge, to any person obtaining
19 // a copy of this software and associated documentation files (the
20 // "Software"), to deal in the Software without restriction, including
21 // without limitation the rights to use, copy, modify, merge, publish,
22 // distribute, sublicense, and/or sell copies of the Software, and to
23 // permit persons to whom the Software is furnished to do so, subject to
24 // the following conditions:
26 // The above copyright notice and this permission notice shall be
27 // included in all copies or substantial portions of the Software.
29 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 using System.Globalization;
39 using System.Runtime.Serialization;
40 using System.Runtime.InteropServices;
43 namespace System.Globalization {
// Culture-specific text information. The members visible in this file cover
// character/string casing (ToLower, ToUpper, ToTitleCase), the list
// separator and the code page properties.
46 public class TextInfo: IDeserializationCallback
// Sequential-layout attribute. NOTE(review): presumably this decorates the
// nested Data struct that the constructor copies from a raw pointer -- confirm.
48 [StructLayout (LayoutKind.Sequential)]
// NOTE(review): no read or write of this field appears in the reviewed code.
59 bool m_useUserOverride;
// Culture consulted by ToLower(char) / ToUpper(char): when it is null or its
// LCID is 0x7F those methods use the invariant conversion.
62 readonly CultureInfo ci;
// NOTE(review): no read of this field appears in the reviewed code.
65 readonly CultureInfo parentCulture;
// When true, ASCII 'I' and 'i' are excluded from the plain +/-0x20 fast path
// in ToLower(char) / ToUpper(char) so the dotted/dotless I cases can apply.
// Presumably set by the constructor for Azeri and Turkish -- confirm.
68 readonly bool handleDotI;
// Builds a TextInfo for the given culture.
//   ci   - culture this instance belongs to.
//   lcid - stored in m_win32LangID; Equals, GetHashCode and ToString are all
//          based on that value.
//   data - raw pointer that is reinterpreted as a Data struct.
73 internal unsafe TextInfo (CultureInfo ci, int lcid, void* data)
75 this.m_win32LangID = lcid;
// Copy the Data struct out of the raw pointer. NOTE(review): this and the
// fallback below are presumably the two arms of a null check on `data`.
78 this.data = *(Data*) data;
// Fallback: a default Data whose list separator is '.'.
80 this.data = new Data ();
81 this.data.list_sep = (byte) '.';
// Walk up the Parent chain, stopping when the parent is null, is the culture
// itself, or has LCID 0x7F (the LCID this class treats as invariant).
85 while (tmp.Parent != null && tmp.Parent != tmp && tmp.Parent.LCID != 0x7F)
// LCIDs of the two languages with dotted/dotless I handling.
// NOTE(review): presumably these cases set handleDotI -- confirm.
91 case 44: // Azeri (az)
92 case 31: // Turkish (tr)
// ANSI code page for this TextInfo.
// NOTE(review): the file header lists the code page mappings as missing --
// confirm what the accessor actually returns before relying on it.
99 public virtual int ANSICodePage
// EBCDIC code page for this TextInfo.
// NOTE(review): the file header lists the code page mappings as missing --
// confirm what the accessor actually returns before relying on it.
106 public virtual int EBCDICCodePage
// List separator as a one-character string.
// The value is kept as a single byte (data.list_sep), so only characters in
// the range 0..255 can be represented. The constructor's fallback Data
// uses '.'.
113 public virtual string ListSeparator
117 return ((char) data.list_sep).ToString ();
// Macintosh code page for this TextInfo.
// NOTE(review): the file header lists the code page mappings as missing --
// confirm what the accessor actually returns before relying on it.
121 public virtual int MacCodePage
// OEM code page for this TextInfo.
// NOTE(review): the file header lists the code page mappings as missing --
// confirm what the accessor actually returns before relying on it.
128 public virtual int OEMCodePage
// Equality is based on m_win32LangID, the same value GetHashCode returns.
135 public override bool Equals (object obj)
// `as` yields null when obj is null or not a TextInfo.
// NOTE(review): `other` is dereferenced below, so a null check must sit
// between these two statements -- confirm it is present.
139 TextInfo other = obj as TextInfo;
142 if (other.m_win32LangID != m_win32LangID)
// Hash code is m_win32LangID itself, the same field Equals compares.
149 public override int GetHashCode()
151 return (m_win32LangID);
// Returns "TextInfo - " followed by m_win32LangID.
154 public override string ToString()
156 return "TextInfo - " + m_win32LangID;
// Converts each word of str to title case: first letter title-cased, the
// remaining characters up to the next whitespace lower-cased.
// A word is examined first so that words that need no change are left alone.
// Returns the original string instance when no StringBuilder was created.
// Throws ArgumentNullException for a null argument.
159 public string ToTitleCase (string str)
// NOTE(review): the one-string ArgumentNullException constructor takes the
// parameter NAME, so "string is null" ends up in ParamName rather than in
// the message; passing "str" is probably what was intended.
162 throw new ArgumentNullException("string is null");
// Output buffer; null here, and the final return treats null as "unchanged".
164 StringBuilder sb = null;
167 while (i < str.Length) {
// Look for the next letter; note that i is advanced by the test itself.
168 if (!Char.IsLetter (str [i++]))
// Title-case form of the word's first character.
171 char t = ToTitleCase (str [i]);
172 bool capitalize = true;
175 bool allTitle = true;
176 // if the word is all titlecase,
177 // then don't capitalize it.
// Scan the rest of the word (it ends at the first whitespace character).
179 while (++i < str.Length) {
180 if (Char.IsWhiteSpace (str [i]))
182 t = ToTitleCase (str [i]);
192 // still check if all remaining
193 // characters are lowercase,
194 // where we don't have to modify
196 while (++i < str.Length) {
197 if (Char.IsWhiteSpace (str [i]))
// A character that differs from its lower-case form was found.
199 if (ToLower (str [i]) != str [i]) {
// Rewrite path: copy the untouched run [start, i), then the title-cased
// first character of the word.
209 sb = new StringBuilder (str.Length);
210 sb.Append (str, start, i - start);
211 sb.Append (ToTitleCase (str [i]));
// Lower-case the remainder of the word, up to the next whitespace.
213 while (++i < str.Length) {
214 if (Char.IsWhiteSpace (str [i]))
216 sb.Append (ToLower (str [i]));
// Copy whatever follows the last rewritten word.
222 sb.Append (str, start, str.Length - start);
// No buffer means nothing was rewritten: hand back the input instance.
224 return sb != null ? sb.ToString () : str;
227 // Only Azeri and Turkish have their own special cases.
228 // Apart from them, all languages share the same special cases
229 // (few enough to enumerate explicitly).
// Lower-cases a single character.
// Order of checks: ASCII fast paths, then the invariant conversion when
// there is no culture (or its LCID is 0x7F), then a switch of special
// cases, and finally Char.ToLowerInvariant as the fallback.
230 public virtual char ToLower (char c)
232 // quick ASCII range check
// && binds tighter than ||, so this matches c < 0x40 or 0x61..0x7F.
// NOTE(review): presumably these characters are returned unchanged.
233 if (c < 0x40 || 0x60 < c && c < 128)
// ASCII 'A'..'Z' map by adding 0x20. 'I' is skipped when handleDotI is set,
// so it can reach the dotless-i case below.
235 else if ('A' <= c && c <= 'Z' && (!handleDotI || c != 'I'))
236 return (char) (c + 0x20);
// No culture, or LCID 0x7F: use the invariant mapping.
238 if (ci == null || ci.LCID == 0x7F)
239 return Char.ToLowerInvariant (c);
// Dotted / dotless I pair.
// NOTE(review): presumably guarded by handleDotI -- confirm.
242 case '\u0049': // Latin uppercase I
244 return '\u0131'; // I becomes dotless i
246 case '\u0130': // I-dotted
247 return '\u0069'; // i
// Title-case digraphs (capital letter followed by a small letter).
249 case '\u01c5': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
251 // \u01c7 -> \u01c9 (LJ) : invariant
252 case '\u01c8': // LATIN CAPITAL LETTER L WITH SMALL LETTER J
254 // \u01ca -> \u01cc (NJ) : invariant
255 case '\u01cb': // LATIN CAPITAL LETTER N WITH SMALL LETTER J
257 // WITH CARON : invariant
258 // WITH DIAERESIS AND * : invariant
260 case '\u01f2': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
262 case '\u03d2': // ? it is not in ICU
264 case '\u03d3': // ? it is not in ICU
266 case '\u03d4': // ? it is not in ICU
// Everything not handled above uses the invariant mapping.
269 return Char.ToLowerInvariant (c);
// Upper-cases a single character.
// Order of checks: ASCII fast paths, then the invariant conversion when
// there is no culture (or its LCID is 0x7F), then a switch of special
// cases, and finally Char.ToUpperInvariant as the fallback.
272 public virtual char ToUpper (char c)
274 // quick ASCII range check
// ASCII 'a'..'z' map by subtracting 0x20. 'i' is skipped when handleDotI is
// set, so it can reach the dotted-capital-I case below.
277 else if ('a' <= c && c <= 'z' && (!handleDotI || c != 'i'))
278 return (char) (c - 0x20);
// No culture, or LCID 0x7F: use the invariant mapping.
280 if (ci == null || ci.LCID == 0x7F)
281 return Char.ToUpperInvariant (c);
// Dotted / dotless I pair.
// NOTE(review): presumably guarded by handleDotI -- confirm.
284 case '\u0069': // Latin lowercase i
286 return '\u0130'; // dotted capital I
288 case '\u0131': // dotless i
289 return '\u0049'; // I
// Title-case digraphs, the same code points ToLower(char) special-cases.
291 case '\u01c5': // see ToLower()
293 case '\u01c8': // see ToLower()
295 case '\u01cb': // see ToLower()
297 case '\u01f2': // see ToLower()
// Greek small letters with dialytika and tonos map to the capital with
// dialytika only.
299 case '\u0390': // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
300 return '\u03aa'; // it is not in ICU
301 case '\u03b0': // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
302 return '\u03ab'; // it is not in ICU
// Greek symbol variants.
303 case '\u03d0': // GREEK BETA
305 case '\u03d1': // GREEK THETA
307 case '\u03d5': // GREEK PHI
309 case '\u03d6': // GREEK PI
311 case '\u03f0': // GREEK KAPPA
313 case '\u03f1': // GREEK RHO
315 // am not sure why miscellaneous GREEK symbols are
// Everything not handled above uses the invariant mapping.
319 return Char.ToUpperInvariant (c);
// Title-case mapping for a single character; used by ToTitleCase(string)
// for word-initial characters and for its "already title case" test.
322 private char ToTitleCase (char c)
324 // Handle some Latin characters.
// U+2170..U+217F are the small Roman numerals and U+24D0..U+24E9 the circled
// Latin small letters. NOTE(review): presumably characters in these ranges
// are returned unchanged -- confirm.
343 if ('\u2170' <= c && c <= '\u217f' || // Roman numbers
344 '\u24d0' <= c && c <= '\u24e9')
// Lower-cases a string one character at a time via ToLower(char).
// Returns the original string instance when no character changes.
// Throws ArgumentNullException for a null argument.
349 public virtual string ToLower (string s)
351 // In ICU (3.2) there are a few cases that one single
352 // character results in multiple characters in e.g.
353 // tr-TR culture. So I tried brute force conversion
354 // test with single character as a string input, but
355 // there was no such conversion. So I think it just
356 // invokes ToLower(char).
// NOTE(review): the one-string ArgumentNullException constructor takes the
// parameter NAME, so "string is null" ends up in ParamName rather than in
// the message; passing "s" is probably what was intended.
358 throw new ArgumentNullException ("string is null");
// Output buffer; null here, and the final return treats null as "unchanged".
359 StringBuilder sb = null;
362 for (int i = 0; i < s.Length; i++) {
// Only characters whose lower-case form differs need any work.
363 if (s [i] != ToLower (s [i])) {
365 sb = new StringBuilder (s.Length);
// Copy the unchanged run [start, i), then the converted character.
366 sb.Append (s, start, i - start);
367 sb.Append (ToLower (s [i]));
// Append the unchanged tail that follows the last converted character.
372 if (sb != null && start < s.Length)
373 sb.Append (s, start, s.Length - start);
374 return sb == null ? s : sb.ToString ();
// Upper-cases a string one character at a time via ToUpper(char).
// Returns the original string instance when no character changes.
// Throws ArgumentNullException for a null argument.
377 public virtual string ToUpper (string s)
379 // In ICU (3.2) there is a case that string
380 // is handled beyond per-character conversion, but
381 // it is only lt-LT culture where MS.NET does not
382 // handle any special transliteration. So I keep
383 // ToUpper() just as character conversion.
// NOTE(review): the one-string ArgumentNullException constructor takes the
// parameter NAME, so "string is null" ends up in ParamName rather than in
// the message; passing "s" is probably what was intended.
385 throw new ArgumentNullException ("string is null");
// Output buffer; null here, and the final return treats null as "unchanged".
386 StringBuilder sb = null;
388 for (int i = 0; i < s.Length; i++) {
// Only characters whose upper-case form differs need any work.
389 if (s [i] != ToUpper (s [i])) {
391 sb = new StringBuilder (s.Length);
// Copy the unchanged run [start, i), then the converted character.
392 sb.Append (s, start, i - start);
393 sb.Append (ToUpper (s [i]));
// Append the unchanged tail that follows the last converted character.
397 if (sb != null && start < s.Length)
398 sb.Append (s, start, s.Length - start);
399 return sb == null ? s : sb.ToString ();
402 /* IDeserialization interface */
404 void IDeserializationCallback.OnDeserialization(object sender)