2 // System.Globalization.StringInfo.cs
5 // Dick Porter (dick@ximian.com)
7 // (C) 2002 Ximian, Inc.
8 // (C) 2004 Novell, Inc.
12 // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
14 // Permission is hereby granted, free of charge, to any person obtaining
15 // a copy of this software and associated documentation files (the
16 // "Software"), to deal in the Software without restriction, including
17 // without limitation the rights to use, copy, modify, merge, publish,
18 // distribute, sublicense, and/or sell copies of the Software, and to
19 // permit persons to whom the Software is furnished to do so, subject to
20 // the following conditions:
22 // The above copyright notice and this permission notice shall be
23 // included in all copies or substantial portions of the Software.
25 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
29 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 using System.Collections;
35 using System.Runtime.InteropServices;
37 namespace System.Globalization {
41 public class StringInfo {
// Constructs a StringInfo from the given string value.
// NOTE(review): the constructor body is not visible in this excerpt. Going by
// the existing comment below, validation of `value` is presumably delegated
// to a property setter -- confirm against the full file.
49 public StringInfo (string value)
51 // Argument check in property
// Value equality: two StringInfo objects are equal when the strings they
// hold compare equal. Returns false when `value` is null or is not a
// StringInfo (the `as` cast yields null in both cases).
public override bool Equals (object value)
{
	StringInfo that = value as StringInfo;
	if (that == null)
		return false;
	return s == that.s;
}
// The hash code is taken directly from the held string, so instances that
// hold equal strings produce equal hash codes.
public override int GetHashCode ()
{
	int hash = s.GetHashCode ();
	return hash;
}
// Number of text elements in the held string `s`.
// The loop walks `s` one text element at a time: each pass advances `idx` by
// the value GetNextTextElementLength returns and bumps `length` once.
// NOTE(review): the accessor header, the declaration/initialisation of
// `length`, and the return statement are not visible in this excerpt.
68 public int LengthInTextElements {
72 for (int idx = 0; idx < s.Length; length++)
73 idx += GetNextTextElementLength (s, idx);
// Property exposing the string this StringInfo works on.
// Throws ArgumentNullException naming "value".
// NOTE(review): the accessors and the guard for this throw are not visible
// here; presumably the setter rejects null -- confirm against the full file.
79 public string String {
83 throw new ArgumentNullException ("value");
// Returns the tail of `s` that begins after skipping `startingTextElement`
// text elements.
// Throws ArgumentOutOfRangeException ("startingTextElement") when the
// argument is negative or when `s` is empty.
// NOTE(review): the declaration of `idx` and the condition guarding the
// throw inside the loop are not visible in this excerpt; presumably the loop
// throws when `idx` has run past the end of `s` -- confirm.
89 public string SubstringByTextElements (int startingTextElement)
91 if (startingTextElement < 0 || s.Length == 0)
92 throw new ArgumentOutOfRangeException ("startingTextElement");
// Skip one text element per iteration, advancing the char offset `idx`.
94 for (int i = 0; i < startingTextElement; i++) {
96 throw new ArgumentOutOfRangeException ("startingTextElement");
97 idx += GetNextTextElementLength (s, idx);
99 return s.Substring (idx);
// Returns the part of `s` that spans `lengthInTextElements` text elements,
// beginning after the first `startingTextElement` text elements.
// Throws ArgumentOutOfRangeException ("startingTextElement") when that
// argument is negative or `s` is empty, and ("lengthInTextElements") when
// that argument is negative.
// NOTE(review): the declaration of `idx`, the assignment of `start`, and the
// conditions guarding the two in-loop throws are not visible in this
// excerpt; presumably each fires when `idx` reaches the end of `s` -- confirm.
102 public string SubstringByTextElements (int startingTextElement, int lengthInTextElements)
104 if (startingTextElement < 0 || s.Length == 0)
105 throw new ArgumentOutOfRangeException ("startingTextElement");
106 if (lengthInTextElements < 0)
107 throw new ArgumentOutOfRangeException ("lengthInTextElements");
// First loop: skip the leading text elements.
109 for (int i = 0; i < startingTextElement; i++) {
111 throw new ArgumentOutOfRangeException ("startingTextElement");
112 idx += GetNextTextElementLength (s, idx);
// Second loop: advance over the text elements that make up the result.
115 for (int i = 0; i < lengthInTextElements; i++) {
117 throw new ArgumentOutOfRangeException ("lengthInTextElements");
118 idx += GetNextTextElementLength (s, idx);
120 return s.Substring (start, idx - start);
// Returns the first text element of `str` by delegating to the
// (string, int) overload with index 0.
// Throws ArgumentNullException when `str` is null or empty.
public static string GetNextTextElement(string str)
{
	if (str == null || str.Length == 0) {
		// The single-string ArgumentNullException constructor expects the
		// parameter NAME; the previous code passed message text there,
		// which produced a misleading "Parameter name: string is null".
		throw new ArgumentNullException ("str");
	}

	return GetNextTextElement (str, 0);
}
// Returns the text element of `str` that starts at char offset `index`.
// The element length comes from GetNextTextElementLength; a one-char element
// is built straight from that char, anything else is cut out with Substring.
public static string GetNextTextElement(string str, int index)
{
	int elementLength = GetNextTextElementLength (str, index);
	if (elementLength == 1)
		return new string (str [index], 1);
	return str.Substring (index, elementLength);
}
// Helper used by the public members of this class: callers add its result to
// a char offset and use it as a Substring length, so it yields the length in
// chars of the text element starting at `index`.
// Visible behaviour: throws ArgumentNullException for a rejected `str`,
// throws ArgumentOutOfRangeException for a rejected `index`, classifies the
// char at `index` by Unicode category, treats surrogates specially (high
// surrogate followed by low surrogate is a pair), and scans forward over
// NonSpacingMark / SpacingCombiningMark / EnclosingMark chars.
// NOTE(review): the return statements, the guards for both throws, the
// declaration of `count`, and the closing delimiters of several block
// comments are not visible in this excerpt -- confirm details against the
// full file.
// NOTE(review): both exception constructors below receive message-like text
// in the position of the parameter name -- confirm intent.
137 static int GetNextTextElementLength(string str, int index)
140 throw new ArgumentNullException("string is null");
143 if(index >= str.Length)
146 throw new ArgumentOutOfRangeException ("Index is not valid");
148 /* Find the next base character, surrogate
149 * pair or combining character sequence
152 char ch = str[index];
153 UnicodeCategory cat = char.GetUnicodeCategory (ch);
155 if (cat == UnicodeCategory.Surrogate) {
156 /* Check that it's a high surrogate
157 * followed by a low surrogate
159 if (ch >= 0xD800 && ch <= 0xDBFF) {
160 if ((index + 1) < str.Length &&
161 str[index + 1] >= 0xDC00 &&
162 str[index + 1] <= 0xDFFF) {
163 /* A valid surrogate pair */
166 /* High surrogate on its own */
170 /* Low surrogate on its own */
174 /* Look for a base character, which
175 * may or may not be followed by a
176 * series of combining characters
179 if (cat == UnicodeCategory.NonSpacingMark ||
180 cat == UnicodeCategory.SpacingCombiningMark ||
181 cat == UnicodeCategory.EnclosingMark) {
182 /* Not a base character */
// Scan forward from index + count while the chars are still combining marks;
// the first char of any other category ends the sequence.
188 while (index + count < str.Length) {
189 cat = char.GetUnicodeCategory (str[index + count]);
190 if (cat != UnicodeCategory.NonSpacingMark &&
191 cat != UnicodeCategory.SpacingCombiningMark &&
192 cat != UnicodeCategory.EnclosingMark) {
193 /* Finished the sequence */
// Returns an enumerator over the text elements of `str`, starting at char
// offset 0.
// Throws ArgumentNullException when `str` is null or empty.
public static TextElementEnumerator GetTextElementEnumerator(string str)
{
	if (str == null || str.Length == 0) {
		// The single-string ArgumentNullException constructor expects the
		// parameter NAME; the previous code passed message text there,
		// which produced a misleading "Parameter name: string is null".
		throw new ArgumentNullException ("str");
	}

	return new TextElementEnumerator (str, 0);
}
// Returns an enumerator over the text elements of `str`, starting at char
// offset `index`.
// Throws ArgumentOutOfRangeException when `index` is negative or not less
// than str.Length, and ArgumentNullException for a rejected `str`.
// NOTE(review): the guard for the ArgumentNullException is not visible in
// this excerpt. Both exception constructors receive message-like text in the
// position of the parameter name -- confirm intent.
211 public static TextElementEnumerator GetTextElementEnumerator(string str, int index)
214 throw new ArgumentNullException("string is null");
217 if(index < 0 || index >= str.Length) {
218 throw new ArgumentOutOfRangeException ("Index is not valid");
221 return(new TextElementEnumerator (str, index));
// Returns, as an int[], the ElementIndex reported by the text element
// enumerator for each element it yields over `str`.
// NOTE(review): the guard for the ArgumentNullException and one line just
// before the loop are not visible in this excerpt. The exception constructor
// receives message-like text in the position of the parameter name.
// NOTE(review): the enumerator is obtained through
// GetTextElementEnumerator (str), which also rejects an empty string --
// confirm that an empty input is meant to throw here.
224 public static int[] ParseCombiningCharacters(string str)
227 throw new ArgumentNullException("string is null");
// Pre-size the list to the char count, an upper bound on the element count.
230 ArrayList indices = new ArrayList (str.Length);
231 TextElementEnumerator tee = GetTextElementEnumerator (str);
234 while(tee.MoveNext ()) {
235 indices.Add (tee.ElementIndex);
238 return((int[])indices.ToArray (typeof (int)));