[corlib] Consider all unicode separators in ToTitleCase. Fixes #16365
author Marek Safar <marek.safar@gmail.com>
Fri, 29 Nov 2013 19:39:24 +0000 (20:39 +0100)
committer Marek Safar <marek.safar@gmail.com>
Fri, 29 Nov 2013 19:40:07 +0000 (20:40 +0100)
mcs/class/corlib/System.Globalization/TextInfo.cs
mcs/class/corlib/Test/System.Globalization/TextInfoTest.cs

index cd4b95be9875657bbb56e6d47caf9bf9901e97ed..56493a34362ce3da4e6c223d607b6eea34b877be 100644 (file)
@@ -225,10 +225,12 @@ namespace System.Globalization {
                                        // then don't capitalize it.
                                        int saved = i;
                                        while (++i < str.Length) {
-                                               if (Char.IsWhiteSpace (str [i]))
+                                               var ch = str [i];
+                                               var category = char.GetUnicodeCategory (ch);
+                                               if (IsSeparator (category))
                                                        break;
-                                               t = ToTitleCase (str [i]);
-                                               if (t != str [i]) {
+                                               t = ToTitleCase (ch);
+                                               if (t != ch) {
                                                        allTitle = false;
                                                        break;
                                                }
@@ -242,9 +244,11 @@ namespace System.Globalization {
                                        // where we don't have to modify
                                        // the source word.
                                        while (++i < str.Length) {
-                                               if (Char.IsWhiteSpace (str [i]))
+                                               var ch = str [i];
+                                               var category = char.GetUnicodeCategory (ch);
+                                               if (IsSeparator (category))
                                                        break;
-                                               if (ToLower (str [i]) != str [i]) {
+                                               if (ToLower (ch) != ch) {
                                                        capitalize = true;
                                                        i = saved;
                                                        break;
@@ -259,9 +263,11 @@ namespace System.Globalization {
                                        sb.Append (ToTitleCase (str [i]));
                                        start = i + 1;
                                        while (++i < str.Length) {
-                                               if (Char.IsWhiteSpace (str [i]))
+                                               var ch = str [i];
+                                               var category = char.GetUnicodeCategory (ch);
+                                               if (IsSeparator (category))
                                                        break;
-                                               sb.Append (ToLower (str [i]));
+                                               sb.Append (ToLower (ch));
                                        }
                                        start = i;
                                }
@@ -272,6 +278,27 @@ namespace System.Globalization {
                        return sb != null ? sb.ToString () : str;
                }
 
+               static bool IsSeparator (UnicodeCategory category)
+               {
+                       switch (category) {
+                       case UnicodeCategory.SpaceSeparator:
+                       case UnicodeCategory.LineSeparator:
+                       case UnicodeCategory.ParagraphSeparator:
+                       case UnicodeCategory.Control:
+                       case UnicodeCategory.Format:
+                       case UnicodeCategory.ConnectorPunctuation:
+                       case UnicodeCategory.DashPunctuation:
+                       case UnicodeCategory.OpenPunctuation:
+                       case UnicodeCategory.ClosePunctuation:
+                       case UnicodeCategory.InitialQuotePunctuation:
+                       case UnicodeCategory.FinalQuotePunctuation:
+                       case UnicodeCategory.OtherPunctuation:
+                               return true;
+                       }
+
+                       return false;
+               }
+
                // Only Azeri and Turkish have their own special cases.
                // Other than them, all languages have common special case
                // (enumerable enough).
index 5113cc5e981c7d4b88c367641a700f993237639d..9629dacf4b9ed9dfc4c02243941c19a7a9a8ef84 100644 (file)
@@ -24,10 +24,11 @@ public class TextInfoTest {
        {
                TextInfo ti = new CultureInfo ("en-US", false).TextInfo;
 
-               Assert.AreEqual (" The Dog", ti.ToTitleCase (" the dog"));
-               Assert.AreEqual (" The Dude", ti.ToTitleCase (" The Dude"));
-               Assert.AreEqual ("La Guerra Yla Paz", ti.ToTitleCase ("la Guerra yLa pAz"));
-               Assert.AreEqual ("\tTab\tAnd\tPeace", ti.ToTitleCase ("\ttab\taNd\tpeaCE"));
+               Assert.AreEqual (" The Dog", ti.ToTitleCase (" the dog"), "#1");
+               Assert.AreEqual (" The Dude", ti.ToTitleCase (" The Dude"), "#2");
+               Assert.AreEqual ("La Guerra Yla Paz", ti.ToTitleCase ("la Guerra yLa pAz"), "#3");
+               Assert.AreEqual ("\tTab\tAnd\tPeace", ti.ToTitleCase ("\ttab\taNd\tpeaCE"), "#4");
+               Assert.AreEqual ("This_Is\uFE58A\u0095String\u06D4With\uFE33Separators", ti.ToTitleCase ("this_is\uFE58a\u0095string\u06D4with\uFE33separators"), "#5");
        }
 
        [Test]