2008-11-04 Atsushi Enomoto <atsushi@ximian.com>

author Atsushi Eno <atsushieno@gmail.com>

Tue, 4 Nov 2008 00:29:35 +0000 (00:29 -0000)

committer Atsushi Eno <atsushieno@gmail.com>

Tue, 4 Nov 2008 00:29:35 +0000 (00:29 -0000)
author Atsushi Eno <atsushieno@gmail.com>
Tue, 4 Nov 2008 00:29:35 +0000 (00:29 -0000)
committer Atsushi Eno <atsushieno@gmail.com>
Tue, 4 Nov 2008 00:29:35 +0000 (00:29 -0000)
diff --git a/eglib/ChangeLog b/eglib/ChangeLog

index 15587dcda0a23061188d19789d6f93643aa8e7e0..4f742d12aef16db6d16a4aa9ca5afe35b217fff2 100644 (file)
--- a/eglib/ChangeLog
+++ b/eglib/ChangeLog
@@ -1,3 +1,13 @@
+2008-11-04  Atsushi Enomoto  <atsushi@ximian.com>
+
+       * src/gutf8.c, src/gunicode.c, src/glib.h:
+         implemented g_unichar_type(), g_unichar_toupper(),
+         g_unichar_tolower(), g_unichar_totitle(), g_utf8_strup()
+         and g_utf8_strdown(). Fixed some surrogate pair bugs.
+       * TODO : removed implemented things.
+       * test/unicode.c, test/tests.h, test/utf8.c, test/Makefile.am:
+         added new tests.
+
  2008-11-04  Atsushi Enomoto  <atsushi@ximian.com>
  
         * src/unicode-data.h : new header for some new unicode manipulation
diff --git a/eglib/TODO b/eglib/TODO

index 76ac0da7d5346292f460f29ed22e6c99f61ef4ef..bbbb49bbba4c9c2921879a4bc9948fd945b3e9e5 100644 (file)
--- a/eglib/TODO
+++ b/eglib/TODO
@@ -14,8 +14,7 @@ Macros:
  
  * Unimplemented, not supported currently:
  
-               g_unichar_tolower       Used for deprecated unmanaged string collation
-               g_unichar_type          Used for deprecated unmanaged string collation
+       (none as yet.)
  
  * Dead Code
  
diff --git a/eglib/src/glib.h b/eglib/src/glib.h

index 2071113cecb318e1cfb539ced5c263885db9ab23..b4ae36d7d5d48419ca514e389bb6e21d3866a8ee 100644 (file)
--- a/eglib/src/glib.h
+++ b/eglib/src/glib.h
@@ -531,10 +531,41 @@ gpointer g_convert_error_quark(void);
  typedef guint32 gunichar;
  
  typedef enum {
+       G_UNICODE_CONTROL,      /* first enumerator, so its value is 0 (g_unichar_type() in gunicode.c ends with "return 0") */
+       G_UNICODE_FORMAT,
+       G_UNICODE_UNASSIGNED,
+       G_UNICODE_PRIVATE_USE,
+       G_UNICODE_SURROGATE,    /* the five enumerators added above shift G_UNICODE_LOWERCASE_LETTER from 0 to 5 */
         G_UNICODE_LOWERCASE_LETTER,
+       G_UNICODE_MODIFIER_LETTER,
+       G_UNICODE_OTHER_LETTER,
+       G_UNICODE_TITLECASE_LETTER,
+       G_UNICODE_UPPERCASE_LETTER,
+       G_UNICODE_COMBINING_MARK,
+       G_UNICODE_ENCLOSING_MARK,
+       G_UNICODE_NON_SPACING_MARK,
+       G_UNICODE_DECIMAL_NUMBER,
+       G_UNICODE_LETTER_NUMBER,
+       G_UNICODE_OTHER_NUMBER,
+       G_UNICODE_CONNECT_PUNCTUATION,
+       G_UNICODE_DASH_PUNCTUATION,
+       G_UNICODE_CLOSE_PUNCTUATION,
+       G_UNICODE_FINAL_PUNCTUATION,
+       G_UNICODE_INITIAL_PUNCTUATION,
+       G_UNICODE_OTHER_PUNCTUATION,
+       G_UNICODE_OPEN_PUNCTUATION,
+       G_UNICODE_CURRENCY_SYMBOL,
+       G_UNICODE_MODIFIER_SYMBOL,
+       G_UNICODE_MATH_SYMBOL,
+       G_UNICODE_OTHER_SYMBOL,
+       G_UNICODE_LINE_SEPARATOR,
+       G_UNICODE_PARAGRAPH_SEPARATOR,
+       G_UNICODE_SPACE_SEPARATOR       /* last enumerator; values are implicit and consecutive, so order matters */
  } GUnicodeType;
  
+gunichar       g_unichar_toupper (gunichar c);
  gunichar       g_unichar_tolower (gunichar c);
+gunichar       g_unichar_totitle (gunichar c);
  GUnicodeType   g_unichar_type    (gunichar c);
  gboolean       g_unichar_isxdigit (gunichar c);
  gint           g_unichar_xdigit_value (gunichar c);
@@ -570,6 +601,8 @@ typedef enum {
         G_CONVERT_ERROR_NOT_ABSOLUTE_PATH
  } GConvertError;
  
+gchar* g_utf8_strup (const gchar *str, gssize len);
+gchar* g_utf8_strdown (const gchar *str, gssize len);
  gunichar2 *g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
  gchar     *g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
  gunichar2 *g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
diff --git a/eglib/src/gunicode.c b/eglib/src/gunicode.c

index c426cf0588ad8c94452e4cb720d8b81a38bdf88f..0f7196221f234f13587ba73db932304dfafe071e 100644 (file)
--- a/eglib/src/gunicode.c
+++ b/eglib/src/gunicode.c
@@ -35,6 +35,7 @@
   */
  #include <stdio.h>
  #include <glib.h>
+#include <unicode-data.h>
  #include <errno.h>
  #ifdef _MSC_VER
  /* FIXME */
@@ -82,15 +83,94 @@ static const gulong offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E208
  GUnicodeType 
  g_unichar_type (gunichar c)
  {
-       g_error ("%s", "g_unichar_type is not implemented");
+int i;
+
+       guint16 cp = (guint16) c;
+       for (i = 0; i < unicode_category_ranges_count; i++) {
+               if (cp < unicode_category_ranges [i].start)
+                       continue;
+               if (unicode_category_ranges [i].end <= cp)
+                       continue;
+               return unicode_category [i] [cp - unicode_category_ranges [i].start];
+       }
+
+       /*
+       // 3400-4DB5: OtherLetter
+       // 4E00-9FC3: OtherLetter
+       // AC00-D7A3: OtherLetter
+       // D800-DFFF: OtherSurrogate
+       // E000-F8FF: OtherPrivateUse
+       // 20000-2A6D6 OtherLetter
+       // F0000-FFFFD OtherPrivateUse
+       // 100000-10FFFD OtherPrivateUse
+       */
+       if (0x3400 <= cp && cp < 0x4DB5)
+               return G_UNICODE_OTHER_LETTER;
+       if (0x4E00 <= cp && cp < 0x9FC3)
+               return G_UNICODE_OTHER_LETTER;
+       if (0xAC00<= cp && cp < 0xD7A3)
+               return G_UNICODE_OTHER_LETTER;
+       if (0xD800 <= cp && cp < 0xDFFF)
+               return G_UNICODE_SURROGATE;
+       if (0xE000 <= cp && cp < 0xF8FF)
+               return G_UNICODE_PRIVATE_USE;
+       /* since the argument is UTF-16, we cannot check beyond FFFF */
+
+       /* It should match any of above */
         return 0;
  }
  
+gunichar
+g_unichar_case (gunichar c, gboolean upper)
+{
+       /*
+        * Maps c through the simple upper-case (upper != FALSE) or simple
+        * lower-case tables and returns the mapped code point, or c itself
+        * when there is no mapping (a table entry of 0 means "unmapped").
+        *
+        * simple_case_map_ranges [i] covers [start, end). The loop returns
+        * as soon as c lies below a range start, so the ranges are
+        * presumably sorted in ascending order.
+        *
+        * Code points below 0x10000 use the 16-bit "lowarea" tables; the
+        * others use the 32-bit "higharea" tables, whose index is the range
+        * index minus the number of lowarea tables.
+        */
+       int i, i2;      /* plain int: a gint8 index would wrap past 127 */
+       guint32 cp = (guint32) c, v;
+       const guint16 *lowtab;
+       const guint32 *hightab;
+
+       for (i = 0; i < simple_case_map_ranges_count; i++) {
+               if (cp < simple_case_map_ranges [i].start)
+                       return c;
+               if (simple_case_map_ranges [i].end <= cp)
+                       continue;
+               if (c < 0x10000) {
+                       lowtab = upper ? simple_upper_case_mapping_lowarea [i] : simple_lower_case_mapping_lowarea [i];
+                       v = lowtab [cp - simple_case_map_ranges [i].start];
+               } else {
+                       i2 = i - (upper ? simple_upper_case_mapping_lowarea_table_count : simple_lower_case_mapping_lowarea_table_count);
+                       hightab = upper ? simple_upper_case_mapping_higharea [i2] : simple_lower_case_mapping_higharea [i2];
+                       v = hightab [cp - simple_case_map_ranges [i].start];
+               }
+               return v != 0 ? (gunichar) v : c;
+       }
+       return c;
+}
+
+gunichar
+g_unichar_toupper (gunichar c)
+{
+       /* Upper-casing is the TRUE flavour of the shared case mapper. */
+       gunichar mapped = g_unichar_case (c, TRUE);
+
+       return mapped;
+}
+
  gunichar
  g_unichar_tolower (gunichar c)
  {
-       g_error ("%s", "g_unichar_type is not implemented");
-       return 0;
+       return g_unichar_case (c, FALSE);
+}
+
+gunichar
+g_unichar_totitle (gunichar c)
+{
+       /*
+        * Returns the title-case form of c. Code points listed in
+        * simple_titlecase_mapping use the entry's "title" value; every
+        * other code point falls back to g_unichar_toupper ().
+        */
+       int i;          /* plain int: a guint8 counter would wrap at 256 entries and never terminate */
+       guint32 cp;
+
+       cp = (guint32) c;
+       for (i = 0; i < simple_titlecase_mapping_count; i++) {
+               if (simple_titlecase_mapping [i].codepoint == cp)
+                       return simple_titlecase_mapping [i].title;
+               if (simple_titlecase_mapping [i].codepoint > cp)
+                       /* it is ordered, hence no more match */
+                       break;
+       }
+       return g_unichar_toupper (c);
  }
  
  gboolean
diff --git a/eglib/src/gutf8.c b/eglib/src/gutf8.c

index 77de844693d045222c4f479eeb5b401db48de1ec..9d5786951b1f78b58db401d8b75244011ad5a179 100644 (file)
--- a/eglib/src/gutf8.c
+++ b/eglib/src/gutf8.c
@@ -21,6 +21,40 @@ g_convert_error_quark ()
         return error_quark;
  }
  
+gchar*
+utf8_case_conv (const gchar *str, gssize len, gboolean upper)
+{
+       /*
+        * Returns a newly allocated UTF-8 copy of str (len as accepted by
+        * g_utf8_to_utf16) with every code point passed through
+        * g_unichar_toupper () when upper is set, g_unichar_tolower ()
+        * otherwise. The caller releases the result with g_free ().
+        *
+        * The string is converted UTF-8 -> UTF-16 -> UCS-4, mapped one code
+        * point at a time, then converted back. NULL is returned as soon as
+        * any conversion step yields NULL, so a failed step is never fed
+        * into the next one.
+        */
+       glong i, u16len = 0, u32len = 0;
+       gunichar2 *u16str;
+       gunichar *u32str;
+       gchar *u8str;
+
+       u16str = g_utf8_to_utf16 (str, len, NULL, &u16len, NULL);
+       if (u16str == NULL)
+               return NULL;
+       u32str = g_utf16_to_ucs4 (u16str, u16len, NULL, &u32len, NULL);
+       g_free (u16str);
+       if (u32str == NULL)
+               return NULL;
+       for (i = 0; i < u32len; i++) {
+               u32str [i] = upper ? g_unichar_toupper (u32str [i]) : g_unichar_tolower (u32str [i]);
+       }
+       u16str = g_ucs4_to_utf16 (u32str, u32len, NULL, &u16len, NULL);
+       g_free (u32str);
+       if (u16str == NULL)
+               return NULL;
+       u8str = g_utf16_to_utf8 (u16str, u16len, NULL, NULL, NULL);
+       g_free (u16str);
+       return u8str;
+}
+
+gchar*
+g_utf8_strup (const gchar *str, gssize len)
+{
+       /* Upper-case conversion: ask the shared helper for the TRUE flavour. */
+       const gboolean to_upper = TRUE;
+
+       return utf8_case_conv (str, len, to_upper);
+}
+
+gchar*
+g_utf8_strdown (const gchar *str, gssize len)
+{
+       /* Lower-case conversion: ask the shared helper for the FALSE flavour. */
+       const gboolean to_upper = FALSE;
+
+       return utf8_case_conv (str, len, to_upper);
+}
+
  gunichar2*
  g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error)
  {
@@ -268,12 +302,14 @@ g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *item
         while (len < 0 ? str [in_pos] : in_pos < len) {
                 ch = str [in_pos];
                 if (surrogate) {
-                       surrogate = 0;
-                       if (ch >= 0xDC00 && ch <= 0xDFFF)
+                       if (ch >= 0xDC00 && ch <= 0xDFFF) {
                                 codepoint = 0x10000 + (ch - 0xDC00) + ((surrogate - 0xD800) << 10);
-                       else
+                               surrogate = 0;
+                       } else {
+                               surrogate = 0;
                                 /* invalid surrogate pair */
                                 continue;
+                       }
                 } else {
                         /* fast path optimization */
                         if (ch < 0x80) {
@@ -296,6 +332,8 @@ g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *item
                 }
                 in_pos++;
  
+               if (surrogate != 0)
+                       continue;
                 if (codepoint < 0x80)
                         ret [out_pos++] = (gchar) codepoint;
                 else if (codepoint < 0x0800) {
diff --git a/eglib/test/Makefile.am b/eglib/test/Makefile.am

index f57c033abff9d49388d96c47a913e66fb790241d..4f825b3cc611707cbbad83279739e51811ccadd8 100644 (file)
--- a/eglib/test/Makefile.am
+++ b/eglib/test/Makefile.am
@@ -21,6 +21,7 @@ SOURCES = \
         pattern.c       \
         dir.c           \
         markup.c        \
+       unicode.c       \
         utf8.c          \
         endian.c        \
         module.c        \
diff --git a/eglib/test/tests.h b/eglib/test/tests.h

index 6c038e93b01cd608b2589b3a963a98702ee7b50a..61ba80fe7a99f7571a04e242e7d9c3b89ef7c0c9 100644 (file)
--- a/eglib/test/tests.h
+++ b/eglib/test/tests.h
@@ -18,6 +18,7 @@ DEFINE_TEST_GROUP_INIT_H(file_tests_init);
  DEFINE_TEST_GROUP_INIT_H(pattern_tests_init);
  DEFINE_TEST_GROUP_INIT_H(dir_tests_init);
  DEFINE_TEST_GROUP_INIT_H(markup_tests_init);
+DEFINE_TEST_GROUP_INIT_H(unicode_tests_init);
  DEFINE_TEST_GROUP_INIT_H(utf8_tests_init);
  DEFINE_TEST_GROUP_INIT_H(endian_tests_init);
  DEFINE_TEST_GROUP_INIT_H(module_tests_init);
@@ -42,6 +43,7 @@ static Group test_groups [] = {
         {"file",      file_tests_init},
         {"pattern",   pattern_tests_init},
         {"dir",       dir_tests_init},
+       {"unicode",   unicode_tests_init},
         {"utf8",      utf8_tests_init},
         {"endian",    endian_tests_init},
         {"module",    module_tests_init},
diff --git a/eglib/test/unicode.c b/eglib/test/unicode.c

new file mode 100644 (file)

index 0000000..c1c3402
--- /dev/null
+++ b/eglib/test/unicode.c
@@ -0,0 +1,99 @@
+#include "test.h"
+
+/*
+ * g_unichar_type
+ *
+ * Spot-checks one code point for each of four categories: upper-case
+ * letter, lower-case letter, decimal number and currency symbol.
+ * Returns the FAILED (...) result of the first check that does not hold,
+ * or NULL when every check holds.
+ */
+RESULT
+test_g_unichar_type ()
+{
+       if (g_unichar_type ('A') != G_UNICODE_UPPERCASE_LETTER)
+               return FAILED ("#1");
+       if (g_unichar_type ('a') != G_UNICODE_LOWERCASE_LETTER)
+               return FAILED ("#2");
+       if (g_unichar_type ('1') != G_UNICODE_DECIMAL_NUMBER)
+               return FAILED ("#3");
+       if (g_unichar_type (0xA3) != G_UNICODE_CURRENCY_SYMBOL)
+               return FAILED ("#4");
+       return NULL;
+}
+
+/*
+ * g_unichar_toupper
+ *
+ * Expectations encoded below:
+ *   - 0, '1', 0x1C4 and 0xFFFF map to themselves;
+ *   - 'a' maps to 'A';
+ *   - 0x1F2 and 0x1F3 both map to 0x1F1;
+ *   - 0x10428 maps to 0x10400 (the only check above 0xFFFF).
+ * Returns the FAILED (...) result of the first check that does not hold,
+ * or NULL when every check holds.
+ */
+RESULT
+test_g_unichar_toupper ()
+{
+       if (g_unichar_toupper (0) != 0)
+               return FAILED ("#0");
+       if (g_unichar_toupper ('a') != 'A')
+               return FAILED ("#1");
+       if (g_unichar_toupper ('1') != '1')
+               return FAILED ("#2");
+       if (g_unichar_toupper (0x1C4) != 0x1C4)
+               return FAILED ("#3");
+       if (g_unichar_toupper (0x1F2) != 0x1F1)
+               return FAILED ("#4");
+       if (g_unichar_toupper (0x1F3) != 0x1F1)
+               return FAILED ("#5");
+       if (g_unichar_toupper (0xFFFF) != 0xFFFF)
+               return FAILED ("#6");
+       if (g_unichar_toupper (0x10428) != 0x10400)
+               return FAILED ("#7");
+       return NULL;
+}
+
+/*
+ * g_unichar_tolower
+ *
+ * Expectations encoded below:
+ *   - 0, '1' and 0xFFFF map to themselves;
+ *   - 'A' maps to 'a';
+ *   - 0x1C5 maps to 0x1C6;
+ *   - 0x1F1 and 0x1F2 both map to 0x1F3.
+ * No code point above 0xFFFF is checked here.
+ * Returns the FAILED (...) result of the first check that does not hold,
+ * or NULL when every check holds.
+ */
+RESULT
+test_g_unichar_tolower ()
+{
+       if (g_unichar_tolower (0) != 0)
+               return FAILED ("#0");
+       if (g_unichar_tolower ('A') != 'a')
+               return FAILED ("#1");
+       if (g_unichar_tolower ('1') != '1')
+               return FAILED ("#2");
+       if (g_unichar_tolower (0x1C5) != 0x1C6)
+               return FAILED ("#3");
+       if (g_unichar_tolower (0x1F1) != 0x1F3)
+               return FAILED ("#4");
+       if (g_unichar_tolower (0x1F2) != 0x1F3)
+               return FAILED ("#5");
+       if (g_unichar_tolower (0xFFFF) != 0xFFFF)
+               return FAILED ("#6");
+       return NULL;
+}
+
+/*
+ * g_unichar_totitle
+ */
+RESULT
+test_g_unichar_totitle ()
+{
+       /*
+        * Every check calls g_unichar_totitle itself; checks #0 and #6
+        * previously called g_unichar_toupper and so did not test the
+        * function under test.
+        *
+        * Expectations: 0, '1', 0x1F2 and 0xFFFF map to themselves, 'a'
+        * maps to 'A', 0x1C4 maps to 0x1C5 and 0x1F3 maps to 0x1F2.
+        * Returns the FAILED (...) result of the first check that does not
+        * hold, or NULL when every check holds.
+        */
+       if (g_unichar_totitle (0) != 0)
+               return FAILED ("#0");
+       if (g_unichar_totitle ('a') != 'A')
+               return FAILED ("#1");
+       if (g_unichar_totitle ('1') != '1')
+               return FAILED ("#2");
+       if (g_unichar_totitle (0x1C4) != 0x1C5)
+               return FAILED ("#3");
+       if (g_unichar_totitle (0x1F2) != 0x1F2)
+               return FAILED ("#4");
+       if (g_unichar_totitle (0x1F3) != 0x1F2)
+               return FAILED ("#5");
+       if (g_unichar_totitle (0xFFFF) != 0xFFFF)
+               return FAILED ("#6");
+       return NULL;
+}
+
+static Test unicode_tests [] = {       /* name / function pairs handed to DEFINE_TEST_GROUP_INIT below */
+       {"g_unichar_type", test_g_unichar_type},
+       {"g_unichar_toupper", test_g_unichar_toupper},
+       {"g_unichar_tolower", test_g_unichar_tolower},
+       {"g_unichar_totitle", test_g_unichar_totitle},
+       {NULL, NULL}    /* presumably the end-of-table sentinel; utf8_tests ends the same way */
+};
+
+DEFINE_TEST_GROUP_INIT(unicode_tests_init, unicode_tests)
diff --git a/eglib/test/utf8.c b/eglib/test/utf8.c

index a0e66f806a01d0a7be97eda4a962ad9c8c37acb1..e7f04059d5b6670e7b5b802e838386bd26ef6c69 100644 (file)
--- a/eglib/test/utf8.c
+++ b/eglib/test/utf8.c
@@ -82,8 +82,8 @@ compare_utf16_to_utf8 (const gchar *expected, const gunichar2 *utf16, glong len_
  RESULT
  test_utf16_to_utf8 ()
  {
-       const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27";
-       gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0};
+       const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81", *src5 = "\xF0\x90\x90\x80";
+       gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0}, str5 [] = {0xD801, 0xDC00, 0};
         RESULT result;
  
         gchar_to_gunichar2 (str1, src1);
@@ -97,6 +97,15 @@ test_utf16_to_utf8 ()
         if (result != OK)
                 return result;
         result = compare_utf16_to_utf8 (src2, str2, 2, 4);
+       if (result != OK)
+               return result;
+       result = compare_utf16_to_utf8 (src3, str3, 1, 3);
+       if (result != OK)
+               return result;
+       result = compare_utf16_to_utf8 (src4, str4, 1, 3);
+       if (result != OK)
+               return result;
+       result = compare_utf16_to_utf8 (src5, str5, 2, 4);
         if (result != OK)
                 return result;
  
@@ -194,6 +203,7 @@ test_utf8_seq ()
         if (out_read != 2) {
                 return FAILED ("out_read is expected to be 2 but was %d\n", out_read);
         }
+       g_free (dst);
  
         return OK;
  }
@@ -201,8 +211,8 @@ test_utf8_seq ()
  RESULT
  test_utf8_to_utf16 ()
  {
-       const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27";
-       gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0};
+       const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81";
+       gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0};
         RESULT result;
  
         gchar_to_gunichar2 (str1, src1);
@@ -216,6 +226,12 @@ test_utf8_to_utf16 ()
         if (result != OK)
                 return result;
         result = compare_utf8_to_utf16 (str2, src2, 4, 2);
+       if (result != OK)
+               return result;
+       result = compare_utf8_to_utf16 (str3, src3, 3, 1);
+       if (result != OK)
+               return result;
+       result = compare_utf8_to_utf16 (str4, src4, 3, 1);
         if (result != OK)
                 return result;
  
@@ -310,6 +326,8 @@ test_ucs4_to_utf16 ()
         static gunichar2 exp4[4] = {'h',0xdbff,0xdfff,'\0'};
         static gunichar str5[7] = {0xD7FF,0xD800,0xDFFF,0xE000,0x110000,0x10FFFF,'\0'};
         static gunichar2 exp5[5] = {0xD7FF,0xE000,0xdbff,0xdfff,'\0'};
+       static gunichar str6[2] = {0x10400, '\0'};
+       static gunichar2 exp6[3] = {0xD801, 0xDC00, '\0'};
         static glong read_write[12] = {1,1,0,0,0,0,1,1,0,0,1,2};
         gunichar2* res;
         glong items_read, items_written, current_write_index;
@@ -337,8 +355,8 @@ test_ucs4_to_utf16 ()
         items_read = items_written = 0;
         res = g_ucs4_to_utf16 (str2, 2, &items_read, &items_written, &err);
         check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
-       if (check_result) return check_result;
         g_free (res);
+       if (check_result) return check_result;
  
         items_read = items_written = 0;
         err = 0;
@@ -367,6 +385,13 @@ test_ucs4_to_utf16 ()
                 current_write_index += items_written;
         }
  
+       items_read = items_written = 0;
+       err = 0;
+       res = g_ucs4_to_utf16 (str6, 1, &items_read, &items_written, &err);
+       check_result = ucs4_to_utf16_check_result (res, exp6, items_read, 1, items_written, 2, err, FALSE);
+       if (check_result) return check_result;
+       g_free (res);
+
         return OK;
  }
  
@@ -411,6 +436,8 @@ test_utf16_to_ucs4 ()
         static gunichar2 str4[20] = {0xDC00,0xDFFF,0xDFF,0xD800,0xDBFF,0xD800,0xDC00,0xD800,0xDFFF,
                                      0xD800,0xE000,0xDBFF,0xDBFF,0xDBFF,0xDC00,0xDBFF,0xDFFF,0xDBFF,0xE000,'\0'};
         static gunichar exp4[6] = {0xDFF,0x10000,0x103ff,0x10fc00,0x10FFFF,'\0'};
+       static gunichar2 str5[3] = {0xD801, 0xDC00, 0};
+       static gunichar exp5[2] = {0x10400, 0};
         static glong read_write[33] = {1,0,0,1,0,0,1,1,1,2,1,0,2,2,1,2,2,1,2,1,0,2,1,0,2,2,1,2,2,1,2,1,0};
         gunichar* res;
         glong items_read, items_written, current_read_index,current_write_index;
@@ -481,6 +508,13 @@ test_utf16_to_ucs4 ()
                 current_write_index += items_written;
         }
  
+       items_read = items_written = 0;
+       err = 0;
+       res = g_utf16_to_ucs4 (str5, 2, &items_read, &items_written, &err);
+       check_result = utf16_to_ucs4_check_result (res, exp5, items_read, 2, items_written, 1, err, FALSE);
+       if (check_result) return check_result;
+       g_free (res);
+
         return OK;
  }
  RESULT
@@ -636,6 +670,101 @@ test_utf8_validate()
         return OK;
  }
  
+glong
+utf8_byteslen (const gchar *src)
+{
+       /* Number of bytes that precede the terminating NUL of src. */
+       int count;
+
+       for (count = 0; src [count] != '\0'; count++)
+               ;
+       return count;
+}
+
+RESULT
+test_utf8_strcase_each (const gchar *src, const gchar *expected, gboolean strup)
+{
+       /*
+        * Converts src with g_utf8_strup () (strup set) or g_utf8_strdown ()
+        * and compares the result with expected over the longer of the two
+        * byte lengths (source and converted string). Returns whatever
+        * compare_strings_utf8_RESULT () reports, or a failure when the
+        * conversion produced no string at all.
+        */
+       gchar *tmp;
+       glong len, len2;
+       RESULT r;
+
+       len = utf8_byteslen (src);
+       tmp = strup ? g_utf8_strup (src, len) : g_utf8_strdown (src, len);
+       /* Report a NULL result instead of dereferencing it in utf8_byteslen (). */
+       if (tmp == NULL)
+               return FAILED ("case conversion of '%s' returned NULL", src);
+       len2 = utf8_byteslen (tmp);
+       r = compare_strings_utf8_RESULT (expected, tmp, len < len2 ? len2 : len);
+       g_free (tmp);
+       return r;
+}
+
+RESULT
+test_utf8_strup_each (const gchar *src, const gchar *expected)
+{
+       /* Upper-casing flavour of the shared checker. */
+       const gboolean strup = TRUE;
+
+       return test_utf8_strcase_each (src, expected, strup);
+}
+
+RESULT
+test_utf8_strdown_each (const gchar *src, const gchar *expected)
+{
+       /* Lower-casing flavour of the shared checker. */
+       const gboolean strup = FALSE;
+
+       return test_utf8_strcase_each (src, expected, strup);
+}
+
+/*
+ * g_utf8_strup
+ */
+RESULT
+test_utf8_strup ()
+{
+       /*
+        * Source / expected pairs, run in order; the first pair whose
+        * comparison is not OK ends the test with that result.
+        */
+       static const struct {
+               const gchar *src;
+               const gchar *expected;
+       } cases [] = {
+               {"aBc", "ABC"},
+               {"x86-64", "X86-64"},
+               /* U+3B1 U+392 -> U+391 U+392 */
+               {"\xCE\xB1\xCE\x92", "\xCE\x91\xCE\x92"},
+               /* U+FF21 -> U+FF21 */
+               {"\xEF\xBC\xA1", "\xEF\xBC\xA1"},
+               /* U+FF41 -> U+FF21 */
+               {"\xEF\xBD\x81", "\xEF\xBC\xA1"},
+               /* U+10428 -> U+10400 */
+               {"\xF0\x90\x90\xA8", "\xF0\x90\x90\x80"}
+       };
+       unsigned int idx;
+
+       for (idx = 0; idx < sizeof (cases) / sizeof (cases [0]); idx++) {
+               RESULT r = test_utf8_strup_each (cases [idx].src, cases [idx].expected);
+
+               if (r != OK)
+                       return r;
+       }
+
+       return OK;
+}
+
+/*
+ * g_utf8_strdown
+ */
+RESULT
+test_utf8_strdown ()
+{
+       /*
+        * Source / expected pairs, run in order; the first pair whose
+        * comparison is not OK ends the test with that result.
+        *
+        * Three further pairs are kept disabled, as in the original
+        * listing (the reason is not recorded here):
+        *   U+FF41  -> U+FF41   "\xEF\xBD\x81"     -> "\xEF\xBD\x81"
+        *   U+FF21  -> U+FF41   "\xEF\xBC\xA1"     -> "\xEF\xBD\x81"
+        *   U+10400 -> U+10428  "\xF0\x90\x90\x80" -> "\xF0\x90\x90\xA8"
+        * NOTE(review): the disabled U+FF41 pair was written with the bytes
+        * "\xEF\xBC\x81", which encode U+FF01; the bytes shown above are the
+        * ones that match its U+FF41 label.
+        */
+       static const struct {
+               const gchar *src;
+               const gchar *expected;
+       } cases [] = {
+               {"aBc", "abc"},
+               {"X86-64", "x86-64"},
+               /* U+391 U+3B2 -> U+3B1 U+3B2 */
+               {"\xCE\x91\xCE\xB2", "\xCE\xB1\xCE\xB2"}
+       };
+       unsigned int idx;
+
+       for (idx = 0; idx < sizeof (cases) / sizeof (cases [0]); idx++) {
+               RESULT r = test_utf8_strdown_each (cases [idx].src, cases [idx].expected);
+
+               if (r != OK)
+                       return r;
+       }
+
+       return OK;
+}
+
  /*
   * test initialization
   */
@@ -652,6 +781,8 @@ static Test utf8_tests [] = {
         {"g_utf8_get_char", test_utf8_get_char },
         {"g_utf8_next_char", test_utf8_next_char },
         {"g_utf8_validate", test_utf8_validate },
+       {"g_utf8_strup", test_utf8_strup},
+       {"g_utf8_strdown", test_utf8_strdown},
         {NULL, NULL}
  };
author	Atsushi Eno <atsushieno@gmail.com>
	Tue, 4 Nov 2008 00:29:35 +0000 (00:29 -0000)
committer	Atsushi Eno <atsushieno@gmail.com>
	Tue, 4 Nov 2008 00:29:35 +0000 (00:29 -0000)
eglib/ChangeLog		patch \| blob \| history
eglib/TODO		patch \| blob \| history
eglib/src/glib.h		patch \| blob \| history
eglib/src/gunicode.c		patch \| blob \| history
eglib/src/gutf8.c		patch \| blob \| history
eglib/test/Makefile.am		patch \| blob \| history
eglib/test/tests.h		patch \| blob \| history
eglib/test/unicode.c	[new file with mode: 0644]	patch \| blob
eglib/test/utf8.c		patch \| blob \| history