#include #include "test.h" /* * g_utf16_to_utf8 */ glong compare_strings_utf8_pos (const gchar *expected, const gchar *actual, glong size) { int i; for (i = 0; i < size; i++) if (expected [i] != actual [i]) return i; return -1; } RESULT compare_strings_utf8_RESULT (const gchar *expected, const gchar *actual, glong size) { glong ret; ret = compare_strings_utf8_pos (expected, actual, size); if (ret < 0) return OK; return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d\n", expected, actual, ret); } void gchar_to_gunichar2 (gunichar2 ret[], const gchar *src) { int i; for (i = 0; src [i]; i++) ret [i] = src [i]; ret [i] = 0; } RESULT compare_utf16_to_utf8_explicit (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out, glong size_spec) { GError *error; gchar* ret; RESULT result; glong in_read, out_read; result = NULL; error = NULL; ret = g_utf16_to_utf8 (utf16, size_spec, &in_read, &out_read, &error); if (error) { result = FAILED ("The error is %d %s\n", (error)->code, (error)->message); g_error_free (error); if (ret) g_free (ret); return result; } if (in_read != len_in) result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read); else if (out_read != len_out) result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read); else result = compare_strings_utf8_RESULT (expected, ret, len_out); g_free (ret); if (result) return result; return OK; } RESULT compare_utf16_to_utf8 (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out) { RESULT result; result = compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, -1); if (result != OK) return result; return compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, len_in); } RESULT test_utf16_to_utf8 () { const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81", *src5 = "\xF0\x90\x90\x80"; gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0}, str5 [] = {0xD801, 0xDC00, 0}; RESULT result; gchar_to_gunichar2 (str1, src1); /* empty string */ result = compare_utf16_to_utf8 (src0, str0, 0, 0); if (result != OK) return result; result = compare_utf16_to_utf8 (src1, str1, 5, 5); if (result != OK) return result; result = compare_utf16_to_utf8 (src2, str2, 2, 4); if (result != OK) return result; result = compare_utf16_to_utf8 (src3, str3, 1, 3); if (result != OK) return result; result = compare_utf16_to_utf8 (src4, str4, 1, 3); if (result != OK) return result; result = compare_utf16_to_utf8 (src5, str5, 2, 4); if (result != OK) return result; return OK; } /* * g_utf8_to_utf16 */ glong compare_strings_utf16_pos (const gunichar2 *expected, const gunichar2 *actual, glong size) { int i; for (i = 0; i < size; i++) if (expected [i] != actual [i]) return i; return -1; } RESULT compare_strings_utf16_RESULT (const gunichar2 *expected, const gunichar2 *actual, glong size) { glong ret; ret = compare_strings_utf16_pos (expected, actual, size); if (ret < 0) return OK; return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d ('%c' x '%c')\n", expected, actual, ret, expected [ret], actual [ret]); } #if !defined(EGLIB_TESTS) #define eg_utf8_to_utf16_with_nuls g_utf8_to_utf16 #endif RESULT compare_utf8_to_utf16_explicit (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out, glong size_spec, gboolean include_nuls) { GError *error; gunichar2* ret; RESULT result; glong in_read, out_read; result = NULL; error = NULL; if (include_nuls) ret = eg_utf8_to_utf16_with_nuls (utf8, size_spec, &in_read, &out_read, &error); else ret = g_utf8_to_utf16 (utf8, size_spec, &in_read, &out_read, &error); if (error) { result = FAILED ("The error is %d %s\n", (error)->code, (error)->message); g_error_free (error); if (ret) g_free (ret); return result; } if (in_read != len_in) result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read); else if (out_read != len_out) result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read); else result = compare_strings_utf16_RESULT (expected, ret, len_out); g_free (ret); if (result) return result; return OK; } RESULT compare_utf8_to_utf16_general (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out, gboolean include_nuls) { RESULT result; result = compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, -1, include_nuls); if (result != OK) return result; return compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, len_in, include_nuls); } RESULT compare_utf8_to_utf16 (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out) { return compare_utf8_to_utf16_general (expected, utf8, len_in, len_out, FALSE); } RESULT compare_utf8_to_utf16_with_nuls (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out) { return compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, len_in, TRUE); } RESULT test_utf8_seq () { const gchar *src = "\xE5\xB9\xB4\x27"; glong in_read, out_read; //gunichar2 expected [6]; GError *error = NULL; gunichar2 *dst; //printf ("got: %s\n", src); dst = g_utf8_to_utf16 (src, (glong)strlen (src), &in_read, &out_read, &error); if (error != NULL){ return error->message; } if (in_read != 4) { return FAILED ("in_read is expected to be 4 but was %d\n", in_read); } if (out_read != 2) { return FAILED ("out_read is expected to be 2 but was %d\n", out_read); } g_free (dst); return OK; } RESULT test_utf8_to_utf16 () { const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81"; gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0}; RESULT result; gchar_to_gunichar2 (str1, src1); /* empty string */ result = compare_utf8_to_utf16 (str0, src0, 0, 0); if (result != OK) return result; result = compare_utf8_to_utf16 (str1, src1, 5, 5); if (result != OK) return result; result = compare_utf8_to_utf16 (str2, src2, 4, 2); if (result != OK) return result; result = compare_utf8_to_utf16 (str3, src3, 3, 1); if (result != OK) return result; result = compare_utf8_to_utf16 (str4, src4, 3, 1); if (result != OK) return result; return OK; } RESULT test_utf8_to_utf16_with_nuls () { const gchar *src0 = "", *src1 = "AB\0DE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81"; gunichar2 str0 [] = {0}, str1 [] = {'A', 'B', 0, 'D', 'E', 0}, str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0}; RESULT result; #if !defined(EGLIB_TESTS) return OK; #endif /* implicit length is forbidden */ if (eg_utf8_to_utf16_with_nuls (src1, -1, NULL, NULL, NULL) != NULL) return FAILED ("explicit nulls must fail with -1 length\n"); /* empty string */ result = compare_utf8_to_utf16_with_nuls (str0, src0, 0, 0); if (result != OK) return result; result = compare_utf8_to_utf16_with_nuls (str1, src1, 5, 5); if (result != OK) return result; result = compare_utf8_to_utf16_with_nuls (str2, src2, 4, 2); if (result != OK) return result; result = compare_utf8_to_utf16_with_nuls (str3, src3, 3, 1); if (result != OK) return result; result = compare_utf8_to_utf16_with_nuls (str4, src4, 3, 1); if (result != OK) return result; return OK; } typedef struct { char *content; size_t length; } convert_result_t; RESULT test_convert () { static const char *charsets[] = { "UTF-8", "UTF-16LE", "UTF-16BE", "UTF-32LE", "UTF-32BE" }; gsize length, converted_length, n; char *content, *converted, *path; convert_result_t **expected; GError *err = NULL; const char *srcdir; gboolean loaded; guint i, j, k; char c; if (!(srcdir = getenv ("srcdir")) && !(srcdir = getenv ("PWD"))) return FAILED ("srcdir not defined!"); expected = g_malloc (sizeof (convert_result_t *) * G_N_ELEMENTS (charsets)); /* first load all our test samples... */ for (i = 0; i < G_N_ELEMENTS (charsets); i++) { path = g_strdup_printf ("%s%c%s.txt", srcdir, G_DIR_SEPARATOR, charsets[i]); loaded = g_file_get_contents (path, &content, &length, &err); g_free (path); if (!loaded) { for (j = 0; j < i; j++) { g_free (expected[j]->content); g_free (expected[j]); } g_free (expected); return FAILED ("Failed to load content for %s: %s", charsets[i], err->message); } expected[i] = g_malloc (sizeof (convert_result_t)); expected[i]->content = content; expected[i]->length = length; } /* test conversion from every charset to every other charset */ for (i = 0; i < G_N_ELEMENTS (charsets); i++) { for (j = 0; j < G_N_ELEMENTS (charsets); j++) { converted = g_convert (expected[i]->content, expected[i]->length, charsets[j], charsets[i], NULL, &converted_length, NULL); if (converted == NULL) { for (k = 0; k < G_N_ELEMENTS (charsets); k++) { g_free (expected[k]->content); g_free (expected[k]); } g_free (expected); return FAILED ("Failed to convert from %s to %s: NULL", charsets[i], charsets[j]); } if (converted_length != expected[j]->length) { length = expected[j]->length; for (k = 0; k < G_N_ELEMENTS (charsets); k++) { g_free (expected[k]->content); g_free (expected[k]); } g_free (converted); g_free (expected); return FAILED ("Failed to convert from %s to %s: expected %u bytes, got %u", charsets[i], charsets[j], length, converted_length); } for (n = 0; n < converted_length; n++) { if (converted[n] != expected[j]->content[n]) { c = expected[j]->content[n]; for (k = 0; k < G_N_ELEMENTS (charsets); k++) { g_free (expected[k]->content); g_free (expected[k]); } g_free (converted); g_free (expected); return FAILED ("Failed to convert from %s to %s: expected 0x%x at offset %u, got 0x%x", charsets[i], charsets[j], c, n, converted[n]); } } g_free (converted); } } for (k = 0; k < G_N_ELEMENTS (charsets); k++) { g_free (expected[k]->content); g_free (expected[k]); } g_free (expected); return OK; } RESULT test_xdigit () { static char test_chars[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'A', 'B', 'C', 'D', 'E', 'F', 'G'}; static gint32 test_values[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, 10, 11, 12, 13, 14, 15, -1}; int i =0; for (i = 0; i < sizeof(test_chars); i++) if (g_unichar_xdigit_value ((gunichar)test_chars[i]) != test_values[i]) return FAILED("Incorrect value %d at index %d", test_values[i], i); return OK; } static RESULT ucs4_to_utf16_check_result (const gunichar2 *result_str, const gunichar2 *expected_str, glong result_items_read, glong expected_items_read, glong result_items_written, glong expected_items_written, GError* result_error, gboolean expect_error) { glong i; if (result_items_read != expected_items_read) return FAILED("Incorrect number of items read; expected %d, got %d", expected_items_read, result_items_read); if (result_items_written != expected_items_written) return FAILED("Incorrect number of items written; expected %d, got %d", expected_items_written, result_items_written); if (result_error && !expect_error) return FAILED("There should not be an error code."); if (!result_error && expect_error) return FAILED("Unexpected error object."); if (expect_error && result_str) return FAILED("NULL should be returned when an error occurs."); if (!expect_error && !result_str) return FAILED("When no error occurs NULL should not be returned."); for (i=0; i 4) return FAILED ("Word1 has gone past its expected length"); if (*ptr != word1ExpectedValues[count]) return FAILED ("Word1 has an incorrect next_char at index %i", count); ptr = g_utf8_next_char (ptr); count++; } //Test word2 count = 0; ptr = word2; while (*ptr != 0) { if (count > 4) return FAILED ("Word2 has gone past its expected length"); if (*ptr != word2ExpectedValues[count]) return FAILED ("Word2 has an incorrect next_char at index %i", count); ptr = g_utf8_next_char (ptr); count++; } return OK; } RESULT test_utf8_validate() { gchar invalidWord1 [] = {0xC3, 0x82, 0xC1,0x90,'\0'}; //Invalid, 1nd oct Can't be 0xC0 or 0xC1 gchar invalidWord2 [] = {0xC1, 0x89, 0x60, '\0'}; //Invalid, 1st oct can not be 0xC1 gchar invalidWord3 [] = {0xC2, 0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Invalid, oct after 0xC2 must be > 0x80 gchar validWord1 [] = {0xC2, 0x82, 0xC3,0xA0,'\0'}; //Valid gchar validWord2 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid const gchar* end; gboolean retVal = g_utf8_validate (invalidWord1, -1, &end); if (retVal != FALSE) return FAILED ("Expected invalidWord1 to be invalid"); if (end != &invalidWord1 [2]) return FAILED ("Expected end parameter to be pointing to invalidWord1[2]"); end = NULL; retVal = g_utf8_validate (invalidWord2, -1, &end); if (retVal != FALSE) return FAILED ("Expected invalidWord2 to be invalid"); if (end != &invalidWord2 [0]) return FAILED ("Expected end parameter to be pointing to invalidWord2[0]"); end = NULL; retVal = g_utf8_validate (invalidWord3, -1, &end); if (retVal != FALSE) return FAILED ("Expected invalidWord3 to be invalid"); if (end != &invalidWord3 [0]) return FAILED ("Expected end parameter to be pointing to invalidWord3[1]"); end = NULL; retVal = g_utf8_validate (validWord1, -1, &end); if (retVal != TRUE) return FAILED ("Expected validWord1 to be valid"); if (end != &validWord1 [4]) return FAILED ("Expected end parameter to be pointing to validWord1[4]"); end = NULL; retVal = g_utf8_validate (validWord2, -1, &end); if (retVal != TRUE) return FAILED ("Expected validWord2 to be valid"); if (end != &validWord2 [11]) return FAILED ("Expected end parameter to be pointing to validWord2[11]"); return OK; } glong utf8_byteslen (const gchar *src) { int i = 0; do { if (src [i] == '\0') return i; i++; } while (TRUE); } RESULT test_utf8_strcase_each (const gchar *src, const gchar *expected, gboolean strup) { gchar *tmp; glong len, len2; RESULT r; len = utf8_byteslen (src); tmp = strup ? g_utf8_strup (src, len) : g_utf8_strdown (src, len); len2 = utf8_byteslen (tmp); r = compare_strings_utf8_RESULT (expected, tmp, len < len2 ? len2 : len); g_free (tmp); return r; } RESULT test_utf8_strup_each (const gchar *src, const gchar *expected) { return test_utf8_strcase_each (src, expected, TRUE); } RESULT test_utf8_strdown_each (const gchar *src, const gchar *expected) { return test_utf8_strcase_each (src, expected, FALSE); } /* * g_utf8_strup */ RESULT test_utf8_strup () { RESULT r; if ((r = test_utf8_strup_each ("aBc", "ABC")) != OK) return r; if ((r = test_utf8_strup_each ("x86-64", "X86-64")) != OK) return r; // U+3B1 U+392 -> U+391 U+392 if ((r = test_utf8_strup_each ("\xCE\xB1\xCE\x92", "\xCE\x91\xCE\x92")) != OK) return r; // U+FF21 -> U+FF21 if ((r = test_utf8_strup_each ("\xEF\xBC\xA1", "\xEF\xBC\xA1")) != OK) return r; // U+FF41 -> U+FF21 if ((r = test_utf8_strup_each ("\xEF\xBD\x81", "\xEF\xBC\xA1")) != OK) return r; // U+10428 -> U+10400 if ((r = test_utf8_strup_each ("\xF0\x90\x90\xA8", "\xF0\x90\x90\x80")) != OK) return r; return OK; } /* * g_utf8_strdown */ RESULT test_utf8_strdown () { RESULT r; if ((r = test_utf8_strdown_each ("aBc", "abc")) != OK) return r; if ((r = test_utf8_strdown_each ("X86-64", "x86-64")) != OK) return r; // U+391 U+3B2 -> U+3B1 U+3B2 if ((r = test_utf8_strdown_each ("\xCE\x91\xCE\xB2", "\xCE\xB1\xCE\xB2")) != OK) return r; /* // U+FF41 -> U+FF41 if ((r = test_utf8_strdown_each ("\xEF\xBC\x81", "\xEF\xBC\x81")) != OK) return r; // U+FF21 -> U+FF41 if ((r = test_utf8_strdown_each ("\xEF\xBC\xA1", "\xEF\xBD\x81")) != OK) return r; // U+10400 -> U+10428 if ((r = test_utf8_strdown_each ("\xF0\x90\x90\x80", "\xF0\x90\x90\xA8")) != OK) return r; */ return OK; } /* * test initialization */ static Test utf8_tests [] = { {"g_utf16_to_utf8", test_utf16_to_utf8}, {"g_utf8_to_utf16", test_utf8_to_utf16}, {"g_utf8_to_utf16_with_nuls", test_utf8_to_utf16_with_nuls}, {"g_utf8_seq", test_utf8_seq}, {"g_convert", test_convert }, {"g_unichar_xdigit_value", test_xdigit }, {"g_ucs4_to_utf16", test_ucs4_to_utf16 }, {"g_utf16_to_ucs4", test_utf16_to_ucs4 }, {"g_utf8_strlen", test_utf8_strlen }, {"g_utf8_get_char", test_utf8_get_char }, {"g_utf8_next_char", test_utf8_next_char }, {"g_utf8_validate", test_utf8_validate }, {"g_utf8_strup", test_utf8_strup}, {"g_utf8_strdown", test_utf8_strdown}, {NULL, NULL} }; DEFINE_TEST_GROUP_INIT(utf8_tests_init, utf8_tests)