#define g_string_append monoeg_g_string_append
#define g_string_append_c monoeg_g_string_append_c
#define g_string_append_len monoeg_g_string_append_len
+#define g_string_append_unichar monoeg_g_string_append_unichar
#define g_string_append_printf monoeg_g_string_append_printf
#define g_string_free monoeg_g_string_free
#define g_string_new monoeg_g_string_new
#define g_timer_start monoeg_g_timer_start
#define g_timer_stop monoeg_g_timer_stop
#define g_trailingBytesForUTF8 monoeg_g_trailingBytesForUTF8
+#define g_ucs4_to_utf8 monoeg_g_ucs4_to_utf8
#define g_ucs4_to_utf16 monoeg_g_ucs4_to_utf16
#define g_unichar_case monoeg_g_unichar_case
#define g_unichar_isxdigit monoeg_g_unichar_isxdigit
#define g_utf8_strup monoeg_g_utf8_strup
#define g_utf8_to_utf16 monoeg_g_utf8_to_utf16
#define g_utf8_validate monoeg_g_utf8_validate
+#define g_unichar_to_utf8 monoeg_g_unichar_to_utf8
#define g_win32_getlocale monoeg_g_win32_getlocale
#define g_assertion_message monoeg_assertion_message
#define g_malloc monoeg_malloc
#if !G_TYPES_DEFINED
/* VS 2010 and later have stdint.h */
#if defined(_MSC_VER) && _MSC_VER < 1600
-typedef __int8 gint8;
+typedef __int8 gint8;
typedef unsigned __int8 guint8;
-typedef __int16 gint16;
+typedef __int16 gint16;
typedef unsigned __int16 guint16;
-typedef __int32 gint32;
+typedef __int32 gint32;
typedef unsigned __int32 guint32;
-typedef __int64 gint64;
+typedef __int64 gint64;
typedef unsigned __int64 guint64;
-typedef float gfloat;
-typedef double gdouble;
-typedef unsigned __int16 gunichar2;
-typedef int gboolean;
+typedef float gfloat;
+typedef double gdouble;
+typedef int gboolean;
#else
/* Types defined in terms of the stdint.h */
typedef int8_t gint8;
typedef uint64_t guint64;
typedef float gfloat;
typedef double gdouble;
-typedef uint16_t gunichar2;
typedef int32_t gboolean;
#endif
#endif
+/* 16-bit unit: element type of the UTF-16 strings taken/returned by the g_utf16_* and g_*_to_utf16 converters. */
+typedef guint16 gunichar2;
+/* 32-bit unit: a single UCS-4 character, and the element type of UCS-4 strings. */
+typedef guint32 gunichar;
+
/*
* Macros
*/
void g_string_printf (GString *string, const gchar *format, ...);
void g_string_append_printf (GString *string, const gchar *format, ...);
void g_string_append_vprintf (GString *string, const gchar *format, va_list args);
+GString *g_string_append_unichar (GString *string, gunichar c);
GString *g_string_append_c (GString *string, gchar c);
GString *g_string_append (GString *string, const gchar *val);
GString *g_string_append_len (GString *string, const gchar *val, gssize len);
* only used if the old collation code is activated, so this is only the
* bare minimum to build.
*/
-typedef guint32 gunichar;
typedef enum {
G_UNICODE_CONTROL,
G_CONVERT_ERROR_NOT_ABSOLUTE_PATH
} GConvertError;
-gchar* g_utf8_strup (const gchar *str, gssize len);
-gchar* g_utf8_strdown (const gchar *str, gssize len);
+gchar *g_utf8_strup (const gchar *str, gssize len);
+gchar *g_utf8_strdown (const gchar *str, gssize len);
+gint g_unichar_to_utf8 (gunichar c, gchar *outbuf);
gunichar2 *g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
gchar *g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
-gunichar2 *g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
gunichar *g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
+gchar *g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
+gunichar2 *g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
#define u8to16(str) g_utf8_to_utf16(str, (glong)strlen(str), NULL, NULL, NULL)
return string;
}
+/**
+ * g_string_append_unichar:
+ * @string: the GString to extend; must not be NULL
+ * @c: the UCS-4 character to append
+ *
+ * Encodes @c with g_unichar_to_utf8() into a local 6-byte buffer and
+ * appends the resulting bytes to @string via g_string_append_len().
+ *
+ * Returns: the result of g_string_append_len() when @c was encoded, or
+ * @string untouched when g_unichar_to_utf8() reports a length <= 0.
+ * A NULL @string trips the g_return_val_if_fail() guard, which is given
+ * NULL as its return value.
+ */
+GString *
+g_string_append_unichar (GString *string, gunichar c)
+{
+	gchar encoded[6];
+	gint nbytes;
+
+	g_return_val_if_fail (string != NULL, NULL);
+
+	nbytes = g_unichar_to_utf8 (c, encoded);
+	if (nbytes > 0)
+		return g_string_append_len (string, encoded, nbytes);
+
+	/* nothing could be encoded: hand the string back as it was */
+	return string;
+}
+
GString *
g_string_prepend (GString *string, const gchar *val)
{
return ret;
}
+/**
+ * g_ucs4_to_utf8:
+ * @str: UCS-4 input
+ * @len: maximum number of characters of @str to convert; any negative
+ *       value means "convert up to the terminating 0 character"
+ * @items_read: if non-NULL, receives the number of input characters
+ *              consumed, or the index of the offending character on error
+ * @items_written: if non-NULL, receives the number of bytes written on
+ *                 success, not counting the trailing NUL
+ * @error: passed to g_set_error() when a character cannot be encoded
+ *
+ * Converts @str to a freshly g_malloc()ed, NUL-terminated UTF-8 string.
+ * Conversion stops at the first 0 character even when @len is larger, so
+ * @len acts as an upper bound.
+ *
+ * Returns: the new buffer, or NULL (with @error set to
+ * G_CONVERT_ERROR_ILLEGAL_SEQUENCE) if g_unichar_to_utf8() rejects a
+ * character. NOTE(review): the caller presumably releases the buffer
+ * with g_free() -- confirm against the allocator wrappers in this file.
+ */
+gchar *
+g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error)
+{
+	gchar *outbuf, *outptr;
+	glong nwritten = 0;
+	glong nchars;
+	glong i;
+	gint n;
+
+	/* Pass 1: validate every character and add up the encoded size.
+	 * The length test comes first so that @str is never dereferenced
+	 * when @len is 0. */
+	for (i = 0; (len < 0 || i < len) && str[i] != 0; i++) {
+		if ((n = g_unichar_to_utf8 (str[i], NULL)) < 0) {
+			g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+				     "Invalid sequence in conversion input");
+
+			if (items_read)
+				*items_read = i;
+
+			return NULL;
+		}
+
+		nwritten += n;
+	}
+
+	nchars = i;
+
+	/* Pass 2: encode exactly the characters that were measured above. */
+	outptr = outbuf = g_malloc (nwritten + 1);
+	for (i = 0; i < nchars; i++)
+		outptr += g_unichar_to_utf8 (str[i], outptr);
+	*outptr = '\0';
+
+	if (items_written)
+		*items_written = nwritten;
+
+	if (items_read)
+		*items_read = nchars;
+
+	return outbuf;
+}
+
static glong
g_ucs4_to_utf16_len (const gunichar *str, glong len, glong *items_read, GError **error)
{
return retstr;
}
+
+/**
+ * g_unichar_to_utf8:
+ * @c: the UCS-4 character to encode
+ * @outbuf: destination with room for at least 6 bytes, or NULL to only
+ *          compute the encoded length
+ *
+ * Writes the UTF-8 form of @c to @outbuf (no NUL terminator is added).
+ *
+ * Returns: the number of bytes the encoding takes (1 to 6), or -1 when
+ * @c is 2147483648 (0x80000000) or larger.
+ *
+ * Encoding rules, from http://home.tiscali.nl/t876506/utf8tbl.html
+ *
+ * From Unicode UCS-4 to UTF-8:
+ * Start with the Unicode number expressed as a decimal number and call this ud.
+ *
+ * If ud <128 (7F hex) then UTF-8 is 1 byte long, the value of ud.
+ *
+ * If ud >=128 and <=2047 (7FF hex) then UTF-8 is 2 bytes long.
+ *    byte 1 = 192 + (ud div 64)
+ *    byte 2 = 128 + (ud mod 64)
+ *
+ * If ud >=2048 and <=65535 (FFFF hex) then UTF-8 is 3 bytes long.
+ *    byte 1 = 224 + (ud div 4096)
+ *    byte 2 = 128 + ((ud div 64) mod 64)
+ *    byte 3 = 128 + (ud mod 64)
+ *
+ * If ud >=65536 and <=2097151 (1FFFFF hex) then UTF-8 is 4 bytes long.
+ *    byte 1 = 240 + (ud div 262144)
+ *    byte 2 = 128 + ((ud div 4096) mod 64)
+ *    byte 3 = 128 + ((ud div 64) mod 64)
+ *    byte 4 = 128 + (ud mod 64)
+ *
+ * If ud >=2097152 and <=67108863 (3FFFFFF hex) then UTF-8 is 5 bytes long.
+ *    byte 1 = 248 + (ud div 16777216)
+ *    byte 2 = 128 + ((ud div 262144) mod 64)
+ *    byte 3 = 128 + ((ud div 4096) mod 64)
+ *    byte 4 = 128 + ((ud div 64) mod 64)
+ *    byte 5 = 128 + (ud mod 64)
+ *
+ * If ud >=67108864 and <=2147483647 (7FFFFFFF hex) then UTF-8 is 6 bytes long.
+ *    byte 1 = 252 + (ud div 1073741824)
+ *    byte 2 = 128 + ((ud div 16777216) mod 64)
+ *    byte 3 = 128 + ((ud div 262144) mod 64)
+ *    byte 4 = 128 + ((ud div 4096) mod 64)
+ *    byte 5 = 128 + ((ud div 64) mod 64)
+ *    byte 6 = 128 + (ud mod 64)
+ **/
+gint
+g_unichar_to_utf8 (gunichar c, gchar *outbuf)
+{
+	gint len, i;
+	gint base;	/* lead-byte tag; kept in an int because 192..252 does not fit a signed char */
+
+	if (c < 128UL) {
+		base = 0;
+		len = 1;
+	} else if (c < 2048UL) {
+		base = 192;
+		len = 2;
+	} else if (c < 65536UL) {
+		base = 224;
+		len = 3;
+	} else if (c < 2097152UL) {
+		base = 240;
+		len = 4;
+	} else if (c < 67108864UL) {
+		base = 248;
+		len = 5;
+	} else if (c < 2147483648UL) {
+		base = 252;
+		len = 6;
+	} else
+		return -1;
+
+	if (outbuf != NULL) {
+		/* fill the trailing bytes back to front, 6 payload bits each */
+		for (i = len - 1; i > 0; i--) {
+			outbuf[i] = (gchar) (128 + (c & 0x3f));
+			c >>= 6;
+		}
+
+		/* The lead byte takes whatever is left of c. The range checks
+		 * above guarantee the remainder never overlaps the tag bits, so
+		 * no mask is applied: masking with 0x3f would cut a 1-byte
+		 * sequence down to 6 bits and corrupt code points 64..127. */
+		outbuf[0] = (gchar) (c | base);
+	}
+
+	return len;
+}