#endif
#include <glib.h>
-#include <stdio.h>
#include <string.h>
-#include <locale.h>
+#ifdef HAVE_ICONV_H
#include <iconv.h>
+#endif
#include <errno.h>
-#ifdef HAVE_CODESET
-#include <langinfo.h>
-#endif
+/* Byte order selector passed to the shared UTF-16/UTF-32 *_be_or_le helpers. */
+typedef enum {
+ LittleEndian,
+ BigEndian
+} Endian;
+
+/* Decoder: reads one character from *inbytes into *outchar. The decoders in
+ * this file return 0 and advance *inbytes / *inbytesleft on success, or
+ * return -1 with errno set on failure. */
+typedef int (* Decoder) (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+/* Encoder: writes character c at *outbytes. The encoders in this file return
+ * 0 and advance *outbytes / *outbytesleft on success, or return -1 with errno
+ * set on failure. */
+typedef int (* Encoder) (gunichar c, char **outbytes, size_t *outbytesleft);
-#define ICONV_ISO_INT_FORMAT "iso-%u-%u"
-#define ICONV_ISO_STR_FORMAT "iso-%u-%s"
-#define ICONV_10646 "iso-10646"
-
-#define ICONV_CACHE_MAX_SIZE (16)
-
-typedef struct _ListNode {
- struct _ListNode *next;
- struct _ListNode *prev;
-} ListNode;
-
-typedef struct {
- ListNode *head;
- ListNode *tail;
- ListNode *tailpred;
-} List;
-
-typedef struct {
- GHashTable *hash;
- size_t size;
- List list;
-} GIConvCache;
-
-typedef struct {
- ListNode node;
- GIConvCache *cache;
- guint32 refcount : 31;
- guint32 used : 1;
+/* Converter state.
+ * decode/encode: built-in codec callbacks selected by g_iconv_open (); either
+ *   may be NULL when the conversion was handed to the system iconv instead.
+ * c: a character that was decoded but not yet encoded, or (gunichar) -1 when
+ *   nothing is pending.
+ * cd (HAVE_ICONV only): the system iconv descriptor, or (iconv_t) -1 when the
+ *   built-in codecs are in use. */
+struct _GIConv {
+ Decoder decode;
+ Encoder encode;
+ gunichar c;
+#ifdef HAVE_ICONV
iconv_t cd;
- char *key;
-} GIConvCacheNode;
+#endif
+};
+static int decode_utf32be (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf32be (gunichar c, char **outbytes, size_t *outbytesleft);
-static GIConvCache *iconv_cache = NULL;
-static GHashTable *iconv_open_hash = NULL;
-static GHashTable *iconv_charsets = NULL;
-static char *locale_charset = NULL;
+static int decode_utf32le (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf32le (gunichar c, char **outbytes, size_t *outbytesleft);
-#ifdef G_THREADS_ENABLED
-static pthread_mutex_t iconv_cache_lock = PTHREAD_MUTEX_INITIALIZER;
-#define ICONV_CACHE_LOCK() pthread_mutex_lock (&iconv_cache_lock)
-#define ICONV_CACHE_UNLOCK() pthread_mutex_unlock (&iconv_cache_lock)
-#else
-#define ICONV_CACHE_LOCK()
-#define ICONV_CACHE_UNLOCK()
-#endif /* G_THREADS_ENABLED */
+static int decode_utf16be (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf16be (gunichar c, char **outbytes, size_t *outbytesleft);
+
+static int decode_utf16le (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf16le (gunichar c, char **outbytes, size_t *outbytesleft);
+
+static int decode_utf32 (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf32 (gunichar c, char **outbytes, size_t *outbytesleft);
+static int decode_utf16 (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf16 (gunichar c, char **outbytes, size_t *outbytesleft);
-/* a useful website on charset alaises:
- * http://www.li18nux.org/subgroups/sa/locnameguide/v1.1draft/CodesetAliasTable-V11.html */
+static int decode_utf8 (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf8 (gunichar c, char **outbytes, size_t *outbytesleft);
+/* Built-in codecs, keyed by charset name. g_iconv_open () compares names with
+ * g_ascii_strcasecmp (), so the lookup is case-insensitive. The entries without
+ * a BE/LE suffix ("UTF-32", "UTF-16", "UTF32", "UTF16") map to the codecs that
+ * pick the byte order from G_BYTE_ORDER. */
static struct {
- const char *charset; /* Note: expected to be lowercase */
- const char *iconv_name; /* Note: expected to be properly cased for iconv_open() */
-} known_iconv_charsets[] = {
- /* charset name, iconv-friendly name (sometimes case sensitive) */
- { "utf-8", "UTF-8" },
- { "utf8", "UTF-8" },
-
- /* ANSI_X3.4-1968 is used on some systems and should be
- treated the same as US-ASCII */
- { "ansi_x3.4-1968", NULL },
-
- /* 10646 is a special case, its usually UCS-2 big endian */
- /* This might need some checking but should be ok for
- solaris/linux */
- { "iso-10646-1", "UCS-2BE" },
- { "iso_10646-1", "UCS-2BE" },
- { "iso10646-1", "UCS-2BE" },
- { "iso-10646", "UCS-2BE" },
- { "iso_10646", "UCS-2BE" },
- { "iso10646", "UCS-2BE" },
-
- /* Korean charsets */
- /* Note: according to http://www.iana.org/assignments/character-sets,
- * ks_c_5601-1987 should really map to ISO-2022-KR, but the EUC-KR
- * mapping was given to me via a native Korean user, so I'm not sure
- * if I should change this... perhaps they are compatable? */
- { "ks_c_5601-1987", "EUC-KR" },
- { "5601", "EUC-KR" },
- { "ksc-5601", "EUC-KR" },
- { "ksc-5601-1987", "EUC-KR" },
- { "ksc-5601_1987", "EUC-KR" },
- { "ks_c_5861-1992", "EUC-KR" },
- { "euckr-0", "EUC-KR" },
-
- /* Chinese charsets */
- { "big5-0", "BIG5" },
- { "big5.eten-0", "BIG5" },
- { "big5hkscs-0", "BIG5HKSCS" },
- /* Note: GBK is a superset of gb2312 (see
- * http://en.wikipedia.org/wiki/GBK for details), so 'upgrade'
- * gb2312 to GBK so that we can completely convert GBK text
- * that is incorrectly tagged as gb2312 to UTF-8. */
- { "gb2312", "GBK" },
- { "gb-2312", "GBK" },
- { "gb2312-0", "GBK" },
- { "gb2312-80", "GBK" },
- { "gb2312.1980-0", "GBK" },
- /* euc-cn is an alias for gb2312 */
- { "euc-cn", "GBK" },
- { "gb18030-0", "gb18030" },
- { "gbk-0", "GBK" },
-
- /* Japanese charsets */
- { "eucjp-0", "eucJP" }, /* should this map to "EUC-JP" instead? */
- { "ujis-0", "ujis" }, /* we might want to map this to EUC-JP */
- { "jisx0208.1983-0", "SJIS" },
- { "jisx0212.1990-0", "SJIS" },
- { "pck", "SJIS" },
- { NULL, NULL }
+ const char *name;
+ Decoder decoder;
+ Encoder encoder;
+} charsets[] = {
+ { "UTF-32BE", decode_utf32be, encode_utf32be },
+ { "UTF-32LE", decode_utf32le, encode_utf32le },
+ { "UTF-16BE", decode_utf16be, encode_utf16be },
+ { "UTF-16LE", decode_utf16le, encode_utf16le },
+ { "UTF-32", decode_utf32, encode_utf32 },
+ { "UTF-16", decode_utf16, encode_utf16 },
+ { "UTF-8", decode_utf8, encode_utf8 },
+ { "UTF32", decode_utf32, encode_utf32 },
+ { "UTF16", decode_utf16, encode_utf16 },
+ { "UTF8", decode_utf8, encode_utf8 },
};
-static void
-list_init (List *list)
+/*
+ * g_iconv_open:
+ * Creates a converter from from_charset to to_charset.
+ *
+ * Both names are matched case-insensitively against the built-in charsets
+ * table. When either side has no built-in codec the request is handed to
+ * iconv_open () if HAVE_ICONV is defined.
+ *
+ * Returns a newly allocated converter (release it with g_iconv_close ()), or
+ * (GIConv) -1 with errno set when an argument is NULL/empty or the conversion
+ * is not supported.
+ */
+GIConv
+g_iconv_open (const char *to_charset, const char *from_charset)
{
- list->head = (ListNode *) &list->tail;
- list->tail = NULL;
- list->tailpred = (ListNode *) &list->head;
+#ifdef HAVE_ICONV
+	iconv_t icd = (iconv_t) -1;
+#endif
+	Decoder decoder = NULL;
+	Encoder encoder = NULL;
+	GIConv cd;
+	guint i;
+
+	if (!to_charset || !from_charset || !to_charset[0] || !from_charset[0]) {
+		/* callers inspect errno after a (GIConv) -1 return */
+		errno = EINVAL;
+		return (GIConv) -1;
+	}
+
+	for (i = 0; i < G_N_ELEMENTS (charsets); i++) {
+		if (!g_ascii_strcasecmp (charsets[i].name, from_charset))
+			decoder = charsets[i].decoder;
+
+		if (!g_ascii_strcasecmp (charsets[i].name, to_charset))
+			encoder = charsets[i].encoder;
+	}
+
+	if (encoder == NULL || decoder == NULL) {
+#ifdef HAVE_ICONV
+		/* iconv_open () sets errno itself on failure */
+		if ((icd = iconv_open (to_charset, from_charset)) == (iconv_t) -1)
+			return (GIConv) -1;
+#else
+		/* same errno iconv_open () uses for an unsupported conversion */
+		errno = EINVAL;
+		return (GIConv) -1;
+#endif
+	}
+
+	cd = (GIConv) g_malloc (sizeof (struct _GIConv));
+	cd->decode = decoder;
+	cd->encode = encoder;
+	cd->c = -1; /* no decoded character pending */
+
+#ifdef HAVE_ICONV
+	cd->cd = icd;
+#endif
+
+	return cd;
}
-static ListNode *
-list_prepend (List *list, ListNode *node)
+/*
+ * g_iconv_close:
+ * Releases a converter returned by g_iconv_open (), closing the underlying
+ * iconv descriptor when one was opened. Passing NULL or the (GIConv) -1
+ * failure value is a no-op. Always returns 0.
+ */
+int
+g_iconv_close (GIConv cd)
{
- node->next = list->head;
- node->prev = (ListNode *) &list->head;
- list->head->prev = node;
- list->head = node;
+	/* g_iconv_open () reports failure as (GIConv) -1; that value (and NULL)
+	 * must not be dereferenced or freed */
+	if (cd == NULL || cd == (GIConv) -1)
+		return 0;
+
+#ifdef HAVE_ICONV
+	if (cd->cd != (iconv_t) -1)
+		iconv_close (cd->cd);
+#endif
+
+	g_free (cd);
- return node;
+	return 0;
}
-static ListNode *
-list_unlink (ListNode *node)
+/*
+ * g_iconv:
+ * Converts characters from *inbytes to *outbytes using converter cd.
+ *
+ * When cd wraps a system iconv descriptor the call is forwarded to iconv ().
+ * Otherwise each character is decoded and re-encoded with the built-in
+ * codecs. A character that was decoded but did not fit in the output is kept
+ * in cd->c and written first on the next call. Passing NULL for outbytes or
+ * outbytesleft discards that pending character and returns 0.
+ *
+ * Returns 0 when all input was converted (including when there was no input),
+ * or (gsize) -1 with errno set by the failing codec: EINVAL for a truncated
+ * sequence, EILSEQ for invalid input, E2BIG when the output buffer is full.
+ * On return the in/out pointers and counts reflect what was consumed and
+ * produced.
+ */
+gsize
+g_iconv (GIConv cd, char **inbytes, size_t *inbytesleft,
+ char **outbytes, size_t *outbytesleft)
{
- node->next->prev = node->prev;
- node->prev->next = node->next;
+	size_t inleft, outleft;
+	char *inptr, *outptr;
+	gsize rc = 0;
+	gunichar c;
+
+#ifdef HAVE_ICONV
+	if (cd->cd != (iconv_t) -1)
+		return iconv (cd->cd, inbytes, inbytesleft, outbytes, outbytesleft);
+#endif
+
+	if (outbytes == NULL || outbytesleft == NULL) {
+		/* reset converter */
+		cd->c = -1;
+		return 0;
+	}
+
+	inleft = inbytesleft ? *inbytesleft : 0;
+	inptr = inbytes ? *inbytes : NULL;
+	outleft = *outbytesleft;
+	outptr = *outbytes;
+	c = cd->c;
- return node;
+	/* Run only while there is a pending character to flush or real input to
+	 * decode. The decoder is never called on empty or NULL input, and a full
+	 * output buffer surfaces as E2BIG from the encoder instead of a silent
+	 * early exit. */
+	while (c != (gunichar) -1 || (inptr != NULL && inleft > 0)) {
+		if (c == (gunichar) -1 && cd->decode (&inptr, &inleft, &c) == -1) {
+			rc = (gsize) -1;
+			break;
+		}
+
+		if (cd->encode (c, &outptr, &outleft) == -1) {
+			/* c stays pending in cd->c for the next call */
+			rc = (gsize) -1;
+			break;
+		}
+
+		c = (gunichar) -1;
+	}
+
+	if (inbytesleft)
+		*inbytesleft = inleft;
+
+	if (inbytes)
+		*inbytes = inptr;
+
+	*outbytesleft = outleft;
+	*outbytes = outptr;
+	cd->c = c;
+
+	return rc;
}
-static GIConvCacheNode *
-g_iconv_cache_node_new (const char *key, iconv_t cd)
+/*
+ * Decodes one UTF-32 code unit of the given byte order from *inbytes.
+ *
+ * Returns 0, stores the character in *outchar and advances *inbytes /
+ * *inbytesleft by 4 on success. Returns -1 with errno = EINVAL when fewer
+ * than 4 bytes remain, or errno = EILSEQ when the value is >= 0x80000000;
+ * the in/out arguments are left untouched on failure.
+ */
+static int
+decode_utf32_be_or_le (Endian endian, char **inbytes, size_t *inbytesleft, gunichar *outchar)
{
- GIConvCacheNode *node;
+	char *inptr = *inbytes;
+	size_t inleft = *inbytesleft;
+	gunichar c;
+
+	if (inleft < 4) {
+		errno = EINVAL;
+		return -1;
+	}
- node = g_malloc (sizeof (GIConvCacheNode));
- node->key = g_strdup (key);
- node->refcount = 1;
- node->used = TRUE;
- node->cd = cd;
+	/* copy the raw bytes out: *inbytes is an arbitrary char buffer and may
+	 * not be aligned for a direct gunichar load */
+	memcpy (&c, inptr, 4);
+
+	if (endian == BigEndian)
+		c = GUINT32_FROM_BE (c);
+	else
+		c = GUINT32_FROM_LE (c);
- return node;
-}
-
-static void
-g_iconv_cache_node_free (GIConvCacheNode *node)
-{
- iconv_close (node->cd);
- g_free (node->key);
+	inleft -= 4;
+	inptr += 4;
+
+	if (c >= 2147483648UL) {
+		errno = EILSEQ;
+		return -1;
+	}
+
+	*inbytes = inptr;
+	*inbytesleft = inleft;
+	*outchar = c;
+
+	return 0;
}
-static GIConvCache *
-g_iconv_cache_new (void)
+/* Decoder registered for "UTF-32BE": forwards to the shared helper with BigEndian. */
+static int
+decode_utf32be (char **inbytes, size_t *inbytesleft, gunichar *outchar)
{
- GIConvCache *cache;
-
- cache = g_malloc (sizeof (GIConvCache));
- cache->hash = g_hash_table_new_full (g_str_hash, g_str_equal, NULL, (GDestroyNotify) g_iconv_cache_node_free);
- list_init (&cache->list);
-
- return cache;
+ return decode_utf32_be_or_le (BigEndian, inbytes, inbytesleft, outchar);
}
-static void
-g_iconv_cache_free (GIConvCache *cache)
+/* Decoder registered for "UTF-32LE": forwards to the shared helper with LittleEndian. */
+static int
+decode_utf32le (char **inbytes, size_t *inbytesleft, gunichar *outchar)
{
- g_hash_table_destroy (cache->hash);
- g_free (cache);
+ return decode_utf32_be_or_le (LittleEndian, inbytes, inbytesleft, outchar);
}
-static void
-g_iconv_cache_expire_unused (GIConvCache *cache)
+/* Decoder registered for "UTF-32"/"UTF32": decodes using the host byte order
+ * (selected at compile time from G_BYTE_ORDER). */
+static int
+decode_utf32 (char **inbytes, size_t *inbytesleft, gunichar *outchar)
{
- ListNode *node, *prev;
- GIConvCacheNode *inode;
-
- node = cache->list.tailpred;
- while (node->prev && cache->size > ICONV_CACHE_MAX_SIZE) {
- inode = (GIConvCacheNode *) node;
- prev = node->prev;
- if (inode->refcount == 0) {
- list_unlink (node);
- g_hash_table_remove (cache->hash, inode->key);
- cache->size--;
- }
- node = prev;
- }
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+ return decode_utf32_be_or_le (LittleEndian, inbytes, inbytesleft, outchar);
+#else
+ return decode_utf32_be_or_le (BigEndian, inbytes, inbytesleft, outchar);
+#endif
}
-static GIConvCacheNode *
-g_iconv_cache_insert (GIConvCache *cache, const char *key, iconv_t cd)
+/*
+ * Encodes character c as one UTF-32 code unit of the given byte order.
+ *
+ * Returns 0 and advances *outbytes / *outbytesleft by 4 on success. Returns
+ * -1 with errno = E2BIG, leaving the arguments untouched, when fewer than 4
+ * bytes of output space remain.
+ */
+static int
+encode_utf32_be_or_le (Endian endian, gunichar c, char **outbytes, size_t *outbytesleft)
{
- GIConvCacheNode *node;
+	char *outptr = *outbytes;
+	size_t outleft = *outbytesleft;
- cache->size++;
+	if (outleft < 4) {
+		errno = E2BIG;
+		return -1;
+	}
- if (cache->size > ICONV_CACHE_MAX_SIZE)
- g_iconv_cache_expire_unused (cache);
+	if (endian == BigEndian)
+		c = GUINT32_TO_BE (c);
+	else
+		c = GUINT32_TO_LE (c);
- node = g_iconv_cache_node_new (key, cd);
- node->cache = cache;
+	/* copy the bytes in: *outbytes is an arbitrary char buffer and may not be
+	 * aligned for a direct gunichar store */
+	memcpy (outptr, &c, 4);
+	outptr += 4;
+	outleft -= 4;
- g_hash_table_insert (cache->hash, node->key, node);
- list_prepend (&cache->list, (ListNode *) node);
+	*outbytes = outptr;
+	*outbytesleft = outleft;
- return node;
+	return 0;
}
-static GIConvCacheNode *
-g_iconv_cache_lookup (GIConvCache *cache, const char *key, gboolean use)
+/* Encoder registered for "UTF-32BE": forwards to the shared helper with BigEndian. */
+static int
+encode_utf32be (gunichar c, char **outbytes, size_t *outbytesleft)
{
- GIConvCacheNode *node;
-
- node = g_hash_table_lookup (cache->hash, key);
- if (node && use) {
- list_unlink ((ListNode *) node);
- list_prepend (&cache->list, (ListNode *) node);
- }
-
- return node;
+ return encode_utf32_be_or_le (BigEndian, c, outbytes, outbytesleft);
}
-static const char *
-strdown (char *str)
+/* Encoder registered for "UTF-32LE": forwards to the shared helper with LittleEndian. */
+static int
+encode_utf32le (gunichar c, char **outbytes, size_t *outbytesleft)
{
- register char *s = str;
-
- while (*s) {
- if (*s >= 'A' && *s <= 'Z')
- *s += 0x20;
- s++;
- }
-
- return str;
+ return encode_utf32_be_or_le (LittleEndian, c, outbytes, outbytesleft);
}
-const char *
-charset_to_iconv_name (const char *charset)
+/* Encoder registered for "UTF-32"/"UTF32": encodes using the host byte order
+ * (selected at compile time from G_BYTE_ORDER). */
+static int
+encode_utf32 (gunichar c, char **outbytes, size_t *outbytesleft)
{
- char *name, *iconv_name, *buf;
-
- if (charset == NULL)
- return NULL;
-
- name = g_alloca (strlen (charset) + 1);
- strcpy (name, charset);
- strdown (name);
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+ return encode_utf32_be_or_le (LittleEndian, c, outbytes, outbytesleft);
+#else
+ return encode_utf32_be_or_le (BigEndian, c, outbytes, outbytesleft);
+#endif
+}
+
+/*
+ * Decodes one character (one code unit, or a surrogate pair) of UTF-16 in the
+ * given byte order from *inbytes.
+ *
+ * Returns 0, stores the character in *outchar and advances *inbytes /
+ * *inbytesleft past the consumed units on success. Returns -1 with
+ * errno = EINVAL when the input ends before the character is complete, or
+ * errno = EILSEQ for a lone low surrogate or a high surrogate not followed by
+ * a low surrogate; the in/out arguments are left untouched on failure.
+ */
+static int
+decode_utf16_be_or_le (Endian endian, char **inbytes, size_t *inbytesleft, gunichar *outchar)
+{
+	char *inptr = *inbytes;
+	size_t inleft = *inbytesleft;
+	gunichar2 c;
+	gunichar u;
- if ((iconv_name = g_hash_table_lookup (iconv_charsets, name)))
- return iconv_name;
+	if (inleft < 2) {
+		errno = EINVAL;
+		return -1;
+	}
- if (!strncmp (name, "iso", 3)) {
- int iso, codepage;
- char *p;
+	/* copy the raw bytes out: *inbytes is an arbitrary char buffer and may
+	 * not be aligned for a direct gunichar2 load */
+	memcpy (&c, inptr, 2);
+
+	if (endian == BigEndian)
+		u = GUINT16_FROM_BE (c);
+	else
+		u = GUINT16_FROM_LE (c);
+
+	inleft -= 2;
+	inptr += 2;
+
+	if (u >= 0xdc00 && u <= 0xdfff) {
+		/* low surrogate without a preceding high surrogate */
+		errno = EILSEQ;
+		return -1;
+	} else if (u >= 0xd800 && u <= 0xdbff) {
+		if (inleft < 2) {
+			errno = EINVAL;
+			return -1;
+		}
- buf = name + 3;
- if (*buf == '-' || *buf == '_')
- buf++;
+		memcpy (&c, inptr, 2);
+
+		if (endian == BigEndian)
+			c = GUINT16_FROM_BE (c);
+		else
+			c = GUINT16_FROM_LE (c);
- iso = strtoul (buf, &p, 10);
+		inleft -= 2;
+		inptr += 2;
- if (iso == 10646) {
- /* they all become ICONV_10646 */
- iconv_name = g_strdup (ICONV_10646);
- } else if (p > buf) {
- buf = p;
- if (*buf == '-' || *buf == '_')
- buf++;
-
- codepage = strtoul (buf, &p, 10);
-
- if (p > buf) {
- /* codepage is numeric */
-#ifdef __aix__
- if (codepage == 13)
- iconv_name = g_strdup ("IBM-921");
- else
-#endif /* __aix__ */
- iconv_name = g_strdup_printf (ICONV_ISO_INT_FORMAT,
- iso, codepage);
- } else {
- /* codepage is a string - probably iso-2022-jp or something */
- iconv_name = g_strdup_printf (ICONV_ISO_STR_FORMAT,
- iso, p);
- }
- } else {
- /* p == buf, which probably means we've
- encountered an invalid iso charset name */
- iconv_name = g_strdup (name);
+		if (c < 0xdc00 || c > 0xdfff) {
+			errno = EILSEQ;
+			return -1;
}
- } else if (!strncmp (name, "windows-", 8)) {
- buf = name + 8;
- if (!strncmp (buf, "cp", 2))
- buf += 2;
- iconv_name = g_strdup_printf ("CP%s", buf);
- } else if (!strncmp (name, "microsoft-", 10)) {
- buf = name + 10;
- if (!strncmp (buf, "cp", 2))
- buf += 2;
-
- iconv_name = g_strdup_printf ("CP%s", buf);
- } else {
- /* assume charset name is ok as is? */
- iconv_name = g_strdup (charset);
+		/* combine the pair into a supplementary-plane code point */
+		u = ((u - 0xd800) << 10) + (c - 0xdc00) + 0x0010000UL;
}
- g_hash_table_insert (iconv_charsets, g_strdup (name), iconv_name);
+	*inbytes = inptr;
+	*inbytesleft = inleft;
+	*outchar = u;
- return iconv_name;
+	return 0;
}
+/* Decoder registered for "UTF-16BE": forwards to the shared helper with BigEndian. */
+static int
+decode_utf16be (char **inbytes, size_t *inbytesleft, gunichar *outchar)
+{
+ return decode_utf16_be_or_le (BigEndian, inbytes, inbytesleft, outchar);
+}
-static void
-iconv_open_node_free (gpointer key, gpointer value, gpointer user_data)
+/* Decoder registered for "UTF-16LE": forwards to the shared helper with LittleEndian. */
+static int
+decode_utf16le (char **inbytes, size_t *inbytesleft, gunichar *outchar)
{
- iconv_t cd = (iconv_t) key;
- GIConvCacheNode *node;
-
- node = (GIConvCacheNode *) g_iconv_cache_lookup (iconv_cache, value, FALSE);
- g_assert (node);
-
- if (cd != node->cd) {
- node->refcount--;
- iconv_close (cd);
- }
+ return decode_utf16_be_or_le (LittleEndian, inbytes, inbytesleft, outchar);
}
-static void
-g_iconv_shutdown (void)
+/* Decoder registered for "UTF-16"/"UTF16": decodes using the host byte order
+ * (selected at compile time from G_BYTE_ORDER). */
+static int
+decode_utf16 (char **inbytes, size_t *inbytesleft, gunichar *outchar)
{
- if (!iconv_cache)
- return;
-
- g_hash_table_foreach (iconv_open_hash, iconv_open_node_free, NULL);
- g_hash_table_destroy (iconv_open_hash);
- iconv_open_hash = NULL;
-
- g_iconv_cache_free (iconv_cache);
- iconv_cache = NULL;
-
- g_hash_table_destroy (iconv_charsets);
- iconv_charsets = NULL;
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+ return decode_utf16_be_or_le (LittleEndian, inbytes, inbytesleft, outchar);
+#else
+ return decode_utf16_be_or_le (BigEndian, inbytes, inbytesleft, outchar);
+#endif
}
-static void
-g_iconv_init (void)
+/*
+ * Encodes character c as UTF-16 in the given byte order: one code unit for
+ * the Basic Multilingual Plane, a surrogate pair above it.
+ *
+ * Returns 0 and advances *outbytes / *outbytesleft by 2 or 4 on success.
+ * Returns -1 with errno = EILSEQ when c is a surrogate code point or lies
+ * above 0x10FFFF (neither can be represented in UTF-16), or errno = E2BIG
+ * when the output space is too small; the arguments are left untouched on
+ * failure.
+ */
+static int
+encode_utf16_be_or_le (Endian endian, gunichar c, char **outbytes, size_t *outbytesleft)
{
- char *charset, *iconv_name;
- int i;
-
- if (iconv_cache)
- return;
-
- iconv_charsets = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
- iconv_open_hash = g_hash_table_new (g_direct_hash, g_direct_equal);
- iconv_cache = g_iconv_cache_new ();
-
- for (i = 0; known_iconv_charsets[i].charset != NULL; i++) {
- iconv_name = g_strdup (known_iconv_charsets[i].iconv_name);
- charset = g_strdup (known_iconv_charsets[i].charset);
-
- g_hash_table_insert (iconv_charsets, charset, iconv_name);
+	char *outptr = *outbytes;
+	size_t outleft = *outbytesleft;
+	gunichar2 ch;
+	gunichar c2;
+
+	/* reject what UTF-16 cannot express before touching the output; without
+	 * this a surrogate would reach the pair branch and underflow c - 0x10000 */
+	if ((c >= 0xd800 && c <= 0xdfff) || c > 0x10ffff) {
+		errno = EILSEQ;
+		return -1;
+	}
+
+	if (outleft < 2) {
+		errno = E2BIG;
+		return -1;
}
- if (!((locale_charset = getenv ("CHARSET")) && *locale_charset)) {
-#ifdef HAVE_CODESET
- if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0])
- locale_charset = g_ascii_strdown (locale_charset, -1);
+	if (c <= 0xffff) {
+		ch = (gunichar2) c;
+
+		if (endian == BigEndian)
+			ch = GUINT16_TO_BE (ch);
else
- locale_charset = NULL;
-#endif
+			ch = GUINT16_TO_LE (ch);
- if (!locale_charset) {
- char *locale = setlocale (LC_ALL, NULL);
-
- if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
- /* The locale "C" or "POSIX" is a portable locale; its
- * LC_CTYPE part corresponds to the 7-bit ASCII character
- * set. */
- } else {
- /* A locale name is typically of the form language[_terri-
- * tory][.codeset][@modifier], where language is an ISO 639
- * language code, territory is an ISO 3166 country code, and
- * codeset is a character set or encoding identifier like
- * ISO-8859-1 or UTF-8.
- */
- char *codeset, *p;
-
- if (!locale_charset) {
- codeset = strchr (locale, '.');
- if (codeset) {
- codeset++;
-
- /* ; is a hack for debian systems and / is a hack for Solaris systems */
- p = codeset;
- while (*p && !strchr ("@;/", *p))
- p++;
-
- locale_charset = g_ascii_strdown (codeset, (size_t)(p - codeset));
- } else {
- /* charset unknown */
- locale_charset = NULL;
- }
- }
- }
- }
+		/* copy the bytes in: *outbytes may not be aligned for a direct
+		 * gunichar2 store */
+		memcpy (outptr, &ch, 2);
+		outptr += 2;
+		outleft -= 2;
+	} else if (outleft < 4) {
+		errno = E2BIG;
+		return -1;
+	} else {
+		c2 = c - 0x10000;
+
+		/* high (lead) surrogate: top 10 bits */
+		ch = (gunichar2) ((c2 >> 10) + 0xd800);
+		if (endian == BigEndian)
+			ch = GUINT16_TO_BE (ch);
+		else
+			ch = GUINT16_TO_LE (ch);
+		memcpy (outptr, &ch, 2);
+		outptr += 2;
+
+		/* low (trail) surrogate: bottom 10 bits */
+		ch = (gunichar2) ((c2 & 0x3ff) + 0xdc00);
+		if (endian == BigEndian)
+			ch = GUINT16_TO_BE (ch);
+		else
+			ch = GUINT16_TO_LE (ch);
+		memcpy (outptr, &ch, 2);
+		outptr += 2;
+
+		outleft -= 4;
}
+
+	*outbytes = outptr;
+	*outbytesleft = outleft;
+
+	return 0;
}
-gsize
-g_iconv (GIConv converter, gchar **inbuf, gsize *inleft, gchar **outbuf, gsize *outleft)
+/* Encoder registered for "UTF-16BE": forwards to the shared helper with BigEndian. */
+static int
+encode_utf16be (gunichar c, char **outbytes, size_t *outbytesleft)
{
- return iconv ((iconv_t) converter, inbuf, inleft, outbuf, outleft);
+ return encode_utf16_be_or_le (BigEndian, c, outbytes, outbytesleft);
}
-GIConv
-g_iconv_open (const gchar *to, const gchar *from)
+/* Encoder registered for "UTF-16LE": forwards to the shared helper with LittleEndian. */
+static int
+encode_utf16le (gunichar c, char **outbytes, size_t *outbytesleft)
{
- GIConvCacheNode *node;
- iconv_t cd;
- char *key;
+ return encode_utf16_be_or_le (LittleEndian, c, outbytes, outbytesleft);
+}
+
+/* Encoder registered for "UTF-16"/"UTF16": encodes using the host byte order
+ * (selected at compile time from G_BYTE_ORDER). */
+static int
+encode_utf16 (gunichar c, char **outbytes, size_t *outbytesleft)
+{
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+ return encode_utf16_be_or_le (LittleEndian, c, outbytes, outbytesleft);
+#else
+ return encode_utf16_be_or_le (BigEndian, c, outbytes, outbytesleft);
+#endif
+}
+
+/*
+ * Decodes one UTF-8 sequence (1 to 6 bytes, i.e. values below 0x80000000)
+ * from *inbytes.
+ *
+ * Returns 0, stores the character in *outchar and advances *inbytes /
+ * *inbytesleft past the sequence on success. Returns -1 with errno = EINVAL
+ * when the input is empty or ends inside the sequence, or errno = EILSEQ when
+ * the lead byte is not a valid lead byte or a following byte is not a
+ * continuation byte (10xxxxxx); the in/out arguments are left untouched on
+ * failure.
+ *
+ * NOTE(review): overlong encodings are still accepted.
+ */
+static int
+decode_utf8 (char **inbytes, size_t *inbytesleft, gunichar *outchar)
+{
+	size_t inleft = *inbytesleft;
+	char *inptr = *inbytes;
+	size_t i, len = 0;
+	unsigned char c, b;
+	gunichar u;
+
+	if (inleft == 0) {
+		/* nothing to read: do not touch *inptr */
+		errno = EINVAL;
+		return -1;
+	}
+
+	c = *inptr++;
+
+	if (c < 0x80) {
+		/* simple ascii case */
+		len = 1;
+	} else if (c < 0xc0) {
+		/* a continuation byte cannot start a sequence */
+		errno = EILSEQ;
+		return -1;
+	} else if (c < 0xe0) {
+		c &= 0x1f;
+		len = 2;
+	} else if (c < 0xf0) {
+		c &= 0x0f;
+		len = 3;
+	} else if (c < 0xf8) {
+		c &= 0x07;
+		len = 4;
+	} else if (c < 0xfc) {
+		c &= 0x03;
+		len = 5;
+	} else if (c < 0xfe) {
+		c &= 0x01;
+		len = 6;
+	} else {
+		errno = EILSEQ;
+		return -1;
+	}
- if (from == NULL || to == NULL) {
+	if (len > inleft) {
errno = EINVAL;
- return (GIConv) -1;
+		return -1;
}
- ICONV_CACHE_LOCK ();
-
- g_iconv_init ();
-
- if (!g_ascii_strcasecmp (from, "x-unknown"))
- from = locale_charset;
-
- from = charset_to_iconv_name (from);
- to = charset_to_iconv_name (to);
- key = g_alloca (strlen (from) + strlen (to) + 2);
- sprintf (key, "%s:%s", from, to);
-
- if ((node = g_iconv_cache_lookup (iconv_cache, key, TRUE))) {
- if (node->used) {
- if ((cd = iconv_open (to, from)) == (iconv_t) -1)
- goto exception;
- } else {
- /* Apparently iconv on Solaris <= 7 segfaults if you pass in
- * NULL for anything but inbuf; work around that. (NULL outbuf
- * or NULL *outbuf is allowed by Unix98.)
- */
- size_t inleft = 0, outleft = 0;
- char *outbuf = NULL;
-
- cd = node->cd;
- node->used = TRUE;
-
- /* reset the descriptor */
- iconv (cd, NULL, &inleft, &outbuf, &outleft);
- }
-
- node->refcount++;
- } else {
- if ((cd = iconv_open (to, from)) == (iconv_t) -1)
- goto exception;
-
- node = g_iconv_cache_insert (iconv_cache, key, cd);
+	u = c;
+	for (i = 1; i < len; i++) {
+		b = (unsigned char) *inptr++;
+
+		/* every byte after the lead must look like 10xxxxxx */
+		if ((b & 0xc0) != 0x80) {
+			errno = EILSEQ;
+			return -1;
+		}
+
+		u = (u << 6) | (b & 0x3f);
}
- g_hash_table_insert (iconv_open_hash, cd, node->key);
-
- ICONV_CACHE_UNLOCK ();
-
- return (GIConv) cd;
-
- exception:
+	*inbytesleft = inleft - len;
+	*inbytes = inptr;
+	*outchar = u;
- ICONV_CACHE_UNLOCK ();
-
- return (GIConv) -1;
+	return 0;
}
-int
-g_iconv_close (GIConv converter)
+/*
+ * Encodes character c as a UTF-8 sequence of 1 to 6 bytes at *outbytes.
+ *
+ * Returns 0 and advances *outbytes / *outbytesleft by the sequence length on
+ * success. Returns -1 with errno = EINVAL when c is >= 0x80000000, or
+ * errno = E2BIG when the output space is smaller than the sequence; the
+ * arguments are left untouched on failure.
+ */
+static int
+encode_utf8 (gunichar c, char **outbytes, size_t *outbytesleft)
{
- GIConvCacheNode *node;
- const char *key;
- iconv_t cd;
-
- if (converter == (GIConv) -1)
- return 0;
-
- cd = (iconv_t) converter;
-
- ICONV_CACHE_LOCK ();
+ size_t outleft = *outbytesleft;
+ char *outptr = *outbytes;
+ size_t len, i;
+ int base;
+
+ /* pick the sequence length and the marker bits of the lead byte */
+ if (c < 128UL) {
+ base = 0;
+ len = 1;
+ } else if (c < 2048UL) {
+ base = 192;
+ len = 2;
+ } else if (c < 65536UL) {
+ base = 224;
+ len = 3;
+ } else if (c < 2097152UL) {
+ base = 240;
+ len = 4;
+ } else if (c < 67108864UL) {
+ base = 248;
+ len = 5;
+ } else if (c < 2147483648UL) {
+ base = 252;
+ len = 6;
+ } else {
+ errno = EINVAL;
+ return -1;
+ }
- g_iconv_init ();
+ if (outleft < len) {
+ errno = E2BIG;
+ return -1;
+ }
- if ((key = g_hash_table_lookup (iconv_open_hash, cd))) {
- g_hash_table_remove (iconv_open_hash, cd);
-
- node = (GIConvCacheNode *) g_iconv_cache_lookup (iconv_cache, key, FALSE);
- g_assert (node);
-
- if (iconv_cache->size > ICONV_CACHE_MAX_SIZE) {
- /* expire before unreffing this node so that it wont get uncached */
- g_iconv_cache_expire_unused (iconv_cache);
- }
-
- node->refcount--;
-
- if (cd == node->cd)
- node->used = FALSE;
- else
- iconv_close (cd);
- } else {
- ICONV_CACHE_UNLOCK ();
-
- /* really this is an error... someone is trying to close an
- * iconv_t descriptor that wasn't opened by us. */
-
- return iconv_close (cd);
+ /* fill the continuation bytes from last to first, 6 bits at a time */
+ for (i = len - 1; i > 0; i--) {
+ /* mask off 6 bits worth and add 128 */
+ outptr[i] = 128 + (c & 0x3f);
+ c >>= 6;
}
- ICONV_CACHE_UNLOCK ();
+ /* first character has a different base */
+ outptr[0] = base + c;
+
+ *outbytesleft = outleft - len;
+ *outbytes = outptr + len;
return 0;
}