Implemented g_iconv() w/o needing libiconv
author Jeffrey Stedfast <fejj@gnome.org>
Wed, 20 Apr 2011 19:08:20 +0000 (15:08 -0400)
committer Jeffrey Stedfast <fejj@gnome.org>
Wed, 20 Apr 2011 19:08:20 +0000 (15:08 -0400)
eglib/src/giconv.c
eglib/src/glib.h
eglib/src/gstr.c
eglib/src/gunicode.c

index dbdd203acb2e941d927bf32e0300d6a3e9cc0675..acfddcac7c051f8d6d1b6675a6787bd434124892 100644 (file)
 #endif
 
 #include <glib.h>
-#include <stdio.h>
 #include <string.h>
-#include <locale.h>
+#ifdef HAVE_ICONV_H
 #include <iconv.h>
+#endif
 #include <errno.h>
 
-#ifdef HAVE_CODESET
-#include <langinfo.h>
-#endif
+typedef enum {
+       LittleEndian,
+       BigEndian
+} Endian;
+
+typedef int (* Decoder) (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+typedef int (* Encoder) (gunichar c, char **outbytes, size_t *outbytesleft);
 
-#define ICONV_ISO_INT_FORMAT "iso-%u-%u"
-#define ICONV_ISO_STR_FORMAT "iso-%u-%s"
-#define ICONV_10646 "iso-10646"
-
-#define ICONV_CACHE_MAX_SIZE   (16)
-
-typedef struct _ListNode {
-       struct _ListNode *next;
-       struct _ListNode *prev;
-} ListNode;
-
-typedef struct {
-       ListNode *head;
-       ListNode *tail;
-       ListNode *tailpred;
-} List;
-
-typedef struct {
-       GHashTable *hash;
-       size_t size;
-       List list;
-} GIConvCache;
-
-typedef struct {
-       ListNode node;
-       GIConvCache *cache;
-       guint32 refcount : 31;
-       guint32 used : 1;
+struct _GIConv {
+       Decoder decode;
+       Encoder encode;
+       gunichar c;
+#ifdef HAVE_ICONV
        iconv_t cd;
-       char *key;
-} GIConvCacheNode;
+#endif
+};
 
+static int decode_utf32be (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf32be (gunichar c, char **outbytes, size_t *outbytesleft);
 
-static GIConvCache *iconv_cache = NULL;
-static GHashTable *iconv_open_hash = NULL;
-static GHashTable *iconv_charsets = NULL;
-static char *locale_charset = NULL;
+static int decode_utf32le (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf32le (gunichar c, char **outbytes, size_t *outbytesleft);
 
-#ifdef G_THREADS_ENABLED
-static pthread_mutex_t iconv_cache_lock = PTHREAD_MUTEX_INITIALIZER;
-#define ICONV_CACHE_LOCK()   pthread_mutex_lock (&iconv_cache_lock)
-#define ICONV_CACHE_UNLOCK() pthread_mutex_unlock (&iconv_cache_lock)
-#else
-#define ICONV_CACHE_LOCK()
-#define ICONV_CACHE_UNLOCK()
-#endif /* G_THREADS_ENABLED */
+static int decode_utf16be (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf16be (gunichar c, char **outbytes, size_t *outbytesleft);
+
+static int decode_utf16le (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf16le (gunichar c, char **outbytes, size_t *outbytesleft);
+
+static int decode_utf32 (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf32 (gunichar c, char **outbytes, size_t *outbytesleft);
 
+static int decode_utf16 (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf16 (gunichar c, char **outbytes, size_t *outbytesleft);
 
-/* a useful website on charset alaises:
- * http://www.li18nux.org/subgroups/sa/locnameguide/v1.1draft/CodesetAliasTable-V11.html */
+static int decode_utf8 (char **inbytes, size_t *inbytesleft, gunichar *outchar);
+static int encode_utf8 (gunichar c, char **outbytes, size_t *outbytesleft);
 
 static struct {
-       const char *charset;     /* Note: expected to be lowercase */
-       const char *iconv_name;  /* Note: expected to be properly cased for iconv_open() */
-} known_iconv_charsets[] = {
-       /* charset name, iconv-friendly name (sometimes case sensitive) */
-       { "utf-8",           "UTF-8"      },
-       { "utf8",            "UTF-8"      },
-       
-       /* ANSI_X3.4-1968 is used on some systems and should be
-          treated the same as US-ASCII */
-       { "ansi_x3.4-1968",  NULL         },
-       
-       /* 10646 is a special case, its usually UCS-2 big endian */
-       /* This might need some checking but should be ok for
-           solaris/linux */
-       { "iso-10646-1",     "UCS-2BE"    },
-       { "iso_10646-1",     "UCS-2BE"    },
-       { "iso10646-1",      "UCS-2BE"    },
-       { "iso-10646",       "UCS-2BE"    },
-       { "iso_10646",       "UCS-2BE"    },
-       { "iso10646",        "UCS-2BE"    },
-       
-       /* Korean charsets */
-       /* Note: according to http://www.iana.org/assignments/character-sets,
-        * ks_c_5601-1987 should really map to ISO-2022-KR, but the EUC-KR
-        * mapping was given to me via a native Korean user, so I'm not sure
-        * if I should change this... perhaps they are compatable? */
-       { "ks_c_5601-1987",  "EUC-KR"     },
-       { "5601",            "EUC-KR"     },
-       { "ksc-5601",        "EUC-KR"     },
-       { "ksc-5601-1987",   "EUC-KR"     },
-       { "ksc-5601_1987",   "EUC-KR"     },
-       { "ks_c_5861-1992",  "EUC-KR"     },
-       { "euckr-0",         "EUC-KR"     },
-       
-       /* Chinese charsets */
-       { "big5-0",          "BIG5"       },
-       { "big5.eten-0",     "BIG5"       },
-       { "big5hkscs-0",     "BIG5HKSCS"  },
-       /* Note: GBK is a superset of gb2312 (see
-        * http://en.wikipedia.org/wiki/GBK for details), so 'upgrade'
-        * gb2312 to GBK so that we can completely convert GBK text
-        * that is incorrectly tagged as gb2312 to UTF-8. */
-       { "gb2312",          "GBK"        },
-       { "gb-2312",         "GBK"        },
-       { "gb2312-0",        "GBK"        },
-       { "gb2312-80",       "GBK"        },
-       { "gb2312.1980-0",   "GBK"        },
-       /* euc-cn is an alias for gb2312 */
-       { "euc-cn",          "GBK"        },
-       { "gb18030-0",       "gb18030"    },
-       { "gbk-0",           "GBK"        },
-       
-       /* Japanese charsets */
-       { "eucjp-0",         "eucJP"      },  /* should this map to "EUC-JP" instead? */
-       { "ujis-0",          "ujis"       },  /* we might want to map this to EUC-JP */
-       { "jisx0208.1983-0", "SJIS"       },
-       { "jisx0212.1990-0", "SJIS"       },
-       { "pck",             "SJIS"       },
-       { NULL,              NULL         }
+       const char *name;
+       Decoder decoder;
+       Encoder encoder;
+} charsets[] = {
+       { "UTF-32BE", decode_utf32be, encode_utf32be },
+       { "UTF-32LE", decode_utf32le, encode_utf32le },
+       { "UTF-16BE", decode_utf16be, encode_utf16be },
+       { "UTF-16LE", decode_utf16le, encode_utf16le },
+       { "UTF-32",   decode_utf32,   encode_utf32   },
+       { "UTF-16",   decode_utf16,   encode_utf16   },
+       { "UTF-8",    decode_utf8,    encode_utf8    },
+       { "UTF32",    decode_utf32,   encode_utf32   },
+       { "UTF16",    decode_utf16,   encode_utf16   },
+       { "UTF8",     decode_utf8,    encode_utf8    },
 };
 
 
-static void
-list_init (List *list)
+GIConv
+g_iconv_open (const char *to_charset, const char *from_charset)
 {
-       list->head = (ListNode *) &list->tail;
-       list->tail = NULL;
-       list->tailpred = (ListNode *) &list->head;
+#ifdef HAVE_ICONV
+       iconv_t icd = (iconv_t) -1;
+#endif
+       Decoder decoder = NULL;
+       Encoder encoder = NULL;
+       GIConv cd;
+       guint i;
+       
+       if (!to_charset || !from_charset || !to_charset[0] || !from_charset[0])
+               return (GIConv) -1;
+       
+       for (i = 0; i < G_N_ELEMENTS (charsets); i++) {
+               if (!g_ascii_strcasecmp (charsets[i].name, from_charset))
+                       decoder = charsets[i].decoder;
+               
+               if (!g_ascii_strcasecmp (charsets[i].name, to_charset))
+                       encoder = charsets[i].encoder;
+       }
+       
+       if (encoder == NULL || decoder == NULL) {
+#ifdef HAVE_ICONV
+               if ((icd = iconv_open (to_charset, from_charset)) == (iconv_t) -1)
+                       return (GIConv) -1;
+#else
+               return (GIConv) -1;
+#endif
+       }
+       
+       cd = (GIConv) g_malloc (sizeof (struct _GIConv));
+       cd->decode = decoder;
+       cd->encode = encoder;
+       cd->c = -1;
+       
+#ifdef HAVE_ICONV
+       cd->cd = icd;
+#endif
+       
+       return cd;
 }
 
-static ListNode *
-list_prepend (List *list, ListNode *node)
+int
+g_iconv_close (GIConv cd)
 {
-       node->next = list->head;
-       node->prev = (ListNode *) &list->head;
-       list->head->prev = node;
-       list->head = node;
+#ifdef HAVE_ICONV
+       if (cd->cd != (iconv_t) -1)
+               iconv_close (cd->cd);
+#endif
+       
+       g_free (cd);
        
-       return node;
+       return 0;
 }
 
-static ListNode *
-list_unlink (ListNode *node)
+gsize
+g_iconv (GIConv cd, char **inbytes, size_t *inbytesleft,
+        char **outbytes, size_t *outbytesleft)
 {
-       node->next->prev = node->prev;
-        node->prev->next = node->next;
+       size_t inleft, outleft;
+       char *inptr, *outptr;
+       gsize rc = 0;
+       gunichar c;
+       
+#ifdef HAVE_ICONV
+       if (cd->cd != (iconv_t) -1)
+               return iconv (cd->cd, inbytes, inbytesleft, outbytes, outbytesleft);
+#endif
+       
+       if (outbytes == NULL || outbytesleft == NULL) {
+               /* reset converter */
+               cd->c = -1;
+               return 0;
+       }
+       
+       inleft = inbytesleft ? *inbytesleft : 0;
+       inptr = inbytes ? *inbytes : NULL;
+       outleft = *outbytesleft;
+       outptr = *outbytes;
+       c = cd->c;
        
-       return node;
+       do {
+               if (c == (gunichar) -1 && cd->decode (&inptr, &inleft, &c) == -1) {
+                       rc = -1;
+                       break;
+               }
+               
+               if (cd->encode (c, &outptr, &outleft) == -1) {
+                       rc = -1;
+                       break;
+               }
+               
+               c = -1;
+       } while (inleft > 0 && outleft > 0);
+       
+       if (inbytesleft)
+               *inbytesleft = inleft;
+       
+       if (inbytes)
+               *inbytes = inptr;
+       
+       *outbytesleft = outleft;
+       *outbytes = outptr;
+       cd->c = c;
+       
+       return rc;
 }
 
 
-static GIConvCacheNode *
-g_iconv_cache_node_new (const char *key, iconv_t cd)
+static int
+decode_utf32_be_or_le (Endian endian, char **inbytes, size_t *inbytesleft, gunichar *outchar)
 {
-       GIConvCacheNode *node;
+       gunichar *inptr = (gunichar *) *inbytes;
+       size_t inleft = *inbytesleft;
+       gunichar c;
+       
+       if (inleft < 4) {
+               errno = EINVAL;
+               return -1;
+       }
        
-       node = g_malloc (sizeof (GIConvCacheNode));
-       node->key = g_strdup (key);
-       node->refcount = 1;
-       node->used = TRUE;
-       node->cd = cd;
+       if (endian == BigEndian)
+               c = GUINT32_FROM_BE (*inptr);
+       else
+               c = GUINT32_FROM_LE (*inptr);
        
-       return node;
-}
-
-static void
-g_iconv_cache_node_free (GIConvCacheNode *node)
-{
-       iconv_close (node->cd);
-       g_free (node->key);
+       inleft -= 4;
+       inptr++;
+       
+       if (c >= 2147483648UL) {
+               errno = EILSEQ;
+               return -1;
+       }
+       
+       *inbytes = (char *) inptr;
+       *inbytesleft = inleft;
+       *outchar = c;
+       
+       return 0;
 }
 
-static GIConvCache *
-g_iconv_cache_new (void)
+static int
+decode_utf32be (char **inbytes, size_t *inbytesleft, gunichar *outchar)
 {
-       GIConvCache *cache;
-       
-       cache = g_malloc (sizeof (GIConvCache));
-       cache->hash = g_hash_table_new_full (g_str_hash, g_str_equal, NULL, (GDestroyNotify) g_iconv_cache_node_free);
-       list_init (&cache->list);
-       
-       return cache;
+       return decode_utf32_be_or_le (BigEndian, inbytes, inbytesleft, outchar);
 }
 
-static void
-g_iconv_cache_free (GIConvCache *cache)
+static int
+decode_utf32le (char **inbytes, size_t *inbytesleft, gunichar *outchar)
 {
-       g_hash_table_destroy (cache->hash);
-       g_free (cache);
+       return decode_utf32_be_or_le (LittleEndian, inbytes, inbytesleft, outchar);
 }
 
-static void
-g_iconv_cache_expire_unused (GIConvCache *cache)
+static int
+decode_utf32 (char **inbytes, size_t *inbytesleft, gunichar *outchar)
 {
-       ListNode *node, *prev;
-       GIConvCacheNode *inode;
-       
-       node = cache->list.tailpred;
-       while (node->prev && cache->size > ICONV_CACHE_MAX_SIZE) {
-               inode = (GIConvCacheNode *) node;
-               prev = node->prev;
-               if (inode->refcount == 0) {
-                       list_unlink (node);
-                       g_hash_table_remove (cache->hash, inode->key);
-                       cache->size--;
-               }
-               node = prev;
-       }
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+       return decode_utf32_be_or_le (LittleEndian, inbytes, inbytesleft, outchar);
+#else
+       return decode_utf32_be_or_le (BigEndian, inbytes, inbytesleft, outchar);
+#endif
 }
 
-static GIConvCacheNode *
-g_iconv_cache_insert (GIConvCache *cache, const char *key, iconv_t cd)
+static int
+encode_utf32_be_or_le (Endian endian, gunichar c, char **outbytes, size_t *outbytesleft)
 {
-       GIConvCacheNode *node;
+       gunichar *outptr = (gunichar *) *outbytes;
+       size_t outleft = *outbytesleft;
        
-       cache->size++;
+       if (outleft < 4) {
+               errno = E2BIG;
+               return -1;
+       }
        
-       if (cache->size > ICONV_CACHE_MAX_SIZE)
-               g_iconv_cache_expire_unused (cache);
+       if (endian == BigEndian)
+               *outptr++ = GUINT32_TO_BE (c);
+       else
+               *outptr++ = GUINT32_TO_LE (c);
        
-       node = g_iconv_cache_node_new (key, cd);
-       node->cache = cache;
+       outleft -= 4;
        
-       g_hash_table_insert (cache->hash, node->key, node);
-       list_prepend (&cache->list, (ListNode *) node);
+       *outbytes = (char *) outptr;
+       *outbytesleft = outleft;
        
-       return node;
+       return 0;
 }
 
-static GIConvCacheNode *
-g_iconv_cache_lookup (GIConvCache *cache, const char *key, gboolean use)
+static int
+encode_utf32be (gunichar c, char **outbytes, size_t *outbytesleft)
 {
-       GIConvCacheNode *node;
-       
-       node = g_hash_table_lookup (cache->hash, key);
-       if (node && use) {
-               list_unlink ((ListNode *) node);
-               list_prepend (&cache->list, (ListNode *) node);
-       }
-       
-       return node;
+       return encode_utf32_be_or_le (BigEndian, c, outbytes, outbytesleft);
 }
 
-static const char *
-strdown (char *str)
+static int
+encode_utf32le (gunichar c, char **outbytes, size_t *outbytesleft)
 {
-       register char *s = str;
-       
-       while (*s) {
-               if (*s >= 'A' && *s <= 'Z')
-                       *s += 0x20;
-               s++;
-       }
-       
-       return str;
+       return encode_utf32_be_or_le (LittleEndian, c, outbytes, outbytesleft);
 }
 
-const char *
-charset_to_iconv_name (const char *charset)
+static int
+encode_utf32 (gunichar c, char **outbytes, size_t *outbytesleft)
 {
-       char *name, *iconv_name, *buf;
-       
-       if (charset == NULL)
-               return NULL;
-       
-       name = g_alloca (strlen (charset) + 1);
-       strcpy (name, charset);
-       strdown (name);
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+       return encode_utf32_be_or_le (LittleEndian, c, outbytes, outbytesleft);
+#else
+       return encode_utf32_be_or_le (BigEndian, c, outbytes, outbytesleft);
+#endif
+}
+
+static int
+decode_utf16_be_or_le (Endian endian, char **inbytes, size_t *inbytesleft, gunichar *outchar)
+{
+       gunichar2 *inptr = (gunichar2 *) *inbytes;
+       size_t inleft = *inbytesleft;
+       gunichar2 c;
+       gunichar u;
        
-       if ((iconv_name = g_hash_table_lookup (iconv_charsets, name)))
-               return iconv_name;
+       if (inleft < 2) {
+               errno = EINVAL;
+               return -1;
+       }
        
-       if (!strncmp (name, "iso", 3)) {
-               int iso, codepage;
-               char *p;
+       if (endian == BigEndian)
+               u = GUINT16_FROM_BE (*inptr);
+       else
+               u = GUINT16_FROM_LE (*inptr);
+       
+       inleft -= 2;
+       inptr++;
+       
+       if (u >= 0xdc00 && u <= 0xdfff) {
+               errno = EILSEQ;
+               return -1;
+       } else if (u >= 0xd800 && u <= 0xdbff) {
+               if (inleft < 2) {
+                       errno = EINVAL;
+                       return -1;
+               }
                
-               buf = name + 3;
-               if (*buf == '-' || *buf == '_')
-                       buf++;
+               if (endian == BigEndian)
+                       c = GUINT16_FROM_BE (*inptr);
+               else
+                       c = GUINT16_FROM_LE (*inptr);
                
-               iso = strtoul (buf, &p, 10);
+               inleft -= 2;
+               inptr++;
                
-               if (iso == 10646) {
-                       /* they all become ICONV_10646 */
-                       iconv_name = g_strdup (ICONV_10646);
-               } else if (p > buf) {
-                       buf = p;
-                       if (*buf == '-' || *buf == '_')
-                               buf++;
-                       
-                       codepage = strtoul (buf, &p, 10);
-                       
-                       if (p > buf) {
-                               /* codepage is numeric */
-#ifdef __aix__
-                               if (codepage == 13)
-                                       iconv_name = g_strdup ("IBM-921");
-                               else
-#endif /* __aix__ */
-                                       iconv_name = g_strdup_printf (ICONV_ISO_INT_FORMAT,
-                                                                     iso, codepage);
-                       } else {
-                               /* codepage is a string - probably iso-2022-jp or something */
-                               iconv_name = g_strdup_printf (ICONV_ISO_STR_FORMAT,
-                                                             iso, p);
-                       }
-               } else {
-                       /* p == buf, which probably means we've
-                          encountered an invalid iso charset name */
-                       iconv_name = g_strdup (name);
+               if (c < 0xdc00 || c > 0xdfff) {
+                       errno = EILSEQ;
+                       return -1;
                }
-       } else if (!strncmp (name, "windows-", 8)) {
-               buf = name + 8;
-               if (!strncmp (buf, "cp", 2))
-                       buf += 2;
                
-               iconv_name = g_strdup_printf ("CP%s", buf);
-       } else if (!strncmp (name, "microsoft-", 10)) {
-               buf = name + 10;
-               if (!strncmp (buf, "cp", 2))
-                       buf += 2;
-               
-               iconv_name = g_strdup_printf ("CP%s", buf);
-       } else {
-               /* assume charset name is ok as is? */
-               iconv_name = g_strdup (charset);
+               u = ((u - 0xd800) << 10) + (c - 0xdc00) + 0x0010000UL;
        }
        
-       g_hash_table_insert (iconv_charsets, g_strdup (name), iconv_name);
+       *inbytes = (char *) inptr;
+       *inbytesleft = inleft;
+       *outchar = u;
        
-       return iconv_name;
+       return 0;
 }
 
+static int
+decode_utf16be (char **inbytes, size_t *inbytesleft, gunichar *outchar)
+{
+       return decode_utf16_be_or_le (BigEndian, inbytes, inbytesleft, outchar);
+}
 
-static void
-iconv_open_node_free (gpointer key, gpointer value, gpointer user_data)
+static int
+decode_utf16le (char **inbytes, size_t *inbytesleft, gunichar *outchar)
 {
-       iconv_t cd = (iconv_t) key;
-       GIConvCacheNode *node;
-       
-       node = (GIConvCacheNode *) g_iconv_cache_lookup (iconv_cache, value, FALSE);
-       g_assert (node);
-       
-       if (cd != node->cd) {
-               node->refcount--;
-               iconv_close (cd);
-       }
+       return decode_utf16_be_or_le (LittleEndian, inbytes, inbytesleft, outchar);
 }
 
-static void
-g_iconv_shutdown (void)
+static int
+decode_utf16 (char **inbytes, size_t *inbytesleft, gunichar *outchar)
 {
-       if (!iconv_cache)
-               return;
-       
-       g_hash_table_foreach (iconv_open_hash, iconv_open_node_free, NULL);
-       g_hash_table_destroy (iconv_open_hash);
-       iconv_open_hash = NULL;
-       
-       g_iconv_cache_free (iconv_cache);
-       iconv_cache = NULL;
-       
-       g_hash_table_destroy (iconv_charsets);
-       iconv_charsets = NULL;
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+       return decode_utf16_be_or_le (LittleEndian, inbytes, inbytesleft, outchar);
+#else
+       return decode_utf16_be_or_le (BigEndian, inbytes, inbytesleft, outchar);
+#endif
 }
 
-static void
-g_iconv_init (void)
+static int
+encode_utf16_be_or_le (Endian endian, gunichar c, char **outbytes, size_t *outbytesleft)
 {
-       char *charset, *iconv_name;
-       int i;
-       
-       if (iconv_cache)
-               return;
-       
-       iconv_charsets = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
-       iconv_open_hash = g_hash_table_new (g_direct_hash, g_direct_equal);
-       iconv_cache = g_iconv_cache_new ();
-       
-       for (i = 0; known_iconv_charsets[i].charset != NULL; i++) {
-               iconv_name = g_strdup (known_iconv_charsets[i].iconv_name);
-               charset = g_strdup (known_iconv_charsets[i].charset);
-               
-               g_hash_table_insert (iconv_charsets, charset, iconv_name);
+       gunichar2 *outptr = (gunichar2 *) *outbytes;
+       size_t outleft = *outbytesleft;
+       gunichar2 ch;
+       gunichar c2;
+       
+       if (outleft < 2) {
+               errno = E2BIG;
+               return -1;
        }
        
-       if (!((locale_charset = getenv ("CHARSET")) && *locale_charset)) {
-#ifdef HAVE_CODESET
-               if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0])
-                       locale_charset = g_ascii_strdown (locale_charset, -1);
+       if (c <= 0xffff && (c < 0xd800 || c > 0xdfff)) {
+               ch = (gunichar2) c;
+               
+               if (endian == BigEndian)
+                       *outptr++ = GUINT16_TO_BE (ch);
                else
-                       locale_charset = NULL;
-#endif
+                       *outptr++ = GUINT16_TO_LE (ch);
                
-               if (!locale_charset) {
-                       char *locale = setlocale (LC_ALL, NULL);
-                       
-                       if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
-                               /* The locale "C"  or  "POSIX"  is  a  portable  locale;  its
-                                * LC_CTYPE  part  corresponds  to  the 7-bit ASCII character
-                                * set.  */
-                       } else {
-                               /* A locale name is typically of  the  form  language[_terri-
-                                * tory][.codeset][@modifier],  where  language is an ISO 639
-                                * language code, territory is an ISO 3166 country code,  and
-                                * codeset  is  a  character  set or encoding identifier like
-                                * ISO-8859-1 or UTF-8.
-                                */
-                               char *codeset, *p;
-                               
-                               if (!locale_charset) {
-                                       codeset = strchr (locale, '.');
-                                       if (codeset) {
-                                               codeset++;
-                                               
-                                               /* ; is a hack for debian systems and / is a hack for Solaris systems */
-                                               p = codeset;
-                                               while (*p && !strchr ("@;/", *p))
-                                                       p++;
-                                               
-                                               locale_charset = g_ascii_strdown (codeset, (size_t)(p - codeset));
-                                       } else {
-                                               /* charset unknown */
-                                               locale_charset = NULL;
-                                       }
-                               }
-                       }
-               }
+               outleft -= 2;
+       } else if (outleft < 4) {
+               errno = E2BIG;
+               return -1;
+       } else {
+               c2 = c - 0x10000;
+               
+               ch = (gunichar2) ((c2 >> 10) + 0xd800);
+               if (endian == BigEndian)
+                       *outptr++ = GUINT16_TO_BE (ch);
+               else
+                       *outptr++ = GUINT16_TO_LE (ch);
+               
+               ch = (gunichar2) ((c2 & 0x3ff) + 0xdc00);
+               if (endian == BigEndian)
+                       *outptr++ = GUINT16_TO_BE (ch);
+               else
+                       *outptr++ = GUINT16_TO_LE (ch);
+               
+               outleft -= 4;
        }
+       
+       *outbytes = (char *) outptr;
+       *outbytesleft = outleft;
+       
+       return 0;
 }
 
-gsize
-g_iconv (GIConv converter, gchar **inbuf, gsize *inleft, gchar **outbuf, gsize *outleft)
+static int
+encode_utf16be (gunichar c, char **outbytes, size_t *outbytesleft)
 {
-       return iconv ((iconv_t) converter, inbuf, inleft, outbuf, outleft);
+       return encode_utf16_be_or_le (BigEndian, c, outbytes, outbytesleft);
 }
 
-GIConv
-g_iconv_open (const gchar *to, const gchar *from)
+static int
+encode_utf16le (gunichar c, char **outbytes, size_t *outbytesleft)
 {
-       GIConvCacheNode *node;
-       iconv_t cd;
-       char *key;
+       return encode_utf16_be_or_le (LittleEndian, c, outbytes, outbytesleft);
+}
+
+static int
+encode_utf16 (gunichar c, char **outbytes, size_t *outbytesleft)
+{
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+       return encode_utf16_be_or_le (LittleEndian, c, outbytes, outbytesleft);
+#else
+       return encode_utf16_be_or_le (BigEndian, c, outbytes, outbytesleft);
+#endif
+}
+
+static int
+decode_utf8 (char **inbytes, size_t *inbytesleft, gunichar *outchar)
+{
+       size_t inleft = *inbytesleft;
+       char *inptr = *inbytes;
+       size_t i, len = 0;
+       unsigned char c;
+       gunichar u;
+       
+       c = *inptr++;
+       
+       if (c < 0x80) {
+               /* simple ascii case */
+               len = 1;
+       } else if (c < 0xe0) {
+               c &= 0x1f;
+               len = 2;
+       } else if (c < 0xf0) {
+               c &= 0x0f;
+               len = 3;
+       } else if (c < 0xf8) {
+               c &= 0x07;
+               len = 4;
+       } else if (c < 0xfc) {
+               c &= 0x03;
+               len = 5;
+       } else if (c < 0xfe) {
+               c &= 0x01;
+               len = 6;
+       } else {
+               errno = EILSEQ;
+               return -1;
+       }
        
-       if (from == NULL || to == NULL) {
+       if (len > inleft) {
                errno = EINVAL;
-               return (GIConv) -1;
+               return -1;
        }
        
-       ICONV_CACHE_LOCK ();
-       
-       g_iconv_init ();
-       
-       if (!g_ascii_strcasecmp (from, "x-unknown"))
-               from = locale_charset;
-       
-       from = charset_to_iconv_name (from);
-       to = charset_to_iconv_name (to);
-       key = g_alloca (strlen (from) + strlen (to) + 2);
-       sprintf (key, "%s:%s", from, to);
-       
-       if ((node = g_iconv_cache_lookup (iconv_cache, key, TRUE))) {
-               if (node->used) {
-                       if ((cd = iconv_open (to, from)) == (iconv_t) -1)
-                               goto exception;
-               } else {
-                       /* Apparently iconv on Solaris <= 7 segfaults if you pass in
-                        * NULL for anything but inbuf; work around that. (NULL outbuf
-                        * or NULL *outbuf is allowed by Unix98.)
-                        */
-                       size_t inleft = 0, outleft = 0;
-                       char *outbuf = NULL;
-                       
-                       cd = node->cd;
-                       node->used = TRUE;
-                       
-                       /* reset the descriptor */
-                       iconv (cd, NULL, &inleft, &outbuf, &outleft);
-               }
-               
-               node->refcount++;
-       } else {
-               if ((cd = iconv_open (to, from)) == (iconv_t) -1)
-                       goto exception;
-               
-               node = g_iconv_cache_insert (iconv_cache, key, cd);
+       u = c;
+       for (i = 1; i < len; i++) {
+               u = (u << 6) | ((*inptr) & 0x3f);
+               inptr++;
        }
        
-       g_hash_table_insert (iconv_open_hash, cd, node->key);
-       
-       ICONV_CACHE_UNLOCK ();
-       
-       return (GIConv) cd;
-       
- exception:
+       *inbytesleft = inleft - len;
+       *inbytes = inptr;
+       *outchar = u;
        
-       ICONV_CACHE_UNLOCK ();
-       
-       return (GIConv) -1;
+       return 0;
 }
 
-int
-g_iconv_close (GIConv converter)
+static int
+encode_utf8 (gunichar c, char **outbytes, size_t *outbytesleft)
 {
-       GIConvCacheNode *node;
-       const char *key;
-       iconv_t cd;
-       
-       if (converter == (GIConv) -1)
-               return 0;
-       
-       cd = (iconv_t) converter;
-       
-       ICONV_CACHE_LOCK ();
+       size_t outleft = *outbytesleft;
+       char *outptr = *outbytes;
+       size_t len, i;
+       int base;
+       
+       if (c < 128UL) {
+               base = 0;
+               len = 1;
+       } else if (c < 2048UL) {
+               base = 192;
+               len = 2;
+       } else if (c < 65536UL) {
+               base = 224;
+               len = 3;
+       } else if (c < 2097152UL) {
+               base = 240;
+               len = 4;
+       } else if (c < 67108864UL) {
+               base = 248;     
+               len = 5;
+       } else if (c < 2147483648UL) {
+               base = 252;
+               len = 6;
+       } else {
+               errno = EINVAL;
+               return -1;
+       }
        
-       g_iconv_init ();
+       if (outleft < len) {
+               errno = E2BIG;
+               return -1;
+       }
        
-       if ((key = g_hash_table_lookup (iconv_open_hash, cd))) {
-               g_hash_table_remove (iconv_open_hash, cd);
-               
-               node = (GIConvCacheNode *) g_iconv_cache_lookup (iconv_cache, key, FALSE);
-               g_assert (node);
-               
-               if (iconv_cache->size > ICONV_CACHE_MAX_SIZE) {
-                       /* expire before unreffing this node so that it wont get uncached */
-                       g_iconv_cache_expire_unused (iconv_cache);
-               }
-               
-               node->refcount--;
-               
-               if (cd == node->cd)
-                       node->used = FALSE;
-               else
-                       iconv_close (cd);
-       } else {
-               ICONV_CACHE_UNLOCK ();
-               
-               /* really this is an error... someone is trying to close an
-                * iconv_t descriptor that wasn't opened by us. */
-               
-               return iconv_close (cd);
+       for (i = len - 1; i > 0; i--) {
+               /* mask off 6 bits worth and add 128 */
+               outptr[i] = 128 + (c & 0x3f);
+               c >>= 6;
        }
        
-       ICONV_CACHE_UNLOCK ();
+       /* first character has a different base */
+       outptr[0] = base + c;
+       
+       *outbytesleft = outleft - len;
+       *outbytes = outptr + len;
        
        return 0;
 }
index 6dd6da0f6affc3d48da5de838357261535ccdffd..6f666c62976429f0809d5c77400678cb1f56f85a 100644 (file)
@@ -308,6 +308,7 @@ gchar  *g_stpcpy             (gchar *dest, const char *src);
 gchar   g_ascii_tolower      (gchar c);
 gchar  *g_ascii_strdown      (const gchar *str, gssize len);
 gint    g_ascii_strncasecmp  (const gchar *s1, const gchar *s2, gsize n);
+gint    g_ascii_strcasecmp   (const gchar *s1, const gchar *s2);
 gint    g_ascii_xdigit_value (gchar c);
 #define g_ascii_isspace(c)   (isspace (c) != 0)
 #define g_ascii_isalpha(c)   (isalpha (c) != 0)
@@ -956,9 +957,9 @@ gboolean         g_markup_parse_context_end_parse (GMarkupParseContext *context,
  */
 typedef struct _GIConv *GIConv;
 
-gsize g_iconv (GIConv converter, gchar **inbuf, gsize *inleft, gchar **outbuf, gsize *outleft);
-GIConv g_iconv_open (const gchar *to, const gchar *from);
-int g_iconv_close (GIConv converter);
+gsize g_iconv (GIConv cd, gchar **inbytes, gsize *inbytesleft, gchar **outbytes, gsize *outbytesleft);
+GIConv g_iconv_open (const gchar *to_charset, const gchar *from_charset);
+int g_iconv_close (GIConv cd);
 
 gboolean  g_get_charset        (G_CONST_RETURN char **charset);
 gchar    *g_locale_to_utf8     (const gchar *opsysstring, gssize len,
@@ -1027,40 +1028,48 @@ gchar *   g_utf8_find_prev_char (const char *str, const char *p);
                                  
  
 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
-#   define GUINT32_TO_LE(x) (x)
-#   define GUINT64_TO_LE(x) (x)
-#   define GUINT16_TO_LE(x) (x)
-#   define GUINT_TO_LE(x)   (x)
-#   define GUINT32_TO_BE(x) GUINT32_SWAP_LE_BE(x)
-#   define GUINT16_FROM_BE(x) GUINT16_SWAP_LE_BE(x)
-#   define GUINT32_FROM_BE(x) GUINT32_SWAP_LE_BE(x)
 #   define GUINT64_FROM_BE(x) GUINT64_SWAP_LE_BE(x)
-#   define GINT16_FROM_BE(x) GUINT16_SWAP_LE_BE(x)
-#   define GINT32_FROM_BE(x) GUINT32_SWAP_LE_BE(x)
-#   define GINT64_FROM_BE(x) GUINT64_SWAP_LE_BE(x)
+#   define GUINT32_FROM_BE(x) GUINT32_SWAP_LE_BE(x)
+#   define GUINT16_FROM_BE(x) GUINT16_SWAP_LE_BE(x)
+#   define GUINT_FROM_BE(x)   GUINT32_SWAP_LE_BE(x)
+#   define GUINT64_FROM_LE(x) (x)
+#   define GUINT32_FROM_LE(x) (x)
+#   define GUINT16_FROM_LE(x) (x)
+#   define GUINT_FROM_LE(x)   (x)
+#   define GUINT64_TO_BE(x)   GUINT64_SWAP_LE_BE(x)
+#   define GUINT32_TO_BE(x)   GUINT32_SWAP_LE_BE(x)
+#   define GUINT16_TO_BE(x)   GUINT16_SWAP_LE_BE(x)
+#   define GUINT_TO_BE(x)     GUINT32_SWAP_LE_BE(x)
+#   define GUINT64_TO_LE(x)   (x)
+#   define GUINT32_TO_LE(x)   (x)
+#   define GUINT16_TO_LE(x)   (x)
+#   define GUINT_TO_LE(x)     (x)
 #else
-#   define GUINT32_TO_LE(x) GUINT32_SWAP_LE_BE(x)
-#   define GUINT64_TO_LE(x) GUINT64_SWAP_LE_BE(x)
-#   define GUINT16_TO_LE(x) GUINT16_SWAP_LE_BE(x)
-#   define GUINT_TO_LE(x)   GUINT32_SWAP_LE_BE(x)
-#   define GUINT32_TO_BE(x) (x)
-#   define GUINT16_FROM_BE(x) (x)
-#   define GUINT32_FROM_BE(x) (x)
 #   define GUINT64_FROM_BE(x) (x)
-#   define GINT16_FROM_BE(x) (x)
-#   define GINT32_FROM_BE(x) (x)
-#   define GINT64_FROM_BE(x) (x)
+#   define GUINT32_FROM_BE(x) (x)
+#   define GUINT16_FROM_BE(x) (x)
+#   define GUINT_FROM_BE(x)   (x)
+#   define GUINT64_FROM_LE(x) GUINT64_SWAP_LE_BE(x)
+#   define GUINT32_FROM_LE(x) GUINT32_SWAP_LE_BE(x)
+#   define GUINT16_FROM_LE(x) GUINT16_SWAP_LE_BE(x)
+#   define GUINT_FROM_LE(x)   GUINT32_SWAP_LE_BE(x)
+#   define GUINT64_TO_BE(x)   (x)
+#   define GUINT32_TO_BE(x)   (x)
+#   define GUINT16_TO_BE(x)   (x)
+#   define GUINT_TO_BE(x)     (x)
+#   define GUINT64_TO_LE(x)   GUINT64_SWAP_LE_BE(x)
+#   define GUINT32_TO_LE(x)   GUINT32_SWAP_LE_BE(x)
+#   define GUINT16_TO_LE(x)   GUINT16_SWAP_LE_BE(x)
+#   define GUINT_TO_LE(x)     GUINT32_SWAP_LE_BE(x)
 #endif
 
+#define GINT64_FROM_BE(x)   (GUINT64_TO_BE (x))
+#define GINT32_FROM_BE(x)   (GUINT32_TO_BE (x))
+#define GINT16_FROM_BE(x)   (GUINT16_TO_BE (x))
 #define GINT64_FROM_LE(x)   (GUINT64_TO_LE (x))
 #define GINT32_FROM_LE(x)   (GUINT32_TO_LE (x))
 #define GINT16_FROM_LE(x)   (GUINT16_TO_LE (x))
 
-#define GUINT32_FROM_LE(x)  (GUINT32_TO_LE (x))
-#define GUINT64_FROM_LE(x)  (GUINT64_TO_LE (x))
-#define GUINT16_FROM_LE(x)  (GUINT16_TO_LE (x))
-#define GUINT_FROM_LE(x)    (GUINT_TO_LE (x))
-
 #define _EGLIB_MAJOR  2
 #define _EGLIB_MIDDLE 4
 #define _EGLIB_MINOR  0
index 41ee60753b61608867877543d2b8d209545614b1..20596c123d38606ab04e928600c8a986b035a207 100644 (file)
@@ -708,22 +708,37 @@ g_ascii_strncasecmp (const gchar *s1, const gchar *s2, gsize n)
        g_return_val_if_fail (s1 != NULL, 0);
        g_return_val_if_fail (s2 != NULL, 0);
 
-       for (i = 0; i < n; i++){
+       for (i = 0; i < n; i++) {
                gchar c1 = g_ascii_tolower (*s1++);
                gchar c2 = g_ascii_tolower (*s2++);
                
-               if (c1 == c2)
-                       continue;
-               
-               if (c1 == 0)
-                       return -1;
-               if (c2 == 0)
-                       return 1;
-               return c1-c2;
+               if (c1 != c2)
+                       return c1 - c2;
        }
+       
        return 0;
 }
 
+/* Compares s1 and s2, folding each byte through g_ascii_tolower() first.
+ * Returns <0, 0 or >0 in the manner of strcmp(); the NULL guards return 0.
+ * Bytes are compared as unsigned char so bytes >= 0x80 keep the right sign. */
+gint
+g_ascii_strcasecmp (const gchar *s1, const gchar *s2)
+{
+       g_return_val_if_fail (s1 != NULL, 0);
+       g_return_val_if_fail (s2 != NULL, 0);
+       
+       while (*s1 != '\0') {
+               unsigned char c1 = (unsigned char) g_ascii_tolower (*s1++);
+               unsigned char c2 = (unsigned char) g_ascii_tolower (*s2++);
+               if (c1 != c2)
+                       return (gint) c1 - (gint) c2;
+       }
+       
+       /* s1 is exhausted: 0 if s2 ended too, otherwise s1 is a proper prefix */
+       return -(gint) (unsigned char) *s2;
+}
+
 gchar *
 g_strdelimit (gchar *string, const gchar *delimiters, gchar new_delimiter)
 {
index d49fb968d7a1adee256e4395a3b8b260af76ad69..7ef1874254ee44e2e65efb71de1638892ce4b1d5 100644 (file)
 /* FIXME */
 #  define CODESET 1
 #  include <windows.h>
-#  ifdef _MSC_VER
-       typedef int iconv_t;
-#  endif
 #else
 #    ifdef HAVE_LANGINFO_H
 #       include <langinfo.h>
 #    endif
-#    ifdef HAVE_ICONV_H
-#       include <iconv.h>
-#    endif
 #    ifdef HAVE_LOCALCHARSET_H
 #       include <localcharset.h>
 #    endif
@@ -93,7 +87,7 @@ static const gulong offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E208
 GUnicodeType 
 g_unichar_type (gunichar c)
 {
-int i;
+       int i;
 
        guint16 cp = (guint16) c;
        for (i = 0; i < unicode_category_ranges_count; i++) {
@@ -227,32 +221,29 @@ g_convert (const gchar *str, gssize len,
           const gchar *to_codeset, const gchar *from_codeset,
           gsize *bytes_read, gsize *bytes_written, GError **error)
 {
-       char *result = NULL;
-#ifdef G_OS_WIN32
-#elif HAVE_ICONV_H
-       iconv_t convertor;
-       char *buffer, *output;
-       const char *strptr = (const char *) str;
        size_t str_len = len == -1 ? strlen (str) : len;
-       size_t buffer_size;
-       size_t left, out_left;
+       const char *strptr = (const char *) str;
+       size_t left, out_left, buffer_size;
+       char *buffer, *output;
+       char *result = NULL;
+       GIConv cd;
        
-       convertor = iconv_open (to_codeset, from_codeset);
-       if (convertor == (iconv_t) -1){
+       if ((cd = g_iconv_open (to_codeset, from_codeset)) == (GIConv) -1) {
                if (bytes_written)
                        *bytes_written = 0;
                if (bytes_read)
                        *bytes_read = 0;
                return NULL;
        }
-
+       
        buffer_size = str_len + 1 + 8;
        buffer = g_malloc (buffer_size);
        out_left = str_len;
        output = buffer;
        left = str_len;
+       
        while (left > 0){
-               int res = iconv (convertor, (char **) &strptr, &left, &output, &out_left);
+               int res = g_iconv (cd, (char **) &strptr, &left, &output, &out_left);
                if (res == (size_t) -1){
                        if (errno == E2BIG){
                                char *n;
@@ -295,8 +286,8 @@ g_convert (const gchar *str, gssize len,
        *output = 0;
        result = buffer;
  leave:
-       iconv_close (convertor);
-#endif
+       g_iconv_close (cd);
+       
        return result;
 }