1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * Copyright (C) 2011 Jeffrey Stedfast
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use, copy,
9 * modify, merge, publish, distribute, sublicense, and/or sell copies
10 * of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
/* FORCE_INLINE(RET_TYPE): spells a function's return type together with a
 * force-inline qualifier.  Two spellings appear below: the `__forceinline`
 * keyword form and the `inline ... __attribute__((always_inline))` form.
 * NOTE(review): the preprocessor conditional that picks one of them is not
 * visible in this excerpt -- presumably a compiler check; confirm. */
38 #define FORCE_INLINE(RET_TYPE) __forceinline RET_TYPE
40 #define FORCE_INLINE(RET_TYPE) inline RET_TYPE __attribute__((always_inline))
/* Build-time switches tested with `#if` inside decode_utf8 and encode_utf8.
 * The guarded sections hold `case`-label variants of the per-byte work that
 * the plain `for` loops perform otherwise.  Both switches are set to 0. */
44 #define UNROLL_DECODE_UTF8 0
45 #define UNROLL_ENCODE_UTF8 0
/* Decoder: reads a character from `inbuf` (with `inleft` bytes available) and
 * stores the resulting code point through `outchar`.
 * Encoder: writes code point `c` into `outbuf`, which has `outleft` bytes of
 * room.
 * Callers in this file treat a negative return value as failure.  The meaning
 * of a non-negative return is not shown here -- presumably the number of
 * bytes consumed / produced (TODO confirm). */
47 typedef int (* Decoder) (char *inbuf, size_t inleft, gunichar *outchar);
48 typedef int (* Encoder) (gunichar c, char *outbuf, size_t outleft);
/* Forward declarations of the built-in codecs, so the charset table can
 * reference them ahead of their definitions.  Every decode_X has the Decoder
 * signature and every encode_X has the Encoder signature.  decode_utf8 is the
 * only one declared with FORCE_INLINE. */
59 static int decode_utf32be (char *inbuf, size_t inleft, gunichar *outchar);
60 static int encode_utf32be (gunichar c, char *outbuf, size_t outleft);
62 static int decode_utf32le (char *inbuf, size_t inleft, gunichar *outchar);
63 static int encode_utf32le (gunichar c, char *outbuf, size_t outleft);
65 static int decode_utf16be (char *inbuf, size_t inleft, gunichar *outchar);
66 static int encode_utf16be (gunichar c, char *outbuf, size_t outleft);
68 static int decode_utf16le (char *inbuf, size_t inleft, gunichar *outchar);
69 static int encode_utf16le (gunichar c, char *outbuf, size_t outleft);
71 static FORCE_INLINE (int) decode_utf8 (char *inbuf, size_t inleft, gunichar *outchar);
72 static int encode_utf8 (gunichar c, char *outbuf, size_t outleft);
74 static int decode_latin1 (char *inbuf, size_t inleft, gunichar *outchar);
75 static int encode_latin1 (gunichar c, char *outbuf, size_t outleft);
/* Bind the unsuffixed UTF-16 / UTF-32 codec names to a concrete byte order.
 * The first group (little-endian variants) directly follows the
 * `G_BYTE_ORDER == G_LITTLE_ENDIAN` test.
 * NOTE(review): the second group (big-endian variants) presumably lives in
 * an `#else` branch whose directive lines are not visible here -- confirm. */
77 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
78 #define decode_utf32 decode_utf32le
79 #define encode_utf32 encode_utf32le
80 #define decode_utf16 decode_utf16le
81 #define encode_utf16 encode_utf16le
83 #define decode_utf32 decode_utf32be
84 #define encode_utf32 encode_utf32be
85 #define decode_utf16 decode_utf16be
86 #define encode_utf16 encode_utf16be
/* Entries of the built-in charset table, each { name, decoder, encoder }.
 * g_iconv_open walks this table as `charsets[]` and matches names with
 * g_ascii_strcasecmp, so lookups are case-insensitive.
 * "US-ASCII" and "ASCII" share the Latin-1 codecs.  decode_latin1 stores the
 * input byte as the code point without any range check, so bytes >= 0x80 are
 * not rejected when decoding input labelled as ASCII.
 * NOTE(review): the array declaration and closing brace are not visible in
 * this excerpt. */
94 { "ISO-8859-1", decode_latin1, encode_latin1 },
95 { "ISO8859-1", decode_latin1, encode_latin1 },
96 { "UTF-32BE", decode_utf32be, encode_utf32be },
97 { "UTF-32LE", decode_utf32le, encode_utf32le },
98 { "UTF-16BE", decode_utf16be, encode_utf16be },
99 { "UTF-16LE", decode_utf16le, encode_utf16le },
/* unsuffixed UTF-32 / UTF-16 resolve to the host-endian codec macros */
100 { "UTF-32", decode_utf32, encode_utf32 },
101 { "UTF-16", decode_utf16, encode_utf16 },
102 { "UTF-8", decode_utf8, encode_utf8 },
103 { "US-ASCII", decode_latin1, encode_latin1 },
104 { "Latin1", decode_latin1, encode_latin1 },
105 { "ASCII", decode_latin1, encode_latin1 },
106 { "UTF32", decode_utf32, encode_utf32 },
107 { "UTF16", decode_utf16, encode_utf16 },
108 { "UTF8", decode_utf8, encode_utf8 },
/* g_iconv_open: create a conversion descriptor from `from_charset` to
 * `to_charset`.
 *
 * The built-in charset table is searched (case-insensitively) for a decoder
 * matching `from_charset` and an encoder matching `to_charset`.  If either is
 * missing, the request is passed to the system iconv_open() instead.  A
 * struct _GIConv is then allocated and the chosen decode / encode functions
 * are stored in it.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * g_convert compares the result against (GIConv) -1 to detect failure. */
113 g_iconv_open (const char *to_charset, const char *from_charset)
116 iconv_t icd = (iconv_t) -1;
118 Decoder decoder = NULL;
119 Encoder encoder = NULL;
/* missing or empty charset names are handled separately */
123 if (!to_charset || !from_charset || !to_charset[0] || !from_charset[0]) {
/* the whole table is scanned; the two names are matched independently */
129 for (i = 0; i < G_N_ELEMENTS (charsets); i++) {
130 if (!g_ascii_strcasecmp (charsets[i].name, from_charset))
131 decoder = charsets[i].decoder;
133 if (!g_ascii_strcasecmp (charsets[i].name, to_charset))
134 encoder = charsets[i].encoder;
/* no built-in codec for at least one side: try the system iconv */
137 if (!encoder || !decoder) {
139 if ((icd = iconv_open (to_charset, from_charset)) == (iconv_t) -1)
/* allocate the descriptor and record the selected codec pair */
148 cd = (GIConv) g_malloc (sizeof (struct _GIConv));
149 cd->decode = decoder;
150 cd->encode = encoder;
/* g_iconv_close: release a conversion descriptor.
 * When the descriptor wraps a system iconv handle (cd->cd is not
 * (iconv_t) -1), that handle is closed with iconv_close().
 * NOTE(review): freeing of `cd` itself and the return value are not visible
 * in this excerpt. */
161 g_iconv_close (GIConv cd)
164 if (cd->cd != (iconv_t) -1)
165 iconv_close (cd->cd);
/* g_iconv: convert bytes from *inbytes into *outbytes using `cd`.
 *
 * If `cd` wraps a system iconv handle, the call is forwarded to iconv().
 * Otherwise the descriptor's built-in decode / encode pair is used and the
 * caller's buffer pointers and remaining-byte counters are updated.
 *
 * On the built-in path the function returns -1 when the last decode or
 * encode step reported a negative result, and 0 otherwise. */
174 g_iconv (GIConv cd, gchar **inbytes, gsize *inbytesleft,
175 gchar **outbytes, gsize *outbytesleft)
177 gsize inleft, outleft;
178 char *inptr, *outptr;
/* system iconv path */
183 if (cd->cd != (iconv_t) -1) {
184 /* Note: gsize may have a different size than size_t, so we need to
185 remap inbytesleft and outbytesleft to size_t's. */
186 size_t *outleftptr, *inleftptr;
187 size_t n_outleft, n_inleft;
190 n_inleft = *inbytesleft;
191 inleftptr = &n_inleft;
197 n_outleft = *outbytesleft;
198 outleftptr = &n_outleft;
/* NOTE(review): iconv() updates the local size_t copies; no visible line
 * copies n_inleft / n_outleft back into *inbytesleft / *outbytesleft before
 * returning -- confirm the caller's counters are not left stale. */
203 return iconv (cd->cd, inbytes, inleftptr, outbytes, outleftptr);
/* built-in path: an output buffer and its counter are required */
207 if (outbytes == NULL || outbytesleft == NULL) {
208 /* reset converter */
/* a NULL input pointer or counter is treated as "no input" */
213 inleft = inbytesleft ? *inbytesleft : 0;
214 inptr = inbytes ? *inbytes : NULL;
215 outleft = *outbytesleft;
/* cd->c carries a code point between calls; (gunichar) -1 means none.
 * NOTE(review): presumably a character that was decoded earlier but could
 * not be encoded yet -- the branch body is not visible; confirm. */
218 if ((c = cd->c) != (gunichar) -1)
222 if ((rc = cd->decode (inptr, inleft, &c)) < 0)
229 if ((rc = cd->encode (c, outptr, outleft)) < 0)
/* report the remaining byte counts back to the caller */
238 *inbytesleft = inleft;
243 *outbytesleft = outleft;
247 return rc < 0 ? -1 : 0;
251 * Unicode encoders and decoders
/* decode_utf32be: assemble one code point from four big-endian bytes.
 * Values in the surrogate range (0xd800..0xdfff) and values >= 0x110000 are
 * singled out by the checks below; the branch bodies are not visible here
 * (presumably they report an illegal sequence -- confirm). */
255 decode_utf32be (char *inbuf, size_t inleft, gunichar *outchar)
257 unsigned char *inptr = (unsigned char *) inbuf;
/* NOTE(review): inptr[0] is an unsigned char and is promoted to int before
 * the shift.  With a 32-bit int, a first byte >= 0x80 shifted left by 24
 * overflows a signed int, which is undefined behavior in C.  Consider casting
 * each byte to an unsigned 32-bit type before shifting. */
265 c = (inptr[0] << 24) | (inptr[1] << 16) | (inptr[2] << 8) | inptr[3];
267 if (c >= 0xd800 && c < 0xe000) {
270 } else if (c >= 0x110000) {
/* decode_utf32le: assemble one code point from four little-endian bytes.
 * Values in the surrogate range (0xd800..0xdfff) and values >= 0x110000 are
 * singled out by the checks below; the branch bodies are not visible here
 * (presumably they report an illegal sequence -- confirm). */
281 decode_utf32le (char *inbuf, size_t inleft, gunichar *outchar)
283 unsigned char *inptr = (unsigned char *) inbuf;
/* NOTE(review): inptr[3] is an unsigned char and is promoted to int before
 * the shift.  With a 32-bit int, a last byte >= 0x80 shifted left by 24
 * overflows a signed int, which is undefined behavior in C.  Consider casting
 * each byte to an unsigned 32-bit type before shifting. */
291 c = (inptr[3] << 24) | (inptr[2] << 16) | (inptr[1] << 8) | inptr[0];
293 if (c >= 0xd800 && c < 0xe000) {
296 } else if (c >= 0x110000) {
/* encode_utf32be: store code point `c` as four bytes, most significant byte
 * first.
 * NOTE(review): the check of `outleft` and the return value are not visible
 * in this excerpt. */
307 encode_utf32be (gunichar c, char *outbuf, size_t outleft)
309 unsigned char *outptr = (unsigned char *) outbuf;
316 outptr[0] = (c >> 24) & 0xff;
317 outptr[1] = (c >> 16) & 0xff;
318 outptr[2] = (c >> 8) & 0xff;
319 outptr[3] = c & 0xff;
/* encode_utf32le: store code point `c` as four bytes, least significant byte
 * first.
 * NOTE(review): the check of `outleft` and the return value are not visible
 * in this excerpt. */
325 encode_utf32le (gunichar c, char *outbuf, size_t outleft)
327 unsigned char *outptr = (unsigned char *) outbuf;
334 outptr[0] = c & 0xff;
335 outptr[1] = (c >> 8) & 0xff;
336 outptr[2] = (c >> 16) & 0xff;
337 outptr[3] = (c >> 24) & 0xff;
/* decode_utf16be: decode one code point from big-endian UTF-16.
 * The first 16-bit unit `u` is classified by range (see the range comments
 * below).  A high surrogate (0xd800..0xdbff) is combined with the following
 * unit, which must lie in 0xdc00..0xdfff.
 * NOTE(review): return statements and input-length checks are not visible
 * here.  Callers in this file interpret a return of -2 as "first unit read,
 * second unit failed". */
343 decode_utf16be (char *inbuf, size_t inleft, gunichar *outchar)
345 unsigned char *inptr = (unsigned char *) inbuf;
354 u = (inptr[0] << 8) | inptr[1];
357 /* 0x0000 -> 0xd7ff */
360 } else if (u < 0xdc00) {
361 /* 0xd800 -> 0xdbff */
/* read the second 16-bit unit, expected to be a low surrogate */
367 c = (inptr[2] << 8) | inptr[3];
369 if (c < 0xdc00 || c > 0xdfff) {
/* combine: 10 bits from each surrogate, offset by 0x10000 */
374 u = ((u - 0xd800) << 10) + (c - 0xdc00) + 0x0010000UL;
378 } else if (u < 0xe000) {
379 /* 0xdc00 -> 0xdfff */
383 /* 0xe000 -> 0xffff */
/* decode_utf16le: decode one code point from little-endian UTF-16.
 * The first 16-bit unit `u` is classified by range (see the range comments
 * below).  A high surrogate (0xd800..0xdbff) is combined with the following
 * unit, which must lie in 0xdc00..0xdfff.
 * NOTE(review): return statements and input-length checks are not visible
 * here.  Callers in this file interpret a return of -2 as "first unit read,
 * second unit failed". */
390 decode_utf16le (char *inbuf, size_t inleft, gunichar *outchar)
392 unsigned char *inptr = (unsigned char *) inbuf;
401 u = (inptr[1] << 8) | inptr[0];
404 /* 0x0000 -> 0xd7ff */
407 } else if (u < 0xdc00) {
408 /* 0xd800 -> 0xdbff */
/* read the second 16-bit unit, expected to be a low surrogate */
414 c = (inptr[3] << 8) | inptr[2];
416 if (c < 0xdc00 || c > 0xdfff) {
/* combine: 10 bits from each surrogate, offset by 0x10000 */
421 u = ((u - 0xd800) << 10) + (c - 0xdc00) + 0x0010000UL;
425 } else if (u < 0xe000) {
426 /* 0xdc00 -> 0xdfff */
430 /* 0xe000 -> 0xffff */
/* encode_utf16be: write code point `c` as big-endian UTF-16, either as one
 * 16-bit unit or as a surrogate pair (four bytes).
 * NOTE(review): the range tests, the `outleft` checks, the return values and
 * the computation of `c2` are not visible in this excerpt; `c2` is
 * presumably `c - 0x10000` -- confirm. */
437 encode_utf16be (gunichar c, char *outbuf, size_t outleft)
439 unsigned char *outptr = (unsigned char *) outbuf;
/* single-unit form: high byte first */
449 outptr[0] = (c >> 8) & 0xff;
450 outptr[1] = c & 0xff;
/* surrogate pair: upper bits of c2 go into the high surrogate */
461 ch = (gunichar2) ((c2 >> 10) + 0xd800);
462 outptr[0] = (ch >> 8) & 0xff;
463 outptr[1] = ch & 0xff;
/* low 10 bits of c2 go into the low surrogate */
465 ch = (gunichar2) ((c2 & 0x3ff) + 0xdc00);
466 outptr[2] = (ch >> 8) & 0xff;
467 outptr[3] = ch & 0xff;
/* encode_utf16le: write code point `c` as little-endian UTF-16, either as
 * one 16-bit unit or as a surrogate pair (four bytes).
 * NOTE(review): the range tests, the `outleft` checks, the return values and
 * the computation of `c2` are not visible in this excerpt; `c2` is
 * presumably `c - 0x10000` -- confirm. */
474 encode_utf16le (gunichar c, char *outbuf, size_t outleft)
476 unsigned char *outptr = (unsigned char *) outbuf;
/* single-unit form: low byte first */
486 outptr[0] = c & 0xff;
487 outptr[1] = (c >> 8) & 0xff;
/* surrogate pair: upper bits of c2 go into the high surrogate */
498 ch = (gunichar2) ((c2 >> 10) + 0xd800);
499 outptr[0] = ch & 0xff;
500 outptr[1] = (ch >> 8) & 0xff;
/* low 10 bits of c2 go into the low surrogate */
502 ch = (gunichar2) ((c2 & 0x3ff) + 0xdc00);
503 outptr[2] = ch & 0xff;
504 outptr[3] = (ch >> 8) & 0xff;
/* decode_utf8: decode one code point from a UTF-8 byte sequence.
 * The lead byte `u` is classified by the threshold chain below.  The
 * remaining bytes are then folded in six bits at a time; XOR with 0x80
 * clears the continuation marker bit of a well-formed continuation byte.
 * The thresholds up to 0xfe and the `case 6` / `case 5` labels show that
 * sequence lengths up to six bytes are handled.
 * NOTE(review): no visible line checks that continuation bytes have the
 * 10xxxxxx form or that the encoding is the shortest one -- confirm whether
 * that validation exists in lines not shown here. */
510 static FORCE_INLINE (int)
511 decode_utf8 (char *inbuf, size_t inleft, gunichar *outchar)
513 unsigned char *inptr = (unsigned char *) inbuf;
520 /* simple ascii case */
523 } else if (u < 0xc2) {
526 } else if (u < 0xe0) {
529 } else if (u < 0xf0) {
532 } else if (u < 0xf8) {
535 } else if (u < 0xfc) {
538 } else if (u < 0xfe) {
/* unrolled variant: case labels for sequence lengths 6 down to 2 */
551 #if UNROLL_DECODE_UTF8
553 case 6: u = (u << 6) | (*++inptr ^ 0x80);
554 case 5: u = (u << 6) | (*++inptr ^ 0x80);
555 case 4: u = (u << 6) | (*++inptr ^ 0x80);
556 case 3: u = (u << 6) | (*++inptr ^ 0x80);
557 case 2: u = (u << 6) | (*++inptr ^ 0x80);
/* loop variant: same per-byte step for the n - 1 trailing bytes */
560 for (i = 1; i < n; i++)
561 u = (u << 6) | (*++inptr ^ 0x80);
/* encode_utf8: write code point `c` as a UTF-8 byte sequence.
 * The threshold chain selects the sequence length from the magnitude of `c`.
 * Trailing bytes are filled from the end, six bits at a time, each tagged
 * with 0x80; the first byte finally receives the remaining bits combined
 * with `base`.
 * NOTE(review): the assignments of `n` and `base`, the `outleft` check and
 * the return value are not visible in this excerpt. */
570 encode_utf8 (gunichar c, char *outbuf, size_t outleft)
572 unsigned char *outptr = (unsigned char *) outbuf;
578 } else if (c < 0x800) {
581 } else if (c < 0x10000) {
584 } else if (c < 0x200000) {
587 } else if (c < 0x4000000) {
/* unrolled variant: case labels for sequence lengths 6 down to 1 */
600 #if UNROLL_ENCODE_UTF8
602 case 6: outptr[5] = (c & 0x3f) | 0x80; c >>= 6;
603 case 5: outptr[4] = (c & 0x3f) | 0x80; c >>= 6;
604 case 4: outptr[3] = (c & 0x3f) | 0x80; c >>= 6;
605 case 3: outptr[2] = (c & 0x3f) | 0x80; c >>= 6;
606 case 2: outptr[1] = (c & 0x3f) | 0x80; c >>= 6;
607 case 1: outptr[0] = c | base;
/* loop variant: fill trailing bytes from last to second, then the lead */
610 for (i = n - 1; i > 0; i--) {
611 outptr[i] = (c & 0x3f) | 0x80;
615 outptr[0] = c | base;
/* decode_latin1: the code point is the input byte itself, read as an
 * unsigned value, so every byte value 0x00..0xff is accepted.
 * NOTE(review): the return statement is not visible in this excerpt. */
622 decode_latin1 (char *inbuf, size_t inleft, gunichar *outchar)
624 *outchar = (unsigned char) *inbuf;
/* encode_latin1: encoder paired with decode_latin1 in the charset table.
 * NOTE(review): only the signature is visible in this excerpt, so how code
 * points above 0xff are treated cannot be determined from here. */
629 encode_latin1 (gunichar c, char *outbuf, size_t outleft)
648 * Simple conversion API
/* File-local pointer to the constant string "ConvertError".
 * NOTE(review): presumably this is what g_convert_error_quark() returns to
 * identify the conversion error domain; the function body is not visible in
 * this excerpt -- confirm. */
651 static gpointer error_quark = "ConvertError";
654 g_convert_error_quark (void)
/* g_convert: convert `str` from `from_charset` to `to_charset` into a newly
 * allocated buffer.
 *
 * str:           input bytes; must not be NULL
 * len:           input length in bytes; a negative value means strlen (str)
 * to_charset:    target charset name; must not be NULL
 * from_charset:  source charset name; must not be NULL
 * bytes_read:    receives the input offset reached (on an illegal sequence,
 *                the offset of that sequence)
 * bytes_written: receives the number of output bytes before the terminator
 * err:           receives a G_CONVERT_ERROR describing a failure
 *
 * The output buffer starts at MAX (inleft, 8) bytes and is grown whenever
 * g_iconv runs out of room.  Four extra bytes are always allocated so the
 * result can be terminated with four zero bytes.
 * NOTE(review): the `NULL` guards around the out-parameter stores, the errno
 * dispatch and the return statements are not visible in this excerpt. */
660 g_convert (const gchar *str, gssize len, const gchar *to_charset, const gchar *from_charset,
661 gsize *bytes_read, gsize *bytes_written, GError **err)
663 gsize outsize, outused, outleft, inleft, grow, rc;
664 char *result, *outbuf, *inbuf;
665 gboolean flush = FALSE;
666 gboolean done = FALSE;
669 g_return_val_if_fail (str != NULL, NULL);
670 g_return_val_if_fail (to_charset != NULL, NULL);
671 g_return_val_if_fail (from_charset != NULL, NULL);
673 if ((cd = g_iconv_open (to_charset, from_charset)) == (GIConv) -1) {
674 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
675 "Conversion from %s to %s not supported.",
676 from_charset, to_charset);
/* a negative len means str is NUL-terminated */
687 inleft = len < 0 ? strlen (str) : len;
688 inbuf = (char *) str;
/* initial capacity: the input size, at least 8, plus 4 terminator bytes */
690 outleft = outsize = MAX (inleft, 8);
691 outbuf = result = g_malloc (outsize + 4);
/* conversion step with input ... */
695 rc = g_iconv (cd, &inbuf, &inleft, &outbuf, &outleft);
/* ... or flush step with no input */
697 rc = g_iconv (cd, NULL, NULL, &outbuf, &outleft);
699 if (rc == (gsize) -1) {
702 /* grow our result buffer */
703 grow = MAX (inleft, 8) << 1;
/* remember the write offset: g_realloc may move the buffer */
704 outused = outbuf - result;
707 result = g_realloc (result, outsize + 4);
708 outbuf = result + outused;
711 /* incomplete input, stop converting and terminate here */
718 /* illegal sequence in the input */
719 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "%s", g_strerror (errno));
722 /* save offset of the illegal input sequence */
723 *bytes_read = (inbuf - str);
734 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED, "%s", g_strerror (errno));
747 /* input has been converted and output has been flushed */
750 /* input has been converted, need to flush the output */
757 /* Note: not all charsets can be null-terminated with a single
758 null byte. UCS2, for example, needs 2 null bytes and UCS4
759 needs 4. I hope that 4 null bytes is enough to terminate all
760 multibyte charsets? */
762 /* null-terminate the result */
763 memset (outbuf, 0, 4);
766 *bytes_written = outbuf - result;
769 *bytes_read = inbuf - str;
780 * from http://home.tiscali.nl/t876506/utf8tbl.html
782 * From Unicode UCS-4 to UTF-8:
783 * Start with the Unicode number expressed as a decimal number and call this ud.
785 * If ud <128 (7F hex) then UTF-8 is 1 byte long, the value of ud.
787 * If ud >=128 and <=2047 (7FF hex) then UTF-8 is 2 bytes long.
788 * byte 1 = 192 + (ud div 64)
789 * byte 2 = 128 + (ud mod 64)
791 * If ud >=2048 and <=65535 (FFFF hex) then UTF-8 is 3 bytes long.
792 * byte 1 = 224 + (ud div 4096)
793 * byte 2 = 128 + ((ud div 64) mod 64)
794 * byte 3 = 128 + (ud mod 64)
796 * If ud >=65536 and <=2097151 (1FFFFF hex) then UTF-8 is 4 bytes long.
797 * byte 1 = 240 + (ud div 262144)
798 * byte 2 = 128 + ((ud div 4096) mod 64)
799 * byte 3 = 128 + ((ud div 64) mod 64)
800 * byte 4 = 128 + (ud mod 64)
802 * If ud >=2097152 and <=67108863 (3FFFFFF hex) then UTF-8 is 5 bytes long.
803 * byte 1 = 248 + (ud div 16777216)
804 * byte 2 = 128 + ((ud div 262144) mod 64)
805 * byte 3 = 128 + ((ud div 4096) mod 64)
806 * byte 4 = 128 + ((ud div 64) mod 64)
807 * byte 5 = 128 + (ud mod 64)
809 * If ud >=67108864 and <=2147483647 (7FFFFFFF hex) then UTF-8 is 6 bytes long.
810 * byte 1 = 252 + (ud div 1073741824)
811 * byte 2 = 128 + ((ud div 16777216) mod 64)
812 * byte 3 = 128 + ((ud div 262144) mod 64)
813 * byte 4 = 128 + ((ud div 4096) mod 64)
814 * byte 5 = 128 + ((ud div 64) mod 64)
815 * byte 6 = 128 + (ud mod 64)
/* g_unichar_to_utf8: encode code point `c` as UTF-8 into `outbuf`.
 * The threshold chain selects the sequence length (code points below
 * 0x80000000 are covered by the visible branches).  Bytes are written only
 * when `outbuf` is not NULL, so callers can pass NULL to obtain just the
 * length; other functions in this file do that to size their buffers and
 * treat a negative result as an unencodable value.
 * NOTE(review): the assignments of `n` and `base` and the return statement
 * are not visible in this excerpt. */
818 g_unichar_to_utf8 (gunichar c, gchar *outbuf)
825 } else if (c < 0x800) {
828 } else if (c < 0x10000) {
831 } else if (c < 0x200000) {
834 } else if (c < 0x4000000) {
837 } else if (c < 0x80000000) {
844 if (outbuf != NULL) {
845 for (i = n - 1; i > 0; i--) {
846 /* mask off 6 bits worth and add 128 */
847 outbuf[i] = (c & 0x3f) | 0x80;
851 /* first character has a different base */
852 outbuf[0] = c | base;
/* g_unichar_to_utf16: encode code point `c` as UTF-16 units in `outbuf`.
 * Code points in the two single-unit ranges are stored as one unit.  Code
 * points from 0x10000 up to (but excluding) 0x110000 are stored as a
 * surrogate pair.  The `c < 0xe000` branch separates the surrogate range
 * from the single-unit ranges.
 * Callers in this file pass a NULL `outbuf` to measure the output and treat
 * a negative result as an error.
 * NOTE(review): the guards that skip the stores when `outbuf` is NULL, the
 * computation of `c2` (presumably `c - 0x10000`) and the return values are
 * not visible in this excerpt -- confirm. */
858 static FORCE_INLINE (int)
859 g_unichar_to_utf16 (gunichar c, gunichar2 *outbuf)
865 *outbuf = (gunichar2) c;
868 } else if (c < 0xe000) {
870 } else if (c < 0x10000) {
872 *outbuf = (gunichar2) c;
875 } else if (c < 0x110000) {
/* high surrogate from the upper bits, low surrogate from the low 10 bits */
879 outbuf[0] = (gunichar2) ((c2 >> 10) + 0xd800);
880 outbuf[1] = (gunichar2) ((c2 & 0x3ff) + 0xdc00);
/* g_utf8_to_ucs4_fast: convert UTF-8 text to an array of code points.
 * The character count comes from g_utf8_strlen (str, len); the result is
 * allocated with room for one extra element.  Characters are read with
 * g_utf8_get_char / g_utf8_next_char.  No error reporting is visible in this
 * function (it takes no GError argument).
 * NOTE(review): the terminator store, the `items_written` store and the
 * return statement are not visible in this excerpt. */
890 g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written)
892 gunichar *outbuf, *outptr;
896 g_return_val_if_fail (str != NULL, NULL);
898 n = g_utf8_strlen (str, len);
903 outptr = outbuf = g_malloc ((n + 1) * sizeof (gunichar));
904 inptr = (char *) str;
906 for (i = 0; i < n; i++) {
907 *outptr++ = g_utf8_get_char (inptr);
908 inptr = g_utf8_next_char (inptr);
/* eg_utf8_to_utf16_general: convert UTF-8 to a newly allocated UTF-16 array.
 *
 * The work is done in two passes over the input.  The first pass decodes
 * each character and asks g_unichar_to_utf16 (c, NULL) for its size.  The
 * second pass allocates outlen + 1 units and writes them.
 * `include_nuls` controls whether a decoded zero code point gets special
 * treatment (the `c == 0 && !include_nuls` tests).
 * `items_read` receives the input byte offset reached; `items_written`
 * receives the number of UTF-16 units.
 * On a decode failure, errno == EILSEQ is reported as an illegal sequence.
 * Any other failure is treated as partial input, which is tolerated when
 * `items_read` is available to tell the caller where conversion stopped.
 *
 * NOTE(review): loop headers, the condition guarding the "embedded nulls"
 * error, and the return statements are not visible in this excerpt.  Also,
 * the illegal-sequence message spells "encounted" (sic); it is a runtime
 * string and is left untouched here. */
917 eg_utf8_to_utf16_general (const gchar *str, glong len, glong *items_read, glong *items_written, gboolean include_nuls, GError **err)
919 gunichar2 *outbuf, *outptr;
926 g_return_val_if_fail (str != NULL, NULL);
930 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED, "Conversions with embedded nulls must pass the string length");
/* pass 1: measure */
937 inptr = (char *) str;
941 if ((n = decode_utf8 (inptr, inleft, &c)) < 0)
944 if (c == 0 && !include_nuls)
947 if ((u = g_unichar_to_utf16 (c, NULL)) < 0) {
958 *items_read = inptr - str;
961 *items_written = outlen;
/* pass 2: allocate (one extra unit) and write */
963 outptr = outbuf = g_malloc ((outlen + 1) * sizeof (gunichar2));
964 inptr = (char *) str;
968 if ((n = decode_utf8 (inptr, inleft, &c)) < 0)
971 if (c == 0 && !include_nuls)
974 outptr += g_unichar_to_utf16 (c, outptr);
/* error reporting for a failed decode */
984 if (errno == EILSEQ) {
985 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
986 "Illegal byte sequence encounted in the input.");
987 } else if (items_read) {
988 /* partial input is ok if we can let our caller know... */
991 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
992 "Partial byte sequence encountered in the input.");
996 *items_read = inptr - str;
/* g_utf8_to_utf16: UTF-8 to UTF-16 conversion; forwards all arguments to
 * eg_utf8_to_utf16_general with include_nuls set to FALSE. */
1005 g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
1007 return eg_utf8_to_utf16_general (str, len, items_read, items_written, FALSE, err);
/* eg_utf8_to_utf16_with_nuls: UTF-8 to UTF-16 conversion; forwards all
 * arguments to eg_utf8_to_utf16_general with include_nuls set to TRUE. */
1011 eg_utf8_to_utf16_with_nuls (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
1013 return eg_utf8_to_utf16_general (str, len, items_read, items_written, TRUE, err);
/* g_utf8_to_ucs4: convert UTF-8 to a newly allocated array of code points.
 *
 * The first loop decodes the input to validate it and to compute `outlen`,
 * which is kept in bytes: `items_written` is outlen / 4 and the allocation is
 * outlen + 4.  The second loop decodes again to fill the buffer.
 * `items_read` receives the input byte offset reached.
 * On a decode failure, errno == EILSEQ is reported as an illegal sequence.
 * Any other failure is treated as partial input, which is tolerated when
 * `items_read` is available.
 *
 * NOTE(review): the `inleft` setup, the loop bodies after the decode call,
 * the terminator store and the return statements are not visible in this
 * excerpt. */
1017 g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **err)
1019 gunichar *outbuf, *outptr;
1026 g_return_val_if_fail (str != NULL, NULL);
/* pass 1: validate and measure */
1031 inptr = (char *) str;
1034 while (inleft > 0) {
1035 if ((n = decode_utf8 (inptr, inleft, &c)) < 0) {
1036 if (errno == EILSEQ) {
1037 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1038 "Illegal byte sequence encounted in the input.");
1039 } else if (items_read) {
1040 /* partial input is ok if we can let our caller know... */
1043 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
1044 "Partial byte sequence encountered in the input.");
1048 *items_read = inptr - str;
1063 *items_written = outlen / 4;
1066 *items_read = inptr - str;
/* pass 2: allocate (4 extra bytes) and fill */
1068 outptr = outbuf = g_malloc (outlen + 4);
1069 inptr = (char *) str;
1072 while (inleft > 0) {
1073 if ((n = decode_utf8 (inptr, inleft, &c)) < 0)
/* g_utf16_to_utf8: convert UTF-16 to a newly allocated UTF-8 string.
 *
 * The input is walked as raw bytes through decode_utf16 (the host-endian
 * codec alias).  The first loop validates the input and sums the UTF-8 size
 * of each code point via g_unichar_to_utf8 (c, NULL).  The second loop
 * writes into a buffer of outlen + 1 bytes.
 * `items_read` is reported in UTF-16 units (byte offset divided by 2);
 * `items_written` is the UTF-8 byte count.
 * A decode result of -2 with more than two bytes left means the first unit
 * was read but the second one failed.  errno == EILSEQ is reported as an
 * illegal sequence; any other failure is treated as partial input, tolerated
 * when `items_read` is available.
 *
 * NOTE(review): the `inleft` setup, pointer advances, the terminator store
 * and the return statements are not visible in this excerpt. */
1089 g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err)
1091 char *inptr, *outbuf, *outptr;
1097 g_return_val_if_fail (str != NULL, NULL);
/* pass 1: validate and measure */
1105 inptr = (char *) str;
1108 while (inleft > 0) {
1109 if ((n = decode_utf16 (inptr, inleft, &c)) < 0) {
1110 if (n == -2 && inleft > 2) {
1111 /* This means that the first UTF-16 char was read, but second failed */
1116 if (errno == EILSEQ) {
1117 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1118 "Illegal byte sequence encounted in the input.");
1119 } else if (items_read) {
1120 /* partial input is ok if we can let our caller know... */
1123 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
1124 "Partial byte sequence encountered in the input.");
1128 *items_read = (inptr - (char *) str) / 2;
1137 outlen += g_unichar_to_utf8 (c, NULL);
1143 *items_read = (inptr - (char *) str) / 2;
1146 *items_written = outlen;
/* pass 2: allocate (1 extra byte) and write */
1148 outptr = outbuf = g_malloc (outlen + 1);
1149 inptr = (char *) str;
1152 while (inleft > 0) {
1153 if ((n = decode_utf16 (inptr, inleft, &c)) < 0)
1158 outptr += g_unichar_to_utf8 (c, outptr);
/* g_utf16_to_ucs4: convert UTF-16 to a newly allocated array of code points.
 *
 * The input is walked as raw bytes through decode_utf16 (the host-endian
 * codec alias).  The first loop validates and measures; `outlen` is kept in
 * bytes, so `items_written` is outlen / 4 and the allocation is outlen + 4.
 * The second loop decodes again to fill the buffer.
 * `items_read` is reported in UTF-16 units (byte offset divided by 2).
 * A decode result of -2 with more than two bytes left means the first unit
 * was read but the second one failed.  errno == EILSEQ is reported as an
 * illegal sequence; any other failure is treated as partial input, tolerated
 * when `items_read` is available.
 *
 * NOTE(review): the `inleft` setup, pointer advances, the terminator store
 * and the return statements are not visible in this excerpt. */
1169 g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **err)
1171 gunichar *outbuf, *outptr;
1178 g_return_val_if_fail (str != NULL, NULL);
/* pass 1: validate and measure */
1186 inptr = (char *) str;
1189 while (inleft > 0) {
1190 if ((n = decode_utf16 (inptr, inleft, &c)) < 0) {
1191 if (n == -2 && inleft > 2) {
1192 /* This means that the first UTF-16 char was read, but second failed */
1197 if (errno == EILSEQ) {
1198 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1199 "Illegal byte sequence encounted in the input.");
1200 } else if (items_read) {
1201 /* partial input is ok if we can let our caller know... */
1204 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
1205 "Partial byte sequence encountered in the input.");
1209 *items_read = (inptr - (char *) str) / 2;
1224 *items_read = (inptr - (char *) str) / 2;
1227 *items_written = outlen / 4;
/* pass 2: allocate (4 extra bytes) and fill */
1229 outptr = outbuf = g_malloc (outlen + 4);
1230 inptr = (char *) str;
1233 while (inleft > 0) {
1234 if ((n = decode_utf16 (inptr, inleft, &c)) < 0)
/* g_ucs4_to_utf8: convert an array of code points to a newly allocated UTF-8
 * string.
 *
 * Two sizing loops are visible: one runs until a zero code point, the other
 * is additionally bounded by `len` (presumably chosen by the sign of `len`
 * -- confirm).  Each asks g_unichar_to_utf8 (c, NULL) for the encoded size
 * and reports an illegal sequence when the result is negative.  The output
 * buffer holds outlen + 1 bytes and `items_written` receives outlen.
 *
 * NOTE(review): the write loop runs while `i < len`.  This is only correct
 * if `len` is reset to the number of code points actually measured in lines
 * not shown here; otherwise a negative `len` would write nothing and a `len`
 * reaching past an embedded zero would overrun the sized buffer -- confirm. */
1250 g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **err)
1252 char *outbuf, *outptr;
1257 g_return_val_if_fail (str != NULL, NULL);
/* sizing loop, zero-terminated input */
1260 for (i = 0; str[i] != 0; i++) {
1261 if ((n = g_unichar_to_utf8 (str[i], NULL)) < 0) {
1262 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1263 "Illegal byte sequence encounted in the input.");
/* sizing loop, length-bounded input (also stops at a zero code point) */
1277 for (i = 0; i < len && str[i] != 0; i++) {
1278 if ((n = g_unichar_to_utf8 (str[i], NULL)) < 0) {
1279 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1280 "Illegal byte sequence encounted in the input.");
/* allocate (1 extra byte) and encode */
1297 outptr = outbuf = g_malloc (outlen + 1);
1298 for (i = 0; i < len; i++)
1299 outptr += g_unichar_to_utf8 (str[i], outptr);
1303 *items_written = outlen;
/* g_ucs4_to_utf16: convert an array of code points to a newly allocated
 * UTF-16 array.
 *
 * Two sizing loops are visible: one runs until a zero code point, the other
 * is additionally bounded by `len` (presumably chosen by the sign of `len`
 * -- confirm).  Each asks g_unichar_to_utf16 (c, NULL) for the number of
 * units and reports an illegal sequence when the result is negative.  The
 * output buffer holds outlen + 1 units and `items_written` receives outlen.
 *
 * NOTE(review): the write loop runs while `i < len`.  This is only correct
 * if `len` is reset to the number of code points actually measured in lines
 * not shown here; otherwise a negative `len` would write nothing and a `len`
 * reaching past an embedded zero would overrun the sized buffer -- confirm. */
1312 g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **err)
1314 gunichar2 *outbuf, *outptr;
1319 g_return_val_if_fail (str != NULL, NULL);
/* sizing loop, zero-terminated input */
1322 for (i = 0; str[i] != 0; i++) {
1323 if ((n = g_unichar_to_utf16 (str[i], NULL)) < 0) {
1324 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1325 "Illegal byte sequence encounted in the input.");
/* sizing loop, length-bounded input (also stops at a zero code point) */
1339 for (i = 0; i < len && str[i] != 0; i++) {
1340 if ((n = g_unichar_to_utf16 (str[i], NULL)) < 0) {
1341 g_set_error (err, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1342 "Illegal byte sequence encounted in the input.");
/* allocate (1 extra unit) and encode */
1359 outptr = outbuf = g_malloc ((outlen + 1) * sizeof (gunichar2));
1360 for (i = 0; i < len; i++)
1361 outptr += g_unichar_to_utf16 (str[i], outptr);
1365 *items_written = outlen;