2 * strenc.c: string encoding conversions
5 * Dick Porter (dick@ximian.com)
7 * (C) 2003 Ximian, Inc.
18 /* Tries to turn a NULL-terminated string into UTF16.
20 * First, see if it's valid UTF8, in which case just turn it directly
21 * into UTF16. Next, run through the colon-separated encodings in
22 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
23 * returning the first successful conversion to UTF16. If no
24 * conversion succeeds, return NULL.
26 * Callers must free the returned string if not NULL. bytes holds the number
27 * of bytes in the returned string, not including the terminator.
29 gunichar2 *mono_unicode_from_external (const gchar *in, gsize *bytes)
33 const gchar *encoding_list;
41 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
42 if(encoding_list==NULL) {
46 encodings=g_strsplit (encoding_list, ":", 0);
47 for(i=0;encodings[i]!=NULL; i++) {
49 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
52 /* "default_locale" is a special case encoding */
53 if(!strcmp (encodings[i], "default_locale")) {
54 gchar *utf8=g_locale_to_utf8 (in, -1, NULL, NULL, NULL);
56 res=(gchar *) g_utf8_to_utf16 (utf8, -1, NULL, &lbytes, NULL);
57 *bytes = (gsize) lbytes;
61 /* Don't use UTF16 here. It returns the string with <FF FE> prepended */
62 res = g_convert (in, strlen (in), "UTF8", encodings[i], NULL, bytes, NULL);
65 res = (gchar *) g_utf8_to_utf16 (res, -1, NULL, &lbytes, NULL);
66 *bytes = (gsize) lbytes;
72 g_strfreev (encodings);
74 return((gunichar2 *)res);
78 g_strfreev (encodings);
80 if(g_utf8_validate (in, -1, NULL)) {
81 gunichar2 *unires=g_utf8_to_utf16 (in, -1, NULL, (glong *)bytes, NULL);
89 /* Tries to turn a NULL-terminated string into UTF8.
91 * First, see if it's valid UTF8, in which case there's nothing more
92 * to be done. Next, run through the colon-separated encodings in
93 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
94 * returning the first successful conversion to utf8. If no
95 * conversion succeeds, return NULL.
97 * Callers must free the returned string if not NULL.
99 * This function is identical to mono_unicode_from_external, apart
100 * from returning utf8 not utf16; it's handy in a few places to work
103 gchar *mono_utf8_from_external (const gchar *in)
107 const gchar *encoding_list;
114 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
115 if(encoding_list==NULL) {
119 encodings=g_strsplit (encoding_list, ":", 0);
120 for(i=0;encodings[i]!=NULL; i++) {
122 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
126 /* "default_locale" is a special case encoding */
127 if(!strcmp (encodings[i], "default_locale")) {
128 res=g_locale_to_utf8 (in, -1, NULL, NULL, NULL);
129 if(res!=NULL && !g_utf8_validate (res, -1, NULL)) {
134 res=g_convert (in, -1, "UTF8", encodings[i], NULL,
139 g_strfreev (encodings);
144 g_strfreev (encodings);
146 if(g_utf8_validate (in, -1, NULL)) {
147 return(g_strdup (in));
153 /* Turns NULL-terminated UTF16 into either UTF8, or the first
154 * working item in MONO_EXTERNAL_ENCODINGS if set. If no conversions
155 * work, then UTF8 is returned.
157 * Callers must free the returned string.
159 gchar *mono_unicode_to_external (const gunichar2 *uni)
162 const gchar *encoding_list;
164 /* Turn the unicode into utf8 to start with, because it's
165 * easier to work with gchar * than gunichar2 *
167 utf8=g_utf16_to_utf8 (uni, -1, NULL, NULL, NULL);
168 g_assert (utf8!=NULL);
170 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
171 if(encoding_list==NULL) {
175 gchar *res, **encodings;
178 encodings=g_strsplit (encoding_list, ":", 0);
179 for(i=0; encodings[i]!=NULL; i++) {
180 if(!strcmp (encodings[i], "default_locale")) {
181 res=g_locale_from_utf8 (utf8, -1, NULL, NULL,
184 res=g_convert (utf8, -1, encodings[i], "UTF8",
190 g_strfreev (encodings);
196 g_strfreev (encodings);
199 /* Nothing else worked, so just return the utf8 */