2 * strenc.c: string encoding conversions
5 * Dick Porter (dick@ximian.com)
7 * (C) 2003 Ximian, Inc.
17 /* Tries to turn a NULL-terminated string into UTF16LE.
19 * First, see if it's valid UTF8, in which case just turn it directly
20 * into UTF16LE. Next, run through the colon-separated encodings in
21 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
22 * returning the first successful conversion to utf16. If no
23 * conversion succeeds, return NULL.
25 * Callers must free the returned string if not NULL. bytes holds the number
26 * of bytes in the returned string, not including the terminator.
28 gunichar2 *mono_unicode_from_external (const gchar *in, gsize *bytes)
32 const gchar *encoding_list;
39 if(g_utf8_validate (in, -1, NULL)) {
40 /* Use g_convert not g_utf8_to_utf16 because we need
43 res=g_convert (in, -1, "UTF16LE", "UTF8", NULL, bytes, NULL);
44 return((gunichar2 *)res);
47 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
48 if(encoding_list==NULL) {
52 encodings=g_strsplit (encoding_list, ":", 0);
53 for(i=0;encodings[i]!=NULL; i++) {
55 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
59 /* "default_locale" is a special case encoding */
60 if(!strcmp (encodings[i], "default_locale")) {
61 gchar *utf8=g_locale_to_utf8 (in, -1, NULL, NULL,
63 if(utf8!=NULL && g_utf8_validate (utf8, -1, NULL)) {
64 res=g_convert (utf8, -1, "UTF16LE",
65 encodings[i], NULL, bytes,
70 res=g_convert (in, -1, "UTF16LE", encodings[i], NULL,
75 g_strfreev (encodings);
76 return((gunichar2 *)res);
80 g_strfreev (encodings);
85 /* Tries to turn a NULL-terminated string into UTF8.
87 * First, see if it's valid UTF8, in which case there's nothing more
88 * to be done. Next, run through the colon-separated encodings in
89 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
90 * returning the first successful conversion to utf8. If no
91 * conversion succeeds, return NULL.
93 * Callers must free the returned string if not NULL.
95 * This function is identical to mono_unicode_from_external, apart
96 * from returning utf8 not utf16; it's handy in a few places to work
99 gchar *mono_utf8_from_external (const gchar *in)
103 const gchar *encoding_list;
110 if(g_utf8_validate (in, -1, NULL)) {
111 return(g_strdup (in));
114 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
115 if(encoding_list==NULL) {
119 encodings=g_strsplit (encoding_list, ":", 0);
120 for(i=0;encodings[i]!=NULL; i++) {
122 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
126 /* "default_locale" is a special case encoding */
127 if(!strcmp (encodings[i], "default_locale")) {
128 res=g_locale_to_utf8 (in, -1, NULL, NULL, NULL);
129 if(res!=NULL && !g_utf8_validate (res, -1, NULL)) {
134 res=g_convert (in, -1, "UTF8", encodings[i], NULL,
139 g_strfreev (encodings);
144 g_strfreev (encodings);
149 /* Turns NULL-terminated UTF16LE into either UTF8, or the first
150 * working item in MONO_EXTERNAL_ENCODINGS if set. If no conversions
151 * work, then UTF8 is returned.
153 * Callers must free the returned string.
155 gchar *mono_unicode_to_external (const gunichar2 *uni)
158 const gchar *encoding_list;
160 /* Turn the unicode into utf8 to start with, because its
161 * easier to work with gchar * than gunichar2 *
163 utf8=g_utf16_to_utf8 (uni, -1, NULL, NULL, NULL);
164 g_assert (utf8!=NULL);
166 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
167 if(encoding_list==NULL) {
171 gchar *res, **encodings;
174 encodings=g_strsplit (encoding_list, ":", 0);
175 for(i=0; encodings[i]!=NULL; i++) {
176 if(!strcmp (encodings[i], "default_locale")) {
177 res=g_locale_from_utf8 (utf8, -1, NULL, NULL,
180 res=g_convert (utf8, -1, encodings[i], "UTF8",
186 g_strfreev (encodings);
192 g_strfreev (encodings);
195 /* Nothing else worked, so just return the utf8 */