2 * strenc.c: string encoding conversions
5 * Dick Porter (dick@ximian.com)
7 * (C) 2003 Ximian, Inc.
18 /* Tries to turn a NUL-terminated string into UTF16.
20 * First, run through the colon-separated encodings in
21 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
22 * returning the first successful conversion to UTF16. If none of
23 * them succeeds and the input is valid UTF8, turn it directly into
24 * UTF16. If no conversion succeeds, return NULL.
26 * Callers must free the returned string if not NULL. bytes holds the number
27 * of bytes in the returned string, not including the terminator.
/* Converts `in` to UTF16, trying each entry of MONO_EXTERNAL_ENCODINGS and
 * then plain UTF8; the length of the result is stored through `bytes`.
 */
29 gunichar2 *mono_unicode_from_external (const gchar *in, gsize *bytes)
33 const gchar *encoding_list;
/* The list of candidate encodings is read from the environment. */
41 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
42 if(encoding_list==NULL) {
/* Split the colon-separated list and walk its entries in order. */
46 encodings=g_strsplit (encoding_list, ":", 0);
47 for(i=0;encodings[i]!=NULL; i++) {
/* Trace message naming the encoding about to be tried. */
49 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
52 /* "default_locale" is a special case encoding */
53 if(!strcmp (encodings[i], "default_locale")) {
/* Two-step conversion: current locale encoding to UTF8, then UTF8 to
 * UTF16.
 */
54 gchar *utf8=g_locale_to_utf8 (in, -1, NULL, NULL, NULL);
/* NOTE(review): the fourth argument of g_utf8_to_utf16 is items_written,
 * which GLib documents as a count of gunichar2 units, whereas the g_convert
 * call below fills *bytes with a byte count. Confirm which unit callers
 * expect in *bytes.
 */
56 res=(gchar *) g_utf8_to_utf16 (utf8, -1, NULL, &lbytes, NULL);
57 *bytes = (gsize) lbytes;
/* Any other entry is passed to g_convert as the source charset name, with
 * "UTF16" as the target; the written length goes straight into *bytes.
 */
61 res=g_convert (in, -1, "UTF16", encodings[i], NULL, bytes, NULL);
/* Release the split list, then hand the converted buffer back as UTF16. */
65 g_strfreev (encodings);
67 return((gunichar2 *)res);
/* Release the split list. */
71 g_strfreev (encodings);
/* If the input validates as UTF8, convert it directly to UTF16. */
73 if(g_utf8_validate (in, -1, NULL)) {
/* NOTE(review): bytes is a gsize * that is cast to glong * here, so
 * g_utf8_to_utf16 writes a glong through it. This presumably relies on
 * gsize and glong having the same size -- confirm for every target. The
 * value written is again items_written (gunichar2 units).
 */
74 gunichar2 *unires=g_utf8_to_utf16 (in, -1, NULL, (glong *)bytes, NULL);
82 /* Tries to turn a NUL-terminated string into UTF8.
84 * First, run through the colon-separated encodings in
85 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
86 * returning the first successful conversion to utf8. If none of
87 * them succeeds and the input is already valid UTF8, return a copy
88 * of it. If no conversion succeeds, return NULL.
90 * Callers must free the returned string if not NULL.
92 * This function is identical to mono_unicode_from_external, apart
93 * from returning utf8 not utf16; it's handy in a few places to work
/* Converts `in` to UTF8, trying each entry of MONO_EXTERNAL_ENCODINGS and
 * then checking whether `in` is already valid UTF8.
 */
96 gchar *mono_utf8_from_external (const gchar *in)
100 const gchar *encoding_list;
/* The list of candidate encodings is read from the environment. */
107 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
108 if(encoding_list==NULL) {
/* Split the colon-separated list and walk its entries in order. */
112 encodings=g_strsplit (encoding_list, ":", 0);
113 for(i=0;encodings[i]!=NULL; i++) {
/* Trace message naming the encoding about to be tried. */
115 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
119 /* "default_locale" is a special case encoding */
120 if(!strcmp (encodings[i], "default_locale")) {
/* Convert from the current locale's encoding to UTF8. */
121 res=g_locale_to_utf8 (in, -1, NULL, NULL, NULL);
/* Guard against a non-NULL result that nevertheless fails UTF8
 * validation.
 */
122 if(res!=NULL && !g_utf8_validate (res, -1, NULL)) {
/* Any other entry is passed to g_convert as the source charset name, with
 * "UTF8" as the target.
 */
127 res=g_convert (in, -1, "UTF8", encodings[i], NULL,
/* Release the split list. */
132 g_strfreev (encodings);
/* Release the split list. */
137 g_strfreev (encodings);
/* If the input already validates as UTF8, return a newly allocated copy so
 * the caller can free it like any other result.
 */
139 if(g_utf8_validate (in, -1, NULL)) {
140 return(g_strdup (in));
146 /* Turns NUL-terminated UTF16 into either UTF8, or the first
147 * working item in MONO_EXTERNAL_ENCODINGS if set. If no conversions
148 * work, then UTF8 is returned.
150 * Callers must free the returned string.
152 gchar *mono_unicode_to_external (const gunichar2 *uni)
155 const gchar *encoding_list;
157 /* Turn the unicode into utf8 to start with, because it's
158 * easier to work with gchar * than gunichar2 *
160 utf8=g_utf16_to_utf8 (uni, -1, NULL, NULL, NULL);
161 g_assert (utf8!=NULL);
163 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
164 if(encoding_list==NULL) {
168 gchar *res, **encodings;
171 encodings=g_strsplit (encoding_list, ":", 0);
172 for(i=0; encodings[i]!=NULL; i++) {
173 if(!strcmp (encodings[i], "default_locale")) {
174 res=g_locale_from_utf8 (utf8, -1, NULL, NULL,
177 res=g_convert (utf8, -1, encodings[i], "UTF8",
183 g_strfreev (encodings);
189 g_strfreev (encodings);
192 /* Nothing else worked, so just return the utf8 */