2 * strenc.c: string encoding conversions
5 * Dick Porter (dick@ximian.com)
7 * (C) 2003 Ximian, Inc.
19 * mono_unicode_from_external:
20 * @in: pointer to the buffer.
21 * @bytes: set on return to the number of bytes in the returned string.
23 * Tries to turn a NULL-terminated string into UTF16.
25 * First, run through the colon-separated encodings in
26 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
27 * returning the first successful conversion to UTF16. If none of
28 * them succeeds, see if the input is valid UTF8, in which case just
29 * turn it directly into UTF16. If that fails too, return NULL.
31 * Callers must free the returned string if not NULL. bytes holds the number
32 * of bytes in the returned string, not including the terminator.
35 mono_unicode_from_external (const gchar *in, gsize *bytes)
/* Overview: each encoding named in MONO_EXTERNAL_ENCODINGS is tried in
 * turn; a plain UTF8 interpretation of @in is only attempted once that
 * loop has finished.
 */
39 const gchar *encoding_list;
/* The candidate encodings come from the environment. */
47 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
48 if(encoding_list==NULL) {
/* Split the list on ':'. The resulting vector is NULL-terminated, which
 * is what ends the loop below.
 */
52 encodings=g_strsplit (encoding_list, ":", 0);
53 for(i=0;encodings[i]!=NULL; i++) {
55 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
58 /* "default_locale" is a special case encoding */
59 if(!strcmp (encodings[i], "default_locale")) {
/* Two steps: locale encoding -> UTF8, then UTF8 -> UTF16. */
60 gchar *utf8=g_locale_to_utf8 (in, -1, NULL, NULL, NULL);
/* g_utf8_to_utf16 reports what it wrote through lbytes (a glong), which
 * is then copied into the caller's gsize.
 * NOTE(review): GLib documents that out-parameter as a count of UTF16
 * code units, not bytes -- confirm where it is scaled to bytes.
 */
62 res=(gchar *) g_utf8_to_utf16 (utf8, -1, NULL, &lbytes, NULL);
63 *bytes = (gsize) lbytes;
/* Any other name is passed to g_convert as the source encoding, with
 * "UTF8" as the target; the input length is taken with strlen.
 */
67 /* Don't use UTF16 here. It returns the <FF FE> prepended to the string */
68 res = g_convert (in, strlen (in), "UTF8", encodings[i], NULL, bytes, NULL);
/* Second step: UTF8 -> UTF16. The value g_convert stored in *bytes is
 * replaced with the count coming from this conversion.
 */
71 res = (gchar *) g_utf8_to_utf16 (res, -1, NULL, &lbytes, NULL);
72 *bytes = (gsize) lbytes;
/* Release the encoding vector, then hand the buffer back as UTF16. */
78 g_strfreev (encodings);
80 return((gunichar2 *)res);
/* Reached once the loop is over: release the encoding vector. */
84 g_strfreev (encodings);
/* Fallback: if @in already validates as UTF8, convert it directly.
 * NOTE(review): bytes (gsize *) is cast to glong * here; those types
 * need not have the same width on every platform -- consider going
 * through a glong local as the loop above does.
 */
86 if(g_utf8_validate (in, -1, NULL)) {
87 gunichar2 *unires=g_utf8_to_utf16 (in, -1, NULL, (glong *)bytes, NULL);
96 * mono_utf8_from_external:
97 * @in: pointer to the string buffer.
99 * Tries to turn a NULL-terminated string into UTF8.
101 * First, run through the colon-separated encodings in
102 * MONO_EXTERNAL_ENCODINGS and do an iconv conversion on each,
103 * returning the first successful conversion to utf8. If none of
104 * them succeeds, see if the input is already valid UTF8, in which
105 * case a copy of it is returned. Otherwise, return NULL.
107 * Callers must free the returned string if not NULL.
109 * This function is identical to mono_unicode_from_external, apart
110 * from returning utf8 not utf16; it's handy in a few places to work
113 gchar *mono_utf8_from_external (const gchar *in)
/* Overview: each encoding named in MONO_EXTERNAL_ENCODINGS is tried in
 * turn; @in is only checked for being valid UTF8 itself once that loop
 * has finished.
 */
117 const gchar *encoding_list;
/* The candidate encodings come from the environment. */
124 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
125 if(encoding_list==NULL) {
/* Split the list on ':'. The resulting vector is NULL-terminated, which
 * is what ends the loop below.
 */
129 encodings=g_strsplit (encoding_list, ":", 0);
130 for(i=0;encodings[i]!=NULL; i++) {
132 g_message (G_GNUC_PRETTY_FUNCTION ": Trying encoding [%s]",
136 /* "default_locale" is a special case encoding */
137 if(!strcmp (encodings[i], "default_locale")) {
/* Convert from the locale's encoding, then double-check that a non-NULL
 * result really is valid UTF8.
 */
138 res=g_locale_to_utf8 (in, -1, NULL, NULL, NULL);
139 if(res!=NULL && !g_utf8_validate (res, -1, NULL)) {
/* Any other name is passed to g_convert as the source encoding, with
 * "UTF8" as the target; the -1 length means @in is read up to its
 * terminator.
 */
144 res=g_convert (in, -1, "UTF8", encodings[i], NULL,
/* The encoding vector is released both on the path inside the loop and
 * after the loop has run to completion.
 */
149 g_strfreev (encodings);
154 g_strfreev (encodings);
/* Fallback: if @in is already valid UTF8, return a fresh copy of it so
 * the caller can free the result like any converted string.
 */
156 if(g_utf8_validate (in, -1, NULL)) {
157 return(g_strdup (in));
164 * mono_unicode_to_external:
165 * @uni: a UTF16 string to convert to an external representation.
167 * Turns NULL-terminated UTF16 into either UTF8, or the first
168 * working item in MONO_EXTERNAL_ENCODINGS if set. If no conversions
169 * work, then UTF8 is returned.
171 * Callers must free the returned string.
173 gchar *mono_unicode_to_external (const gunichar2 *uni)
/* Overview: @uni is first converted to UTF8 with g_utf16_to_utf8. Each
 * name in the colon-separated MONO_EXTERNAL_ENCODINGS list is then
 * tried in order: "default_locale" goes through g_locale_from_utf8,
 * any other name is given to g_convert as the target encoding (with
 * "UTF8" as the source).
 * NOTE(review): the g_assert on the UTF8 result means a failed
 * UTF16 -> UTF8 conversion is treated as fatal rather than reported to
 * the caller -- confirm that is intended for malformed input.
 */
176 const gchar *encoding_list;
178 /* Turn the unicode into utf8 to start with, because it's
179 * easier to work with gchar * than gunichar2 *
181 utf8=g_utf16_to_utf8 (uni, -1, NULL, NULL, NULL);
182 g_assert (utf8!=NULL);
184 encoding_list=g_getenv ("MONO_EXTERNAL_ENCODINGS");
185 if(encoding_list==NULL) {
189 gchar *res, **encodings;
192 encodings=g_strsplit (encoding_list, ":", 0);
193 for(i=0; encodings[i]!=NULL; i++) {
194 if(!strcmp (encodings[i], "default_locale")) {
195 res=g_locale_from_utf8 (utf8, -1, NULL, NULL,
198 res=g_convert (utf8, -1, encodings[i], "UTF8",
204 g_strfreev (encodings);
210 g_strfreev (encodings);
213 /* Nothing else worked, so just return the utf8 */