2 * unicode.h: Unicode support
5 * Dietmar Maurer (dietmar@ximian.com)
7 * (C) 2001 Ximian, Inc.
14 #include <mono/metadata/object.h>
15 #include <mono/metadata/unicode.h>
/*
 * catmap:
 *
 * Translation table from GLib's GUnicodeType to MonoUnicodeCategory.
 * ves_icall_System_Char_GetUnicodeCategory indexes it directly with the
 * value returned by g_unichar_type (), so every row has to stay at the
 * position of the GUnicodeType constant named in its comment.
 */
19 static MonoUnicodeCategory catmap[] = {
20 /* G_UNICODE_CONTROL = */ Control,
21 /* G_UNICODE_FORMAT = */ Format,
22 /* G_UNICODE_UNASSIGNED = */ OtherNotAssigned,
23 /* G_UNICODE_PRIVATE_USE = */ PrivateUse,
24 /* G_UNICODE_SURROGATE = */ Surrogate,
25 /* G_UNICODE_LOWERCASE_LETTER = */ LowercaseLetter,
26 /* G_UNICODE_MODIFIER_LETTER = */ ModifierLetter,
27 /* G_UNICODE_OTHER_LETTER = */ OtherLetter,
28 /* G_UNICODE_TITLECASE_LETTER = */ TitlecaseLetter,
29 /* G_UNICODE_UPPERCASE_LETTER = */ UppercaseLetter,
30 /* G_UNICODE_COMBINING_MARK = */ SpaceCombiningMark,
31 /* G_UNICODE_ENCLOSING_MARK = */ EnclosingMark,
32 /* G_UNICODE_NON_SPACING_MARK = */ NonSpacingMark,
33 /* G_UNICODE_DECIMAL_NUMBER = */ DecimalDigitNumber,
34 /* G_UNICODE_LETTER_NUMBER = */ LetterNumber,
35 /* G_UNICODE_OTHER_NUMBER = */ OtherNumber,
36 /* G_UNICODE_CONNECT_PUNCTUATION = */ ConnectorPunctuation,
37 /* G_UNICODE_DASH_PUNCTUATION = */ DashPunctuation,
38 /* G_UNICODE_CLOSE_PUNCTUATION = */ ClosePunctuation,
39 /* G_UNICODE_FINAL_PUNCTUATION = */ FinalQuotePunctuation,
40 /* G_UNICODE_INITIAL_PUNCTUATION = */ InitialQuotePunctuation,
41 /* G_UNICODE_OTHER_PUNCTUATION = */ OtherPunctuation,
42 /* G_UNICODE_OPEN_PUNCTUATION = */ OpenPunctuation,
43 /* G_UNICODE_CURRENCY_SYMBOL = */ CurrencySymbol,
44 /* G_UNICODE_MODIFIER_SYMBOL = */ ModifierSymbol,
45 /* G_UNICODE_MATH_SYMBOL = */ MathSymbol,
46 /* G_UNICODE_OTHER_SYMBOL = */ OtherSymbol,
47 /* G_UNICODE_LINE_SEPARATOR = */ LineSeperator,
48 /* G_UNICODE_PARAGRAPH_SEPARATOR = */ ParagraphSeperator,
49 /* G_UNICODE_SPACE_SEPARATOR = */ SpaceSeperator,
/*
 * ves_icall_System_Char_GetNumericValue:
 * @c: character to examine.
 *
 * Returns: g_unichar_digit_value (@c) converted to double.  GLib documents
 * that function as yielding -1 for anything that is not a decimal digit.
 */
53 ves_icall_System_Char_GetNumericValue (gunichar2 c)
55 return (double)g_unichar_digit_value (c);
/*
 * ves_icall_System_Char_GetUnicodeCategory:
 * @c: character to classify.
 *
 * Returns: the catmap entry selected by g_unichar_type (@c).
 */
59 ves_icall_System_Char_GetUnicodeCategory (gunichar2 c)
/* The GUnicodeType value is used as the table index without a range check. */
61 return catmap [g_unichar_type (c)];
/*
 * ves_icall_System_Char_IsControl:
 * @c: character to test.
 *
 * Returns: the result of g_unichar_iscntrl (@c).
 */
65 ves_icall_System_Char_IsControl (gunichar2 c)
67 return g_unichar_iscntrl (c);
/*
 * ves_icall_System_Char_IsDigit:
 * @c: character to test.
 *
 * Returns: the result of g_unichar_isdigit (@c).
 */
71 ves_icall_System_Char_IsDigit (gunichar2 c)
73 return g_unichar_isdigit (c);
/*
 * ves_icall_System_Char_IsLetter:
 * @c: character to test.
 *
 * Returns: the result of g_unichar_isalpha (@c).
 */
77 ves_icall_System_Char_IsLetter (gunichar2 c)
79 return g_unichar_isalpha (c);
/*
 * ves_icall_System_Char_IsLower:
 * @c: character to test.
 *
 * Returns: the result of g_unichar_islower (@c).
 */
83 ves_icall_System_Char_IsLower (gunichar2 c)
85 return g_unichar_islower (c);
/*
 * ves_icall_System_Char_IsUpper:
 * @c: character to test.
 *
 * Returns: the result of g_unichar_isupper (@c).
 */
89 ves_icall_System_Char_IsUpper (gunichar2 c)
91 return g_unichar_isupper (c);
95 ves_icall_System_Char_IsNumber (gunichar2 c)
97 return g_unichar_isdigit (c);
101 ves_icall_System_Char_IsPunctuation (gunichar2 c)
103 return g_unichar_ispunct (c);
/*
 * ves_icall_System_Char_IsSeparator:
 * @c: character to test.
 *
 * Returns: non-zero when g_unichar_type (@c) is the line, paragraph or
 * space separator category, zero otherwise.
 */
107 ves_icall_System_Char_IsSeparator (gunichar2 c)
109 GUnicodeType t = g_unichar_type (c);
111 return (t == G_UNICODE_LINE_SEPARATOR ||
112 t == G_UNICODE_PARAGRAPH_SEPARATOR ||
113 t == G_UNICODE_SPACE_SEPARATOR);
/*
 * ves_icall_System_Char_IsSurrogate:
 * @c: character to test.
 *
 * Returns: non-zero when g_unichar_type (@c) is G_UNICODE_SURROGATE.
 */
117 ves_icall_System_Char_IsSurrogate (gunichar2 c)
119 return (g_unichar_type (c) == G_UNICODE_SURROGATE);
/*
 * ves_icall_System_Char_IsSymbol:
 * @c: character to test.
 *
 * Returns: non-zero when g_unichar_type (@c) is the currency, modifier,
 * math or other symbol category, zero otherwise.
 */
123 ves_icall_System_Char_IsSymbol (gunichar2 c)
125 GUnicodeType t = g_unichar_type (c);
127 return (t == G_UNICODE_CURRENCY_SYMBOL ||
128 t == G_UNICODE_MODIFIER_SYMBOL ||
129 t == G_UNICODE_MATH_SYMBOL ||
130 t == G_UNICODE_OTHER_SYMBOL);
134 ves_icall_System_Char_IsWhiteSpace (gunichar2 c)
136 return g_unichar_isspace (c);
/*
 * ves_icall_System_Char_ToLower:
 * @c: character to convert.
 *
 * Returns: the result of g_unichar_tolower (@c).
 */
140 ves_icall_System_Char_ToLower (gunichar2 c)
142 return g_unichar_tolower (c);
/*
 * ves_icall_System_Char_ToUpper:
 * @c: character to convert.
 *
 * Returns: the result of g_unichar_toupper (@c).
 */
146 ves_icall_System_Char_ToUpper (gunichar2 c)
148 return g_unichar_toupper (c);
/*
 * ves_icall_iconv_new_encoder:
 * @name: managed string naming the target character set.
 * @big_endian: chooses "UTF-16be" over "UTF-16le" as the source encoding
 *              in the HAVE_NEW_ICONV build.
 *
 * Opens an iconv descriptor that converts from UTF-16 to @name.
 *
 * NOTE(review): a failed iconv_open () (for example an unknown @name) trips
 * the g_assert at the end instead of being reported to the caller.
 * NOTE(review): the release of n and the return statement are not visible
 * in this listing -- confirm the UTF-8 copy is freed on every path.
 */
152 ves_icall_iconv_new_encoder (MonoString *name, MonoBoolean big_endian)
157 // fixme: don't enforce big endian, support old iconv
/* iconv_open () takes C strings, so obtain a UTF-8 copy of the name. */
161 n = mono_string_to_utf8 (name);
163 /* force big endian before class libraries are fixed */
164 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
168 #ifdef HAVE_NEW_ICONV
/* iconv_open (tocode, fromcode): n is the target, UTF-16 the source. */
169 cd = iconv_open (n, big_endian ? "UTF-16be" : "UTF-16le");
/* Presumably the branch taken when HAVE_NEW_ICONV is not defined. */
171 cd = iconv_open (n, "UTF-16");
173 g_assert (cd != (iconv_t)-1);
/*
 * ves_icall_iconv_new_decoder:
 * @name: managed string naming the source character set.
 * @big_endian: chooses "UTF-16be" over "UTF-16le" as the target encoding
 *              in the HAVE_NEW_ICONV build.
 *
 * Opens an iconv descriptor that converts from @name to UTF-16.
 *
 * NOTE(review): a failed iconv_open () (for example an unknown @name) trips
 * the g_assert at the end instead of being reported to the caller.
 * NOTE(review): the release of n and the return statement are not visible
 * in this listing -- confirm the UTF-8 copy is freed on every path.
 */
179 ves_icall_iconv_new_decoder (MonoString *name, MonoBoolean big_endian)
184 // fixme: don't enforce big endian, support old iconv
/* iconv_open () takes C strings, so obtain a UTF-8 copy of the name. */
188 n = mono_string_to_utf8 (name);
190 /* force big endian before class libraries are fixed */
191 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
195 #ifdef HAVE_NEW_ICONV
/* iconv_open (tocode, fromcode): UTF-16 is the target, n the source. */
196 cd = iconv_open (big_endian ? "UTF-16be" : "UTF-16le", n);
/* Presumably the branch taken when HAVE_NEW_ICONV is not defined. */
198 cd = iconv_open ("UTF-16", n);
200 g_assert (cd != (iconv_t)-1);
/*
 * ves_icall_iconv_reset:
 * @converter: iconv descriptor, passed around as an opaque pointer.
 *
 * Puts the descriptor back into its initial conversion state.
 */
206 ves_icall_iconv_reset (gpointer converter)
208 iconv_t cd = (iconv_t)converter;
/* POSIX: calling iconv () with a NULL input buffer resets the state. */
212 iconv(cd, NULL, NULL, NULL, NULL);
/*
 * iconv_get_length:
 * @cd: iconv conversion descriptor.
 * @src: input buffer.
 * @len: size of @src in bytes.
 * @encode: TRUE when called from ves_icall_iconv_get_byte_count, FALSE when
 *          called from ves_icall_iconv_get_char_count.
 *
 * Feeds @src through iconv ().  According to the comment in
 * ves_icall_iconv_get_char_count the result is a number of bytes.
 *
 * NOTE(review): much of the body (declarations, output buffer handling,
 * error branches, return statement) is not visible in this listing; the
 * comments below only describe the statements that are.
 * NOTE(review): inbytes_remaining and outbytes_remaining are guint, yet
 * their addresses are passed to iconv (), whose POSIX prototype takes
 * size_t *.  Where size_t is wider than guint this is a type mismatch --
 * confirm and declare them as size_t.
 */
216 iconv_get_length (iconv_t cd, guchar *src, int len, gboolean encode)
222 guint inbytes_remaining;
223 guint outbytes_remaining;
225 gboolean have_error = FALSE;
/*
 * Build without HAVE_NEW_ICONV: on little-endian hosts the input of the
 * encoding path is rewritten two bytes at a time (presumably a byte swap;
 * only one of the assignments is visible).  g_memdup gives the loop a
 * private copy so the caller's buffer is left untouched.
 */
231 #ifndef HAVE_NEW_ICONV
232 if (G_BYTE_ORDER == G_LITTLE_ENDIAN && encode) {
235 src = g_memdup (src, len);
236 for (i = 0; i < len; i += 2) {
238 src [i] = src [i + 1];
245 inbytes_remaining = len;
250 outbytes_remaining = outbuf_size;
253 err = iconv (cd, (char **)&p, &inbytes_remaining,
254 (char **)&outp, &outbytes_remaining);
/* iconv () signals failure by returning (size_t)-1. */
256 if(err == (size_t)-1) {
259 /* Incomplete text, do not report an error */
/* p is advanced by iconv (); a mismatch means input was left unconsumed. */
276 if((p - src) != len) {
/* NOTE(review): presumably releases the g_memdup copy made above -- confirm. */
282 #ifndef HAVE_NEW_ICONV
283 if (G_BYTE_ORDER == G_LITTLE_ENDIAN && encode)
288 g_assert_not_reached ();
296 ves_icall_iconv_get_byte_count (gpointer converter, MonoArray *chars, gint32 idx, gint32 count)
298 iconv_t cd = (iconv_t)converter;
304 g_assert (mono_array_length (chars) > idx);
305 g_assert (mono_array_length (chars) >= (idx + count));
307 if (!(len = (mono_array_length (chars) - idx) * 2))
310 src = mono_array_addr (chars, guint16, idx);
312 return iconv_get_length (cd, src, len, TRUE);
/*
 * iconv_convert:
 * @cd: iconv conversion descriptor.
 * @src: input buffer.
 * @len: size of @src in bytes.
 * @dest: output buffer.
 * @max_len: capacity of @dest in bytes.
 * @encode: TRUE when called from ves_icall_iconv_get_bytes, FALSE when
 *          called from ves_icall_iconv_get_chars.
 *
 * Converts @src with iconv (), limiting the output to @max_len bytes.
 *
 * Returns: the number of bytes written to @dest.
 *
 * NOTE(review): declarations, several branches and closing braces are not
 * visible in this listing; the comments below only describe the statements
 * that are.
 * NOTE(review): inbytes_remaining and outbytes_remaining are guint, yet
 * their addresses are passed to iconv (), whose POSIX prototype takes
 * size_t *.  Where size_t is wider than guint this is a type mismatch --
 * confirm and declare them as size_t.
 */
316 iconv_convert (iconv_t cd, guchar *src, int len, guchar *dest, int max_len, gboolean encode)
319 guint inbytes_remaining;
320 guint outbytes_remaining;
322 gboolean have_error = FALSE;
/*
 * Build without HAVE_NEW_ICONV: on little-endian hosts the input of the
 * encoding path is rewritten two bytes at a time (presumably a byte swap;
 * only one of the assignments is visible).  g_memdup gives the loop a
 * private copy so the caller's buffer is left untouched.
 */
329 #ifndef HAVE_NEW_ICONV
330 if (G_BYTE_ORDER == G_LITTLE_ENDIAN && encode) {
333 src = g_memdup (src, len);
334 for (i = 0; i < len; i += 2) {
336 src [i] = src [i + 1];
343 inbytes_remaining = len;
/* The caller's capacity is the only output space iconv () is offered. */
344 outbuf_size = max_len;
346 outbytes_remaining = outbuf_size;
349 err = iconv (cd, (char **)&p, &inbytes_remaining, (char **)&outp, &outbytes_remaining);
/* iconv () signals failure by returning (size_t)-1. */
351 if(err == (size_t)-1) {
352 if (errno == EINVAL) {
353 /* Incomplete text, do not report an error */
/* p is advanced by iconv (); a mismatch means input was left unconsumed. */
359 if ((p - src) != len) {
/*
 * Build without HAVE_NEW_ICONV, little-endian host: the bytes produced in
 * dest are rewritten pairwise.  NOTE(review): the statements between this
 * test and the loop are not visible, so whether this applies to both the
 * encode and decode direction cannot be confirmed from here.
 */
365 #ifndef HAVE_NEW_ICONV
366 if (G_BYTE_ORDER == G_LITTLE_ENDIAN) {
/* mb: number of bytes iconv () actually stored in dest. */
370 int mb = max_len - outbytes_remaining;
372 for (i = 0; i < mb; i+=2) {
374 dest [i] = dest [i + 1];
381 g_assert_not_reached ();
384 /* we return the number of bytes written in dest */
385 return max_len - outbytes_remaining;
390 ves_icall_iconv_get_bytes (gpointer converter, MonoArray *chars, gint32 charIndex, gint32 charCount,
391 MonoArray *bytes, gint32 byteIndex)
393 iconv_t cd = (iconv_t)converter;
403 g_assert (mono_array_length (chars) > charIndex);
404 g_assert (mono_array_length (chars) >= (charIndex + charCount));
405 g_assert (mono_array_length (bytes) > byteIndex);
406 g_assert (mono_array_length (chars) >= (byteIndex + charCount));
408 if (!(len = (mono_array_length (chars) - charIndex) * 2))
411 src = mono_array_addr (chars, guint16, charIndex);
412 dest = mono_array_addr (bytes, char, byteIndex);
414 max_len = mono_array_length (bytes) - byteIndex;
416 return iconv_convert (cd, src, len, dest, max_len, TRUE);
/*
 * ves_icall_iconv_get_char_count:
 * @converter: iconv descriptor, passed around as an opaque pointer.
 * @bytes: managed byte array holding the encoded input.
 * @idx: index of the first byte to measure.
 * @count: number of bytes to measure.
 *
 * Returns: half of what iconv_get_length () reports for the @count bytes
 * starting at @idx, i.e. the byte count converted to 16-bit units.
 */
420 ves_icall_iconv_get_char_count (gpointer converter, MonoArray *bytes, gint32 idx, gint32 count)
422 iconv_t cd = (iconv_t)converter;
/* The requested range must start inside the array and must not run past it. */
427 g_assert (mono_array_length (bytes) > idx);
428 g_assert (mono_array_length (bytes) >= (idx + count));
430 src = mono_array_addr (bytes, char, idx);
432 /* iconv_get_length () returns the number of bytes */
433 return iconv_get_length (cd, src, (int) count, FALSE) / 2;
437 ves_icall_iconv_get_chars (gpointer converter, MonoArray *bytes, gint32 byteIndex, gint32 byteCount,
438 MonoArray *chars, gint32 charIndex)
440 iconv_t cd = (iconv_t)converter;
447 g_assert (mono_array_length (bytes) > byteIndex);
448 g_assert (mono_array_length (chars) >= (byteIndex + byteCount));
449 g_assert (mono_array_length (chars) > charIndex);
451 src = mono_array_addr (bytes, char, byteIndex);
452 dest = mono_array_addr (chars, guint16, charIndex);
454 max_len = (mono_array_length (chars) - charIndex) * 2;
456 /* iconv_convert () returns the number of bytes */
457 return iconv_convert (cd, src, (int) byteCount, dest, max_len, FALSE) / 2;