2 * unicode.h: Unicode support
5 * Dietmar Maurer (dietmar@ximian.com)
7 * (C) 2001 Ximian, Inc.
14 #include <mono/metadata/object.h>
15 #include <mono/metadata/unicode.h>
/*
 * catmap:
 * Translation table from a GLib GUnicodeType value to the corresponding
 * MonoUnicodeCategory.  It is indexed directly with the result of
 * g_unichar_type () (see ves_icall_System_Char_GetUnicodeCategory), so the
 * position of every entry has to equal the numeric value of the
 * GUnicodeType enumerator named in the comment in front of it.
 * Do not reorder or insert entries without checking the GLib enum.
 */
23 static MonoUnicodeCategory catmap[] = {
24 /* G_UNICODE_CONTROL = */ Control,
25 /* G_UNICODE_FORMAT = */ Format,
26 /* G_UNICODE_UNASSIGNED = */ OtherNotAssigned,
27 /* G_UNICODE_PRIVATE_USE = */ PrivateUse,
28 /* G_UNICODE_SURROGATE = */ Surrogate,
29 /* G_UNICODE_LOWERCASE_LETTER = */ LowercaseLetter,
30 /* G_UNICODE_MODIFIER_LETTER = */ ModifierLetter,
31 /* G_UNICODE_OTHER_LETTER = */ OtherLetter,
32 /* G_UNICODE_TITLECASE_LETTER = */ TitlecaseLetter,
33 /* G_UNICODE_UPPERCASE_LETTER = */ UppercaseLetter,
34 /* G_UNICODE_COMBINING_MARK = */ SpaceCombiningMark,
35 /* G_UNICODE_ENCLOSING_MARK = */ EnclosingMark,
36 /* G_UNICODE_NON_SPACING_MARK = */ NonSpacingMark,
37 /* G_UNICODE_DECIMAL_NUMBER = */ DecimalDigitNumber,
38 /* G_UNICODE_LETTER_NUMBER = */ LetterNumber,
39 /* G_UNICODE_OTHER_NUMBER = */ OtherNumber,
40 /* G_UNICODE_CONNECT_PUNCTUATION = */ ConnectorPunctuation,
41 /* G_UNICODE_DASH_PUNCTUATION = */ DashPunctuation,
42 /* G_UNICODE_CLOSE_PUNCTUATION = */ ClosePunctuation,
43 /* G_UNICODE_FINAL_PUNCTUATION = */ FinalQuotePunctuation,
44 /* G_UNICODE_INITIAL_PUNCTUATION = */ InitialQuotePunctuation,
45 /* G_UNICODE_OTHER_PUNCTUATION = */ OtherPunctuation,
46 /* G_UNICODE_OPEN_PUNCTUATION = */ OpenPunctuation,
47 /* G_UNICODE_CURRENCY_SYMBOL = */ CurrencySymbol,
48 /* G_UNICODE_MODIFIER_SYMBOL = */ ModifierSymbol,
49 /* G_UNICODE_MATH_SYMBOL = */ MathSymbol,
50 /* G_UNICODE_OTHER_SYMBOL = */ OtherSymbol,
51 /* G_UNICODE_LINE_SEPARATOR = */ LineSeperator,
52 /* G_UNICODE_PARAGRAPH_SEPARATOR = */ ParagraphSeperator,
53 /* G_UNICODE_SPACE_SEPARATOR = */ SpaceSeperator,
/*
 * ves_icall_System_Char_GetNumericValue:
 * @c: a UTF-16 code unit.
 *
 * Returns the result of g_unichar_digit_value (@c) converted to double.
 * Per the GLib documentation that function only knows decimal digits and
 * yields -1 for every other character.
 * NOTE(review): characters with a non-decimal numeric value (fractions,
 * letter numbers, ...) therefore also produce -1 -- confirm this matches
 * what the managed caller expects.
 */
57 ves_icall_System_Char_GetNumericValue (gunichar2 c)
59 return (double)g_unichar_digit_value (c);
/*
 * ves_icall_System_Char_GetUnicodeCategory:
 * @c: a UTF-16 code unit.
 *
 * Looks up the GLib type of @c and translates it through catmap.
 * NOTE(review): the index is not range-checked; if the GLib in use defines
 * more GUnicodeType values than catmap has entries this reads past the
 * end of the table -- confirm against the supported GLib versions.
 */
63 ves_icall_System_Char_GetUnicodeCategory (gunichar2 c)
65 return catmap [g_unichar_type (c)];
/*
 * ves_icall_System_Char_IsControl:
 * @c: a UTF-16 code unit.
 *
 * Returns the result of g_unichar_iscntrl (@c) unchanged.
 */
69 ves_icall_System_Char_IsControl (gunichar2 c)
71 return g_unichar_iscntrl (c);
/*
 * ves_icall_System_Char_IsDigit:
 * @c: a UTF-16 code unit.
 *
 * Returns the result of g_unichar_isdigit (@c) unchanged.
 */
75 ves_icall_System_Char_IsDigit (gunichar2 c)
77 return g_unichar_isdigit (c);
/*
 * ves_icall_System_Char_IsLetter:
 * @c: a UTF-16 code unit.
 *
 * Returns the result of g_unichar_isalpha (@c) unchanged.
 */
81 ves_icall_System_Char_IsLetter (gunichar2 c)
83 return g_unichar_isalpha (c);
/*
 * ves_icall_System_Char_IsLower:
 * @c: a UTF-16 code unit.
 *
 * Returns the result of g_unichar_islower (@c) unchanged.
 */
87 ves_icall_System_Char_IsLower (gunichar2 c)
89 return g_unichar_islower (c);
/*
 * ves_icall_System_Char_IsUpper:
 * @c: a UTF-16 code unit.
 *
 * Returns the result of g_unichar_isupper (@c) unchanged.
 */
93 ves_icall_System_Char_IsUpper (gunichar2 c)
95 return g_unichar_isupper (c);
/*
 * ves_icall_System_Char_IsNumber:
 * @c: a UTF-16 code unit.
 *
 * Returns non-zero when the GLib type of @c is one of the three number
 * categories (decimal, letter or other number), zero otherwise.
 */
99 ves_icall_System_Char_IsNumber (gunichar2 c)
101 GUnicodeType t = g_unichar_type (c);
102 return t == G_UNICODE_DECIMAL_NUMBER ||
103 t == G_UNICODE_LETTER_NUMBER ||
104 t == G_UNICODE_OTHER_NUMBER;
/*
 * ves_icall_System_Char_IsPunctuation:
 * @c: a UTF-16 code unit.
 *
 * Returns the result of g_unichar_ispunct (@c) unchanged.
 * NOTE(review): the GLib documentation describes g_unichar_ispunct as
 * matching punctuation *or symbol* characters, so this overlaps with
 * ves_icall_System_Char_IsSymbol -- confirm that is intended; a check on
 * the G_UNICODE_*_PUNCTUATION types would be the narrower test.
 */
108 ves_icall_System_Char_IsPunctuation (gunichar2 c)
110 return g_unichar_ispunct (c);
/*
 * ves_icall_System_Char_IsSeparator:
 * @c: a UTF-16 code unit.
 *
 * Returns non-zero when the GLib type of @c is the line, paragraph or
 * space separator category, zero otherwise.
 */
114 ves_icall_System_Char_IsSeparator (gunichar2 c)
116 GUnicodeType t = g_unichar_type (c);
118 return (t == G_UNICODE_LINE_SEPARATOR ||
119 t == G_UNICODE_PARAGRAPH_SEPARATOR ||
120 t == G_UNICODE_SPACE_SEPARATOR);
/*
 * ves_icall_System_Char_IsSurrogate:
 * @c: a UTF-16 code unit.
 *
 * Returns non-zero when g_unichar_type (@c) reports G_UNICODE_SURROGATE.
 */
124 ves_icall_System_Char_IsSurrogate (gunichar2 c)
126 return (g_unichar_type (c) == G_UNICODE_SURROGATE);
/*
 * ves_icall_System_Char_IsSymbol:
 * @c: a UTF-16 code unit.
 *
 * Returns non-zero when the GLib type of @c is one of the four symbol
 * categories (currency, modifier, math or other symbol), zero otherwise.
 */
130 ves_icall_System_Char_IsSymbol (gunichar2 c)
132 GUnicodeType t = g_unichar_type (c);
134 return (t == G_UNICODE_CURRENCY_SYMBOL ||
135 t == G_UNICODE_MODIFIER_SYMBOL ||
136 t == G_UNICODE_MATH_SYMBOL ||
137 t == G_UNICODE_OTHER_SYMBOL);
/*
 * ves_icall_System_Char_IsWhiteSpace:
 * @c: a UTF-16 code unit.
 *
 * Returns the result of g_unichar_isspace (@c) unchanged.
 */
141 ves_icall_System_Char_IsWhiteSpace (gunichar2 c)
143 return g_unichar_isspace (c);
/*
 * ves_icall_System_Char_ToLower:
 * @c: a UTF-16 code unit.
 *
 * Returns the result of g_unichar_tolower (@c).
 */
147 ves_icall_System_Char_ToLower (gunichar2 c)
149 return g_unichar_tolower (c);
/*
 * ves_icall_System_Char_ToUpper:
 * @c: a UTF-16 code unit.
 *
 * Returns the result of g_unichar_toupper (@c).
 */
153 ves_icall_System_Char_ToUpper (gunichar2 c)
155 return g_unichar_toupper (c);
/*
 * ves_icall_iconv_new_encoder:
 * @name: managed string holding the target encoding name.
 * @big_endian: selects the UTF-16 byte order of the input when
 *              HAVE_NEW_ICONV is defined.
 *
 * Opens an iconv descriptor that converts from UTF-16 to the encoding
 * called @name (iconv_open takes the target encoding first).
 * Only part of the body is visible in this excerpt.
 * NOTE(review): a failing iconv_open (e.g. an encoding name unknown to the
 * local iconv) trips the g_assert below instead of being reported to the
 * caller -- confirm this is acceptable.
 * NOTE(review): n comes from mono_string_to_utf8 (); its release is not
 * visible here -- verify it is freed.
 */
159 ves_icall_iconv_new_encoder (MonoString *name, MonoBoolean big_endian)
164 /* FIXME: don't enforce big endian, support old iconv */
168 n = mono_string_to_utf8 (name);
170 /* force big endian before class libraries are fixed */
171 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
175 #ifdef HAVE_NEW_ICONV
/* source side is UTF-16 in the requested byte order */
176 cd = iconv_open (n, big_endian ? "UTF-16be" : "UTF-16le");
/* presumably the fallback branch for an iconv without the be/le names */
178 cd = iconv_open (n, "UTF-16");
180 g_assert (cd != (iconv_t)-1);
/*
 * ves_icall_iconv_new_decoder:
 * @name: managed string holding the source encoding name.
 * @big_endian: selects the UTF-16 byte order of the output when
 *              HAVE_NEW_ICONV is defined.
 *
 * Opens an iconv descriptor that converts from the encoding called @name
 * to UTF-16 (iconv_open takes the target encoding first).
 * Only part of the body is visible in this excerpt.
 * NOTE(review): a failing iconv_open trips the g_assert below instead of
 * being reported to the caller -- confirm this is acceptable.
 * NOTE(review): n comes from mono_string_to_utf8 (); its release is not
 * visible here -- verify it is freed.
 */
187 ves_icall_iconv_new_decoder (MonoString *name, MonoBoolean big_endian)
192 /* FIXME: don't enforce big endian, support old iconv */
196 n = mono_string_to_utf8 (name);
198 /* force big endian before class libraries are fixed */
199 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
203 #ifdef HAVE_NEW_ICONV
/* target side is UTF-16 in the requested byte order */
204 cd = iconv_open (big_endian ? "UTF-16be" : "UTF-16le", n);
/* presumably the fallback branch for an iconv without the be/le names */
206 cd = iconv_open ("UTF-16", n);
208 g_assert (cd != (iconv_t)-1);
/*
 * ves_icall_iconv_reset:
 * @converter: an iconv descriptor passed around as an opaque pointer.
 *
 * Calls iconv () with NULL input and output arguments, which POSIX
 * defines as resetting the descriptor to its initial shift state.
 */
215 ves_icall_iconv_reset (gpointer converter)
217 iconv_t cd = (iconv_t)converter;
221 iconv(cd, NULL, NULL, NULL, NULL);
/*
 * iconv_get_length:
 * @cd: iconv descriptor.
 * @src: input bytes.
 * @len: number of input bytes.
 * @encode: TRUE when @src holds UTF-16 data to be encoded.
 *
 * Runs @len bytes of @src through iconv () to measure the output; callers
 * in this file treat the result as a byte count.  Only part of the body
 * is visible in this excerpt, so the buffer handling and the exact return
 * statement cannot be described here.
 */
225 iconv_get_length (iconv_t cd, guchar *src, int len, gboolean encode)
/*
 * NOTE(review): these counters are guint, but their addresses are handed
 * to iconv () below, whose byte-count parameters are size_t * in POSIX.
 * The two types differ in size on LP64 targets -- confirm and consider
 * declaring them as size_t.
 */
231 guint inbytes_remaining;
232 guint outbytes_remaining;
234 gboolean have_error = FALSE;
/*
 * Without HAVE_NEW_ICONV, on a little-endian host, the encode path works
 * on a g_memdup () copy of the input and rewrites bytes inside each
 * 2-byte unit (presumably a byte swap; only one statement of the loop is
 * visible here).
 */
240 #ifndef HAVE_NEW_ICONV
241 if (G_BYTE_ORDER == G_LITTLE_ENDIAN && encode) {
244 src = g_memdup (src, len);
245 for (i = 0; i < len; i += 2) {
/* NOTE(review): reads src [i + 1]; with an odd len that is one byte past the copy -- confirm len is always even here. */
247 src [i] = src [i + 1];
254 inbytes_remaining = len;
259 outbytes_remaining = outbuf_size;
262 err = iconv (cd, (char **)&p, &inbytes_remaining,
263 (char **)&outp, &outbytes_remaining);
265 if(err == (size_t)-1) {
268 /* Incomplete text, do not report an error */
/* compares how far p advanced from src with the requested length */
286 if((p - src) != len) {
292 #ifndef HAVE_NEW_ICONV
293 if (G_BYTE_ORDER == G_LITTLE_ENDIAN && encode)
298 g_assert_not_reached ();
/*
 * ves_icall_iconv_get_byte_count:
 * @converter: iconv descriptor passed as an opaque pointer.
 * @chars: managed array of UTF-16 code units.
 * @idx: index of the first element to measure.
 * @count: number of elements the caller asks about.
 *
 * Measures the encoded size of @chars starting at @idx by calling
 * iconv_get_length () in encode mode.
 * NOTE(review): len is derived from the array length minus @idx, so
 * @count is only used in the assertion and everything up to the end of
 * the array is measured -- this looks like it should be count * 2;
 * confirm against the managed caller.
 * NOTE(review): the first assertion rejects idx == length even when
 * nothing is to be converted (e.g. an empty array) -- confirm.
 */
306 ves_icall_iconv_get_byte_count (gpointer converter, MonoArray *chars, gint32 idx, gint32 count)
308 iconv_t cd = (iconv_t)converter;
314 g_assert (mono_array_length (chars) > idx);
315 g_assert (mono_array_length (chars) >= (idx + count));
/* two bytes per UTF-16 element; the statement run on a zero length is not visible here */
317 if (!(len = (mono_array_length (chars) - idx) * 2))
320 src = mono_array_addr (chars, guint16, idx);
322 return iconv_get_length (cd, src, len, TRUE);
/*
 * iconv_convert:
 * @cd: iconv descriptor.
 * @src: input bytes.
 * @len: number of input bytes.
 * @dest: output buffer.
 * @max_len: capacity of @dest in bytes.
 * @encode: TRUE when @src holds UTF-16 data to be encoded.
 *
 * Converts @len bytes of @src with iconv () and returns the number of
 * bytes produced (max_len minus the space iconv left unused).
 * Only part of the body is visible in this excerpt.
 */
326 iconv_convert (iconv_t cd, guchar *src, int len, guchar *dest, int max_len, gboolean encode)
/*
 * NOTE(review): these counters are guint, but their addresses are handed
 * to iconv () below, whose byte-count parameters are size_t * in POSIX.
 * The two types differ in size on LP64 targets -- confirm and consider
 * declaring them as size_t.
 */
329 guint inbytes_remaining;
330 guint outbytes_remaining;
332 gboolean have_error = FALSE;
/*
 * Without HAVE_NEW_ICONV, on a little-endian host, the encode path works
 * on a g_memdup () copy of the input and rewrites bytes inside each
 * 2-byte unit (presumably a byte swap; only one statement of the loop is
 * visible here).
 */
339 #ifndef HAVE_NEW_ICONV
340 if (G_BYTE_ORDER == G_LITTLE_ENDIAN && encode) {
343 src = g_memdup (src, len);
344 for (i = 0; i < len; i += 2) {
346 src [i] = src [i + 1];
353 inbytes_remaining = len;
354 outbuf_size = max_len;
356 outbytes_remaining = outbuf_size;
360 err = iconv (cd, (char **)&p, &inbytes_remaining, (char **)&outp, &outbytes_remaining);
362 if(err == (size_t)-1) {
363 if (errno == EINVAL) {
364 /* Incomplete text, do not report an error */
365 } else if (errno == EILSEQ) {
/* compares how far p advanced from src with the requested length */
374 if ((p - src) != len) {
380 #ifndef HAVE_NEW_ICONV
381 if (G_BYTE_ORDER == G_LITTLE_ENDIAN) {
/* mb = bytes written to dest; the loop below rewrites them per 2-byte unit */
385 int mb = max_len - outbytes_remaining;
387 for (i = 0; i < mb; i+=2) {
389 dest [i] = dest [i + 1];
396 g_assert_not_reached ();
399 /* we return the number of bytes written in dest */
400 return max_len - outbytes_remaining;
/*
 * ves_icall_iconv_get_bytes:
 * @converter: iconv descriptor passed as an opaque pointer.
 * @chars: managed array of UTF-16 code units to encode.
 * @charIndex: index of the first element of @chars to encode.
 * @charCount: number of elements the caller wants encoded.
 * @bytes: managed byte array receiving the output.
 * @byteIndex: offset in @bytes where output starts.
 *
 * Encodes part of @chars into @bytes through iconv_convert () and returns
 * its result (the number of bytes written).
 */
405 ves_icall_iconv_get_bytes (gpointer converter, MonoArray *chars, gint32 charIndex, gint32 charCount,
406 MonoArray *bytes, gint32 byteIndex)
408 iconv_t cd = (iconv_t)converter;
418 g_assert (mono_array_length (chars) > charIndex);
419 g_assert (mono_array_length (chars) >= (charIndex + charCount));
420 g_assert (mono_array_length (bytes) > byteIndex);
/* NOTE(review): checks the length of chars against a byteIndex-based bound; this looks like it was meant to test bytes -- confirm. */
421 g_assert (mono_array_length (chars) >= (byteIndex + charCount));
/*
 * NOTE(review): len subtracts charIndex from charCount although charCount
 * is treated as a count (not an end index) in the assertion above; for
 * charIndex > 0 this converts too few bytes or yields a negative length.
 * Looks like it should be charCount * 2 -- confirm.
 */
423 if (!(len = (charCount - charIndex) * 2))
426 src = mono_array_addr (chars, guint16, charIndex);
427 dest = mono_array_addr (bytes, char, byteIndex);
/* room left in bytes after byteIndex */
429 max_len = mono_array_length (bytes) - byteIndex;
431 return iconv_convert (cd, src, len, dest, max_len, TRUE);
/*
 * ves_icall_iconv_get_char_count:
 * @converter: iconv descriptor passed as an opaque pointer.
 * @bytes: managed byte array holding encoded text.
 * @idx: offset of the first byte to measure.
 * @count: number of bytes to measure.
 *
 * Measures how many 2-byte units decoding @count bytes of @bytes would
 * produce: the byte length reported by iconv_get_length () in decode
 * mode, divided by two.
 * NOTE(review): the first assertion rejects idx == length even when
 * @count is 0 (e.g. an empty array) -- confirm.
 */
435 ves_icall_iconv_get_char_count (gpointer converter, MonoArray *bytes, gint32 idx, gint32 count)
437 iconv_t cd = (iconv_t)converter;
442 g_assert (mono_array_length (bytes) > idx);
443 g_assert (mono_array_length (bytes) >= (idx + count));
445 src = mono_array_addr (bytes, char, idx);
447 /* iconv_get_length () returns the number of bytes */
448 return iconv_get_length (cd, src, (int) count, FALSE) / 2;
/*
 * ves_icall_iconv_get_chars:
 * @converter: iconv descriptor passed as an opaque pointer.
 * @bytes: managed byte array holding encoded text.
 * @byteIndex: offset of the first byte to decode.
 * @byteCount: number of bytes to decode.
 * @chars: managed array of UTF-16 code units receiving the output.
 * @charIndex: index in @chars where output starts.
 *
 * Decodes part of @bytes into @chars through iconv_convert () and returns
 * the number of 2-byte units written (its byte result divided by two).
 */
452 ves_icall_iconv_get_chars (gpointer converter, MonoArray *bytes, gint32 byteIndex, gint32 byteCount,
453 MonoArray *chars, gint32 charIndex)
455 iconv_t cd = (iconv_t)converter;
462 g_assert (mono_array_length (bytes) > byteIndex);
/* NOTE(review): checks the length of chars against the byte range; this looks like it was meant to test bytes -- confirm. */
463 g_assert (mono_array_length (chars) >= (byteIndex + byteCount));
464 g_assert (mono_array_length (chars) > charIndex);
466 src = mono_array_addr (bytes, char, byteIndex);
467 dest = mono_array_addr (chars, guint16, charIndex);
/* output capacity in bytes: two per remaining element of chars */
469 max_len = (mono_array_length (chars) - charIndex) * 2;
471 /* iconv_convert () returns the number of bytes */
472 return iconv_convert (cd, src, (int) byteCount, dest, max_len, FALSE) / 2;