2 * unicode.h: Unicode support
5 * Dietmar Maurer (dietmar@ximian.com)
7 * (C) 2001 Ximian, Inc.
14 #include <mono/metadata/object.h>
15 #include <mono/metadata/unicode.h>
/*
 * Lookup table from GLib's GUnicodeType to MonoUnicodeCategory.
 * ves_icall_System_Char_GetUnicodeCategory () indexes it directly with the
 * value returned by g_unichar_type (), so the entries have to stay in the
 * order of the GUnicodeType enumerators named in the per-entry comments.
 */
19 static MonoUnicodeCategory catmap[] = {
20 /* G_UNICODE_CONTROL = */ Control,
21 /* G_UNICODE_FORMAT = */ Format,
22 /* G_UNICODE_UNASSIGNED = */ OtherNotAssigned,
23 /* G_UNICODE_PRIVATE_USE = */ PrivateUse,
24 /* G_UNICODE_SURROGATE = */ Surrogate,
25 /* G_UNICODE_LOWERCASE_LETTER = */ LowercaseLetter,
26 /* G_UNICODE_MODIFIER_LETTER = */ ModifierLetter,
27 /* G_UNICODE_OTHER_LETTER = */ OtherLetter,
28 /* G_UNICODE_TITLECASE_LETTER = */ TitlecaseLetter,
29 /* G_UNICODE_UPPERCASE_LETTER = */ UppercaseLetter,
30 /* G_UNICODE_COMBINING_MARK = */ SpaceCombiningMark,
31 /* G_UNICODE_ENCLOSING_MARK = */ EnclosingMark,
32 /* G_UNICODE_NON_SPACING_MARK = */ NonSpacingMark,
33 /* G_UNICODE_DECIMAL_NUMBER = */ DecimalDigitNumber,
34 /* G_UNICODE_LETTER_NUMBER = */ LetterNumber,
35 /* G_UNICODE_OTHER_NUMBER = */ OtherNumber,
36 /* G_UNICODE_CONNECT_PUNCTUATION = */ ConnectorPunctuation,
37 /* G_UNICODE_DASH_PUNCTUATION = */ DashPunctuation,
38 /* G_UNICODE_CLOSE_PUNCTUATION = */ ClosePunctuation,
39 /* G_UNICODE_FINAL_PUNCTUATION = */ FinalQuotePunctuation,
40 /* G_UNICODE_INITIAL_PUNCTUATION = */ InitialQuotePunctuation,
41 /* G_UNICODE_OTHER_PUNCTUATION = */ OtherPunctuation,
42 /* G_UNICODE_OPEN_PUNCTUATION = */ OpenPunctuation,
43 /* G_UNICODE_CURRENCY_SYMBOL = */ CurrencySymbol,
44 /* G_UNICODE_MODIFIER_SYMBOL = */ ModifierSymbol,
45 /* G_UNICODE_MATH_SYMBOL = */ MathSymbol,
46 /* G_UNICODE_OTHER_SYMBOL = */ OtherSymbol,
47 /* G_UNICODE_LINE_SEPARATOR = */ LineSeperator,
48 /* G_UNICODE_PARAGRAPH_SEPARATOR = */ ParagraphSeperator,
49 /* G_UNICODE_SPACE_SEPARATOR = */ SpaceSeperator,
/*
 * Per its name, the runtime side of System.Char.GetNumericValue: returns
 * g_unichar_digit_value (c) converted to double.
 * NOTE(review): GLib documents g_unichar_digit_value () as yielding -1 for
 * anything that is not a decimal digit, so other numeric characters
 * presumably come back as -1.0 -- confirm that is the intended contract.
 */
53 ves_icall_System_Char_GetNumericValue (gunichar2 c)
55 return (double)g_unichar_digit_value (c);
/*
 * Per its name, the runtime side of System.Char.GetUnicodeCategory:
 * classifies c with g_unichar_type () and translates the resulting
 * GUnicodeType through the catmap table.  The lookup is not range-checked,
 * so it relies on catmap having an entry for every value GLib can return.
 */
59 ves_icall_System_Char_GetUnicodeCategory (gunichar2 c)
61 return catmap [g_unichar_type (c)];
/*
 * Per its name, the runtime side of System.Char.IsControl: returns whatever
 * g_unichar_iscntrl () reports for the UTF-16 code unit c.
 */
65 ves_icall_System_Char_IsControl (gunichar2 c)
67 return g_unichar_iscntrl (c);
/*
 * Per its name, the runtime side of System.Char.IsDigit: returns whatever
 * g_unichar_isdigit () reports for the UTF-16 code unit c.
 */
71 ves_icall_System_Char_IsDigit (gunichar2 c)
73 return g_unichar_isdigit (c);
/*
 * Per its name, the runtime side of System.Char.IsLetter: returns whatever
 * g_unichar_isalpha () reports for the UTF-16 code unit c.
 */
77 ves_icall_System_Char_IsLetter (gunichar2 c)
79 return g_unichar_isalpha (c);
/*
 * Per its name, the runtime side of System.Char.IsLower: returns whatever
 * g_unichar_islower () reports for the UTF-16 code unit c.
 */
83 ves_icall_System_Char_IsLower (gunichar2 c)
85 return g_unichar_islower (c);
/*
 * Per its name, the runtime side of System.Char.IsUpper: returns whatever
 * g_unichar_isupper () reports for the UTF-16 code unit c.
 */
89 ves_icall_System_Char_IsUpper (gunichar2 c)
91 return g_unichar_isupper (c);
95 ves_icall_System_Char_IsNumber (gunichar2 c)
97 return g_unichar_isdigit (c);
101 ves_icall_System_Char_IsPunctuation (gunichar2 c)
103 return g_unichar_ispunct (c);
/*
 * Per its name, the runtime side of System.Char.IsSeparator: true when the
 * GLib type of c is one of the three separator categories (line, paragraph
 * or space separator).
 */
107 ves_icall_System_Char_IsSeparator (gunichar2 c)
109 GUnicodeType t = g_unichar_type (c);
111 return (t == G_UNICODE_LINE_SEPARATOR ||
112 t == G_UNICODE_PARAGRAPH_SEPARATOR ||
113 t == G_UNICODE_SPACE_SEPARATOR);
/*
 * Per its name, the runtime side of System.Char.IsSurrogate: true when
 * g_unichar_type () classifies the code unit c as G_UNICODE_SURROGATE.
 */
117 ves_icall_System_Char_IsSurrogate (gunichar2 c)
119 return (g_unichar_type (c) == G_UNICODE_SURROGATE);
/*
 * Per its name, the runtime side of System.Char.IsSymbol: true when the
 * GLib type of c is one of the four symbol categories (currency, modifier,
 * math or other symbol).
 */
123 ves_icall_System_Char_IsSymbol (gunichar2 c)
125 GUnicodeType t = g_unichar_type (c);
127 return (t == G_UNICODE_CURRENCY_SYMBOL ||
128 t == G_UNICODE_MODIFIER_SYMBOL ||
129 t == G_UNICODE_MATH_SYMBOL ||
130 t == G_UNICODE_OTHER_SYMBOL);
/*
 * Per its name, the runtime side of System.Char.IsWhiteSpace: returns
 * whatever g_unichar_isspace () reports for the UTF-16 code unit c.
 */
134 ves_icall_System_Char_IsWhiteSpace (gunichar2 c)
136 return g_unichar_isspace (c);
/*
 * Per its name, the runtime side of System.Char.ToLower: returns the result
 * of g_unichar_tolower () for the UTF-16 code unit c.
 */
140 ves_icall_System_Char_ToLower (gunichar2 c)
142 return g_unichar_tolower (c);
/*
 * Per its name, the runtime side of System.Char.ToUpper: returns the result
 * of g_unichar_toupper () for the UTF-16 code unit c.
 */
146 ves_icall_System_Char_ToUpper (gunichar2 c)
148 return g_unichar_toupper (c);
/*
 * Opens an iconv descriptor that converts from UTF-16 into the encoding
 * called `name` (iconv_open () takes the target encoding first, the source
 * second).  With HAVE_NEW_ICONV the source is spelled "UTF-16le"; the plain
 * "UTF-16" call is presumably the #else branch (directive not visible here).
 * No visible line consults big_endian.
 * NOTE(review): an encoding name that iconv_open () rejects trips the
 * g_assert instead of being reported to the caller, and whether the string
 * returned by mono_string_to_utf8 () is released lies outside this excerpt
 * -- verify both.
 */
152 ves_icall_iconv_new_encoder (MonoString *name, MonoBoolean big_endian)
157 // fixme: add support for big_endian
161 n = mono_string_to_utf8 (name);
163 #ifdef HAVE_NEW_ICONV
164 cd = iconv_open (n, "UTF-16le");
166 cd = iconv_open (n, "UTF-16");
168 g_assert (cd != (iconv_t)-1);
/*
 * Opens an iconv descriptor that converts from the encoding called `name`
 * into UTF-16 (iconv_open () takes the target encoding first, the source
 * second).  With HAVE_NEW_ICONV the target is spelled "UTF-16le"; the plain
 * "UTF-16" call is presumably the #else branch (directive not visible here).
 * No visible line consults big_endian.
 * NOTE(review): an encoding name that iconv_open () rejects trips the
 * g_assert instead of being reported to the caller, and whether the string
 * returned by mono_string_to_utf8 () is released lies outside this excerpt
 * -- verify both.
 */
174 ves_icall_iconv_new_decoder (MonoString *name, MonoBoolean big_endian)
179 // fixme: add support for big_endian
183 n = mono_string_to_utf8 (name);
185 #ifdef HAVE_NEW_ICONV
186 cd = iconv_open ("UTF-16le", n);
188 cd = iconv_open ("UTF-16", n);
190 g_assert (cd != (iconv_t)-1);
/*
 * Resets the conversion descriptor passed in as an opaque pointer.
 * Calling iconv () with NULL input and output arguments is, per POSIX, the
 * request to put the descriptor back into its initial conversion state.
 */
196 ves_icall_iconv_reset (gpointer converter)
198 iconv_t cd = (iconv_t)converter;
202 iconv(cd, NULL, NULL, NULL, NULL);
/*
 * Feeds the `len` bytes at `src` through iconv () using descriptor `cd`.
 * Judging by the name and by its callers this is used to measure the size of
 * the converted output; the return statement itself is outside this excerpt.
 *
 * An EINVAL-style "incomplete input" result is tolerated (see the comment
 * below); other failures appear to end in g_assert_not_reached ().
 *
 * NOTE(review): without HAVE_NEW_ICONV on little-endian hosts the input is
 * copied with g_memdup () and its bytes are rearranged two at a time.  The
 * `src [i + 1]` access assumes `len` is even, yet
 * ves_icall_iconv_get_char_count () passes a plain byte buffer of arbitrary
 * length through this same path -- confirm that is safe and intended.
 */
206 iconv_get_length (iconv_t cd, guchar *src, int len)
/* iconv () takes size_t * for both counters; a guint is narrower than
 * size_t on LP64 targets, so passing its address let iconv () write past
 * the variable. */
212 size_t inbytes_remaining;
213 size_t outbytes_remaining;
215 gboolean have_error = FALSE;
221 #ifndef HAVE_NEW_ICONV
222 if (G_BYTE_ORDER == G_LITTLE_ENDIAN) {
223 src = g_memdup (src, len);
224 for (i = 0; i < len; i += 2) {
226 src [i] = src [i + 1];
233 inbytes_remaining = len;
238 outbytes_remaining = outbuf_size;
241 err = iconv (cd, (const char **)&p, &inbytes_remaining,
242 (char **)&outp, &outbytes_remaining);
244 if(err == (size_t)-1) {
247 /* Incomplete text, do not report an error */
264 if((p - src) != len) {
270 #ifndef HAVE_NEW_ICONV
271 if (G_BYTE_ORDER == G_LITTLE_ENDIAN)
276 g_assert_not_reached ();
/*
 * Runs `count` UTF-16 code units of `chars`, starting at `index`, through
 * iconv_get_length () with the given converter and returns that helper's
 * result.
 *
 * The asserts abort the runtime when `index` or `index + count` falls
 * outside the array.
 */
284 ves_icall_iconv_get_byte_count (gpointer converter, MonoArray *chars, gint32 index, gint32 count)
286 iconv_t cd = (iconv_t)converter;
292 g_assert (mono_array_length (chars) > index);
293 g_assert (mono_array_length (chars) >= (index + count));
/* Elements are guint16, hence two bytes per requested character.  The
 * length used to be derived from "array length - index", which ignored
 * `count` and could never be zero after the first assert above. */
295 if (!(len = count * 2))
298 src = mono_array_addr (chars, guint16, index);
300 return iconv_get_length (cd, src, len);
/*
 * Converts the `len` bytes at `src` with descriptor `cd`, writing at most
 * `max_len` bytes to `dest`, and returns the number of bytes written.
 *
 * An EINVAL result from iconv () (incomplete input) is not treated as an
 * error; other failures appear to end in g_assert_not_reached ().
 *
 * NOTE(review): without HAVE_NEW_ICONV on little-endian hosts the input is
 * copied with g_memdup () and its bytes are rearranged two at a time.  The
 * `src [i + 1]` access assumes `len` is even, and the same path is taken
 * when ves_icall_iconv_get_chars () passes encoded bytes rather than UTF-16
 * -- confirm that is safe and intended.
 */
304 iconv_convert (iconv_t cd, guchar *src, int len, guchar *dest, int max_len)
/* iconv () takes size_t * for both counters; a guint is narrower than
 * size_t on LP64 targets, so passing its address let iconv () write past
 * the variable. */
308 size_t inbytes_remaining;
309 size_t outbytes_remaining;
311 gboolean have_error = FALSE;
318 #ifndef HAVE_NEW_ICONV
319 if (G_BYTE_ORDER == G_LITTLE_ENDIAN) {
320 src = g_memdup (src, len);
321 for (i = 0; i < len; i += 2) {
323 src [i] = src [i + 1];
330 inbytes_remaining = len;
331 outbuf_size = max_len;
333 outbytes_remaining = outbuf_size;
336 err = iconv (cd, (const char **)&p, &inbytes_remaining, (char **)&outp, &outbytes_remaining);
338 if(err == (size_t)-1) {
339 if (errno == EINVAL) {
340 /* Incomplete text, do not report an error */
346 if ((p - src) != len) {
352 #ifndef HAVE_NEW_ICONV
353 if (G_BYTE_ORDER == G_LITTLE_ENDIAN)
357 g_assert_not_reached ();
360 /* we return the number of bytes written in dest */
/* outbytes_remaining never exceeds max_len, so the cast cannot truncate
 * and the subtraction stays in int. */
361 return max_len - (int)outbytes_remaining;
/*
 * Encodes `charCount` UTF-16 code units of `chars`, starting at
 * `charIndex`, into `bytes` starting at `byteIndex`, and returns the number
 * of bytes iconv_convert () reports as written.  Output is capped at the
 * space left in `bytes` after `byteIndex`.
 *
 * The asserts abort the runtime when an index or range is out of bounds.
 */
366 ves_icall_iconv_get_bytes (gpointer converter, MonoArray *chars, gint32 charIndex, gint32 charCount,
367 MonoArray *bytes, gint32 byteIndex)
369 iconv_t cd = (iconv_t)converter;
376 g_assert (mono_array_length (chars) > charIndex);
377 g_assert (mono_array_length (chars) >= (charIndex + charCount));
378 g_assert (mono_array_length (bytes) > byteIndex);
/* Destination sanity check: this used to compare the length of `chars`
 * against an offset into `bytes`, aborting valid calls whenever
 * byteIndex + charCount exceeded the source length.  It now requires at
 * least one output byte of room per input character. */
379 g_assert (mono_array_length (bytes) >= (byteIndex + charCount));
/* Two bytes per guint16 element; honour charCount instead of converting
 * everything up to the end of the array. */
381 if (!(len = charCount * 2))
384 src = mono_array_addr (chars, guint16, charIndex);
385 dest = mono_array_addr (bytes, char, byteIndex);
387 max_len = mono_array_length (bytes) - byteIndex;
389 return iconv_convert (cd, src, len, dest, max_len);
/*
 * Runs `count` bytes of `bytes`, starting at `index`, through
 * iconv_get_length () with the given converter and returns that helper's
 * result.
 * NOTE(review): the name promises a character count, but the value is
 * returned exactly as iconv_get_length () produced it -- confirm the unit.
 *
 * The asserts abort the runtime when `index` or `index + count` falls
 * outside the array.
 */
393 ves_icall_iconv_get_char_count (gpointer converter, MonoArray *bytes, gint32 index, gint32 count)
395 iconv_t cd = (iconv_t)converter;
401 g_assert (mono_array_length (bytes) > index);
402 g_assert (mono_array_length (bytes) >= (index + count));
/* Honour `count`: the length used to be "array length - index", which
 * ignored the requested range and could never be zero after the first
 * assert above. */
404 if (!(len = count))
407 src = mono_array_addr (bytes, char, index);
409 return iconv_get_length (cd, src, len);
/*
 * Decodes `byteCount` bytes of `bytes`, starting at `byteIndex`, into
 * `chars` starting at `charIndex`.  Output is capped at the space left in
 * `chars` after `charIndex`; the byte count reported by iconv_convert () is
 * halved to yield the number of UTF-16 code units written.
 *
 * The asserts abort the runtime when an index or range is out of bounds.
 */
413 ves_icall_iconv_get_chars (gpointer converter, MonoArray *bytes, gint32 byteIndex, gint32 byteCount,
414 MonoArray *chars, gint32 charIndex)
416 iconv_t cd = (iconv_t)converter;
423 g_assert (mono_array_length (bytes) > byteIndex);
/* The source range has to fit in `bytes`; this used to test the length
 * of `chars`, aborting valid calls whose output array is shorter than the
 * encoded input. */
424 g_assert (mono_array_length (bytes) >= (byteIndex + byteCount));
425 g_assert (mono_array_length (chars) > charIndex);
/* Honour byteCount instead of decoding everything up to the end of the
 * array. */
427 if (!(len = byteCount))
430 src = mono_array_addr (bytes, char, byteIndex);
431 dest = mono_array_addr (chars, guint16, charIndex);
433 max_len = (mono_array_length (chars) - charIndex) * 2;
435 /* iconv_convert () returns the number of bytes */
436 return iconv_convert (cd, src, len, dest, max_len) / 2;