2 * unicode.c: Unicode support
5 * Dietmar Maurer (dietmar@ximian.com)
7 * (C) 2001 Ximian, Inc.
14 #include <mono/metadata/object.h>
15 #include <mono/metadata/unicode.h>
/*
 * catmap: translation table from GLib's GUnicodeType to MonoUnicodeCategory.
 * ves_icall_System_Char_GetUnicodeCategory () indexes it directly with the
 * value returned by g_unichar_type (), so the entries have to stay in the
 * same order as the GUnicodeType enumerators named in the per-entry comments.
 */
23 static MonoUnicodeCategory catmap[] = {
24 /* G_UNICODE_CONTROL = */ Control,
25 /* G_UNICODE_FORMAT = */ Format,
26 /* G_UNICODE_UNASSIGNED = */ OtherNotAssigned,
27 /* G_UNICODE_PRIVATE_USE = */ PrivateUse,
28 /* G_UNICODE_SURROGATE = */ Surrogate,
29 /* G_UNICODE_LOWERCASE_LETTER = */ LowercaseLetter,
30 /* G_UNICODE_MODIFIER_LETTER = */ ModifierLetter,
31 /* G_UNICODE_OTHER_LETTER = */ OtherLetter,
32 /* G_UNICODE_TITLECASE_LETTER = */ TitlecaseLetter,
33 /* G_UNICODE_UPPERCASE_LETTER = */ UppercaseLetter,
34 /* G_UNICODE_COMBINING_MARK = */ SpaceCombiningMark,
35 /* G_UNICODE_ENCLOSING_MARK = */ EnclosingMark,
36 /* G_UNICODE_NON_SPACING_MARK = */ NonSpacingMark,
37 /* G_UNICODE_DECIMAL_NUMBER = */ DecimalDigitNumber,
38 /* G_UNICODE_LETTER_NUMBER = */ LetterNumber,
39 /* G_UNICODE_OTHER_NUMBER = */ OtherNumber,
40 /* G_UNICODE_CONNECT_PUNCTUATION = */ ConnectorPunctuation,
41 /* G_UNICODE_DASH_PUNCTUATION = */ DashPunctuation,
42 /* G_UNICODE_CLOSE_PUNCTUATION = */ ClosePunctuation,
43 /* G_UNICODE_FINAL_PUNCTUATION = */ FinalQuotePunctuation,
44 /* G_UNICODE_INITIAL_PUNCTUATION = */ InitialQuotePunctuation,
45 /* G_UNICODE_OTHER_PUNCTUATION = */ OtherPunctuation,
46 /* G_UNICODE_OPEN_PUNCTUATION = */ OpenPunctuation,
47 /* G_UNICODE_CURRENCY_SYMBOL = */ CurrencySymbol,
48 /* G_UNICODE_MODIFIER_SYMBOL = */ ModifierSymbol,
49 /* G_UNICODE_MATH_SYMBOL = */ MathSymbol,
50 /* G_UNICODE_OTHER_SYMBOL = */ OtherSymbol,
51 /* G_UNICODE_LINE_SEPARATOR = */ LineSeperator,
52 /* G_UNICODE_PARAGRAPH_SEPARATOR = */ ParagraphSeperator,
53 /* G_UNICODE_SPACE_SEPARATOR = */ SpaceSeperator,
/*
 * Returns the result of g_unichar_digit_value () for @c, converted to double.
 * NOTE(review): presumably only decimal digits receive a value here; other
 * numeric characters (fractions, letter numbers) would need separate
 * handling. Confirm against the GLib documentation and the managed caller.
 */
57 ves_icall_System_Char_GetNumericValue (gunichar2 c)
59 return (double)g_unichar_digit_value (c);
/*
 * Maps the GLib type of @c to a MonoUnicodeCategory through catmap.
 * NOTE(review): the index is not range checked; this assumes
 * g_unichar_type () never returns a value past the last catmap entry.
 */
63 ves_icall_System_Char_GetUnicodeCategory (gunichar2 c)
65 return catmap [g_unichar_type (c)];
/* Reports whether @c is a control character, as decided by g_unichar_iscntrl (). */
69 ves_icall_System_Char_IsControl (gunichar2 c)
71 return g_unichar_iscntrl (c);
/* Reports whether @c is a digit, as decided by g_unichar_isdigit (). */
75 ves_icall_System_Char_IsDigit (gunichar2 c)
77 return g_unichar_isdigit (c);
/* Reports whether @c is alphabetic, as decided by g_unichar_isalpha (). */
81 ves_icall_System_Char_IsLetter (gunichar2 c)
83 return g_unichar_isalpha (c);
/* Reports whether @c is a lowercase character, as decided by g_unichar_islower (). */
87 ves_icall_System_Char_IsLower (gunichar2 c)
89 return g_unichar_islower (c);
/* Reports whether @c is an uppercase character, as decided by g_unichar_isupper (). */
93 ves_icall_System_Char_IsUpper (gunichar2 c)
95 return g_unichar_isupper (c);
/*
 * Reports whether @c belongs to one of the Unicode number categories
 * (decimal digit, letter number or other number).
 * The former g_unichar_isxdigit () test was a hexadecimal-digit check: it
 * accepted the letters a-f/A-F and rejected numeric characters that are not
 * hexadecimal digits. The category test follows the same pattern as
 * ves_icall_System_Char_IsSeparator () and ves_icall_System_Char_IsSymbol ().
 */
99 ves_icall_System_Char_IsNumber (gunichar2 c)
101 return (g_unichar_type (c) == G_UNICODE_DECIMAL_NUMBER || g_unichar_type (c) == G_UNICODE_LETTER_NUMBER || g_unichar_type (c) == G_UNICODE_OTHER_NUMBER);
/*
 * Reports whether @c is punctuation, as decided by g_unichar_ispunct ().
 * NOTE(review): GLib documents g_unichar_ispunct () as covering symbols as
 * well as punctuation. If this icall must exclude the categories handled by
 * ves_icall_System_Char_IsSymbol (), a g_unichar_type () category test is
 * needed instead. Confirm against the managed caller.
 */
105 ves_icall_System_Char_IsPunctuation (gunichar2 c)
107 return g_unichar_ispunct (c);
/*
 * Reports whether the GLib type of @c is one of the three separator
 * categories: line, paragraph or space separator.
 */
111 ves_icall_System_Char_IsSeparator (gunichar2 c)
113 GUnicodeType t = g_unichar_type (c);
115 return (t == G_UNICODE_LINE_SEPARATOR ||
116 t == G_UNICODE_PARAGRAPH_SEPARATOR ||
117 t == G_UNICODE_SPACE_SEPARATOR);
/* Reports whether g_unichar_type () classifies @c as G_UNICODE_SURROGATE. */
121 ves_icall_System_Char_IsSurrogate (gunichar2 c)
123 return (g_unichar_type (c) == G_UNICODE_SURROGATE);
/*
 * Reports whether the GLib type of @c is one of the four symbol categories:
 * currency, modifier, math or other symbol.
 */
127 ves_icall_System_Char_IsSymbol (gunichar2 c)
129 GUnicodeType t = g_unichar_type (c);
131 return (t == G_UNICODE_CURRENCY_SYMBOL ||
132 t == G_UNICODE_MODIFIER_SYMBOL ||
133 t == G_UNICODE_MATH_SYMBOL ||
134 t == G_UNICODE_OTHER_SYMBOL);
/* Reports whether @c is white space, as decided by g_unichar_isspace (). */
138 ves_icall_System_Char_IsWhiteSpace (gunichar2 c)
140 return g_unichar_isspace (c);
/*
 * Returns the value produced by g_unichar_tolower () for @c.
 * NOTE(review): the declared return type is not visible here; if it is
 * narrower than the type returned by g_unichar_tolower (), check that the
 * result cannot be truncated.
 */
144 ves_icall_System_Char_ToLower (gunichar2 c)
146 return g_unichar_tolower (c);
/*
 * Returns the value produced by g_unichar_toupper () for @c.
 * NOTE(review): the declared return type is not visible here; if it is
 * narrower than the type returned by g_unichar_toupper (), check that the
 * result cannot be truncated.
 */
150 ves_icall_System_Char_ToUpper (gunichar2 c)
152 return g_unichar_toupper (c);
/*
 * Opens an iconv conversion descriptor that converts from UTF-16 to the
 * encoding called @name (the name is converted to UTF-8 for iconv_open ()).
 * With HAVE_NEW_ICONV the UTF-16 byte order is selected by @big_endian;
 * otherwise plain "UTF-16" is requested.
 * NOTE(review): a failing iconv_open () trips g_assert (), so an unsupported
 * encoding name aborts instead of being reported to the caller. Confirm this
 * is intended.
 * NOTE(review): the release of the string returned by mono_string_to_utf8 ()
 * is not visible here. Verify that it is freed.
 */
156 ves_icall_iconv_new_encoder (MonoString *name, MonoBoolean big_endian)
161 // fixme: don't enforce big endian, support old iconv
165 n = mono_string_to_utf8 (name);
167 /* force big endian before class libraries are fixed */
168 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
172 #ifdef HAVE_NEW_ICONV
173 cd = iconv_open (n, big_endian ? "UTF-16be" : "UTF-16le");
175 cd = iconv_open (n, "UTF-16");
177 g_assert (cd != (iconv_t)-1);
/*
 * Opens an iconv conversion descriptor that converts from the encoding
 * called @name (converted to UTF-8 for iconv_open ()) to UTF-16.
 * With HAVE_NEW_ICONV the UTF-16 byte order is selected by @big_endian;
 * otherwise plain "UTF-16" is requested.
 * NOTE(review): a failing iconv_open () trips g_assert (), so an unsupported
 * encoding name aborts instead of being reported to the caller. Confirm this
 * is intended.
 * NOTE(review): the release of the string returned by mono_string_to_utf8 ()
 * is not visible here. Verify that it is freed.
 */
184 ves_icall_iconv_new_decoder (MonoString *name, MonoBoolean big_endian)
189 // fixme: don't enforce big endian, support old iconv
193 n = mono_string_to_utf8 (name);
195 /* force big endian before class libraries are fixed */
196 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
200 #ifdef HAVE_NEW_ICONV
201 cd = iconv_open (big_endian ? "UTF-16be" : "UTF-16le", n);
203 cd = iconv_open ("UTF-16", n);
205 g_assert (cd != (iconv_t)-1);
/*
 * Resets the conversion state of the iconv descriptor passed in @converter
 * by calling iconv () with NULL input and output buffers.
 */
212 ves_icall_iconv_reset (gpointer converter)
214 iconv_t cd = (iconv_t)converter;
218 iconv(cd, NULL, NULL, NULL, NULL);
/*
 * iconv_get_length: runs @len bytes starting at @src through @cd in order to
 * measure the converted output; the callers treat the result as a number of
 * bytes.
 * @encode: TRUE when called for UTF-16 input (ves_icall_iconv_get_byte_count),
 * FALSE when called for encoded bytes (ves_icall_iconv_get_char_count).
 * Without HAVE_NEW_ICONV, an encode request on a little-endian host works on
 * a g_memdup () copy of @src whose bytes are rearranged pairwise.
 */
222 iconv_get_length (iconv_t cd, guchar *src, int len, gboolean encode)
/*
 * NOTE(review): iconv () takes size_t * for both byte counters, but these
 * are guint. Verify the widths match on 64-bit targets.
 */
228 guint inbytes_remaining;
229 guint outbytes_remaining;
231 gboolean have_error = FALSE;
237 #ifndef HAVE_NEW_ICONV
238 if (G_BYTE_ORDER == G_LITTLE_ENDIAN && encode) {
/* Work on a private copy so the caller's buffer is left untouched. */
241 src = g_memdup (src, len);
242 for (i = 0; i < len; i += 2) {
244 src [i] = src [i + 1];
251 inbytes_remaining = len;
256 outbytes_remaining = outbuf_size;
259 err = iconv (cd, (char **)&p, &inbytes_remaining,
260 (char **)&outp, &outbytes_remaining);
262 if(err == (size_t)-1) {
265 /* Incomplete text, do not report an error */
/* The input pointer did not advance over all @len bytes. */
282 if((p - src) != len) {
288 #ifndef HAVE_NEW_ICONV
289 if (G_BYTE_ORDER == G_LITTLE_ENDIAN && encode)
294 g_assert_not_reached ();
/*
 * Measures, through iconv_get_length (), the encoded size of the UTF-16 code
 * units of @chars starting at element @idx.
 * NOTE(review): @count is only used by the range assertion. The length
 * handed to iconv_get_length () covers everything from @idx to the end of
 * the array, where count * 2 looks intended. Confirm.
 * NOTE(review): with assertions enabled the first assertion requires
 * length > idx, so the zero-length test below can never be true.
 */
302 ves_icall_iconv_get_byte_count (gpointer converter, MonoArray *chars, gint32 idx, gint32 count)
304 iconv_t cd = (iconv_t)converter;
310 g_assert (mono_array_length (chars) > idx);
311 g_assert (mono_array_length (chars) >= (idx + count));
/* Two bytes per UTF-16 code unit. */
313 if (!(len = (mono_array_length (chars) - idx) * 2))
316 src = mono_array_addr (chars, guint16, idx);
318 return iconv_get_length (cd, src, len, TRUE);
/*
 * iconv_convert: converts @len bytes starting at @src through @cd into
 * @dest, which can hold at most @max_len bytes.
 * @encode: TRUE when called for UTF-16 input (ves_icall_iconv_get_bytes),
 * FALSE when called for encoded bytes (ves_icall_iconv_get_chars).
 * Returns the number of bytes written to @dest.
 * Without HAVE_NEW_ICONV, little-endian hosts rearrange bytes pairwise: on a
 * g_memdup () copy of @src before an encode, and in @dest after the
 * conversion.
 */
322 iconv_convert (iconv_t cd, guchar *src, int len, guchar *dest, int max_len, gboolean encode)
/*
 * NOTE(review): iconv () takes size_t * for both byte counters, but these
 * are guint. Verify the widths match on 64-bit targets.
 */
325 guint inbytes_remaining;
326 guint outbytes_remaining;
328 gboolean have_error = FALSE;
335 #ifndef HAVE_NEW_ICONV
336 if (G_BYTE_ORDER == G_LITTLE_ENDIAN && encode) {
/* Work on a private copy so the caller's buffer is left untouched. */
339 src = g_memdup (src, len);
340 for (i = 0; i < len; i += 2) {
342 src [i] = src [i + 1];
349 inbytes_remaining = len;
350 outbuf_size = max_len;
352 outbytes_remaining = outbuf_size;
355 err = iconv (cd, (char **)&p, &inbytes_remaining, (char **)&outp, &outbytes_remaining);
357 if(err == (size_t)-1) {
358 if (errno == EINVAL) {
359 /* Incomplete text, do not report an error */
/* The input pointer did not advance over all @len bytes. */
365 if ((p - src) != len) {
371 #ifndef HAVE_NEW_ICONV
372 if (G_BYTE_ORDER == G_LITTLE_ENDIAN) {
/* mb: number of bytes that iconv () wrote to @dest. */
376 int mb = max_len - outbytes_remaining;
378 for (i = 0; i < mb; i+=2) {
380 dest [i] = dest [i + 1];
387 g_assert_not_reached ();
390 /* we return the number of bytes written in dest */
391 return max_len - outbytes_remaining;
/*
 * Encodes the UTF-16 code units of @chars starting at @charIndex into
 * @bytes starting at @byteIndex, and returns the result of iconv_convert ()
 * (the number of bytes written).
 * NOTE(review): @charCount is only used by the assertions. The length handed
 * to iconv_convert () covers everything from @charIndex to the end of
 * @chars, where charCount * 2 looks intended. Confirm.
 */
396 ves_icall_iconv_get_bytes (gpointer converter, MonoArray *chars, gint32 charIndex, gint32 charCount,
397 MonoArray *bytes, gint32 byteIndex)
399 iconv_t cd = (iconv_t)converter;
409 g_assert (mono_array_length (chars) > charIndex);
410 g_assert (mono_array_length (chars) >= (charIndex + charCount));
411 g_assert (mono_array_length (bytes) > byteIndex);
/*
 * NOTE(review): this compares the length of @chars against an offset into
 * @bytes. It looks like @bytes was meant here. Confirm.
 */
412 g_assert (mono_array_length (chars) >= (byteIndex + charCount));
/* Two bytes per UTF-16 code unit. */
414 if (!(len = (mono_array_length (chars) - charIndex) * 2))
417 src = mono_array_addr (chars, guint16, charIndex);
418 dest = mono_array_addr (bytes, char, byteIndex);
/* Room left in @bytes from @byteIndex to its end. */
420 max_len = mono_array_length (bytes) - byteIndex;
422 return iconv_convert (cd, src, len, dest, max_len, TRUE);
/*
 * Measures how many UTF-16 code units the @count bytes of @bytes starting at
 * @idx decode to: the byte length reported by iconv_get_length () is halved.
 */
426 ves_icall_iconv_get_char_count (gpointer converter, MonoArray *bytes, gint32 idx, gint32 count)
428 iconv_t cd = (iconv_t)converter;
433 g_assert (mono_array_length (bytes) > idx);
434 g_assert (mono_array_length (bytes) >= (idx + count));
436 src = mono_array_addr (bytes, char, idx);
438 /* iconv_get_length () returns the number of bytes */
439 return iconv_get_length (cd, src, (int) count, FALSE) / 2;
/*
 * Decodes @byteCount bytes of @bytes starting at @byteIndex into @chars
 * starting at @charIndex, and returns the number of UTF-16 code units
 * produced (the byte count from iconv_convert () halved).
 */
443 ves_icall_iconv_get_chars (gpointer converter, MonoArray *bytes, gint32 byteIndex, gint32 byteCount,
444 MonoArray *chars, gint32 charIndex)
446 iconv_t cd = (iconv_t)converter;
453 g_assert (mono_array_length (bytes) > byteIndex);
/*
 * NOTE(review): this compares the length of @chars against a range inside
 * @bytes. It looks like @bytes was meant here. Confirm.
 */
454 g_assert (mono_array_length (chars) >= (byteIndex + byteCount));
455 g_assert (mono_array_length (chars) > charIndex);
457 src = mono_array_addr (bytes, char, byteIndex);
458 dest = mono_array_addr (chars, guint16, charIndex);
/* Output capacity in bytes: two per remaining element of @chars. */
460 max_len = (mono_array_length (chars) - charIndex) * 2;
462 /* iconv_convert () returns the number of bytes */
463 return iconv_convert (cd, src, (int) byteCount, dest, max_len, FALSE) / 2;