2 * locales.c: Culture-sensitive handling
5 * Dick Porter (dick@ximian.com)
6 * Mohammad DAMT (mdamt@cdl2000.com)
8 * (C) 2003 Ximian, Inc.
9 * (C) 2003 PT Cakram Datalingga Duaribu http://www.cdl2000.com
16 #include <mono/metadata/debug-helpers.h>
17 #include <mono/metadata/object.h>
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/exception.h>
20 #include <mono/metadata/monitor.h>
21 #include <mono/metadata/locales.h>
25 static gint32 string_invariant_compare_char (gunichar2 c1, gunichar2 c2,
27 static gint32 string_invariant_compare (MonoString *str1, gint32 off1,
28 gint32 len1, MonoString *str2,
29 gint32 off2, gint32 len2,
31 static MonoString *string_invariant_replace (MonoString *me,
33 MonoString *newValue);
34 static gint32 string_invariant_indexof (MonoString *source, gint32 sindex,
35 gint32 count, MonoString *value,
37 static gint32 string_invariant_indexof_char (MonoString *source, gint32 sindex,
38 gint32 count, gunichar2 value,
40 static MonoString *string_invariant_tolower (MonoString *this);
41 static MonoString *string_invariant_toupper (MonoString *this);
45 #include <unicode/utypes.h>
46 #include <unicode/ustring.h>
47 #include <unicode/ures.h>
48 #include <unicode/ucol.h>
49 #include <unicode/usearch.h>
51 static MonoString *monostring_from_resource_index (const UResourceBundle *bundle, int32_t idx)
58 res_str=(gunichar2 *)ures_getStringByIndex (bundle, idx, &res_strlen,
64 return(mono_string_from_utf16 (res_str));
67 static UResourceBundle *open_subbundle (const UResourceBundle *bundle,
68 const char *name, int32_t req_count)
70 UResourceBundle *subbundle;
75 subbundle=ures_getByKey (bundle, name, NULL, &ec);
77 /* Couldn't find the subbundle */
81 count=ures_countArrayItems (bundle, name, &ec);
83 /* Couldn't count the subbundle */
84 ures_close (subbundle);
88 if(count!=req_count) {
90 ures_close (subbundle);
97 static MonoArray *build_array (const UResourceBundle *bundle,
98 const char *resname, int32_t req_count)
101 UResourceBundle *subbundle;
104 subbundle=open_subbundle (bundle, resname, req_count);
105 if(subbundle!=NULL) {
106 arr=mono_array_new(mono_domain_get (),
107 mono_defaults.string_class, req_count);
109 for(i=0; i<req_count; i++) {
110 mono_array_set(arr, MonoString *, i, monostring_from_resource_index (subbundle, i));
113 ures_close (subbundle);
119 static MonoDateTimeFormatInfo *create_DateTimeFormat (const char *locale)
121 MonoDateTimeFormatInfo *new_dtf;
123 UResourceBundle *bundle, *subbundle;
126 class=mono_class_from_name (mono_defaults.corlib,
127 "System.Globalization",
128 "DateTimeFormatInfo");
129 new_dtf=(MonoDateTimeFormatInfo *)mono_object_new (mono_domain_get (),
131 mono_runtime_object_init ((MonoObject *)new_dtf);
135 bundle=ures_open (NULL, locale, &ec);
141 subbundle=open_subbundle (bundle, "AmPmMarkers", 2);
142 if(subbundle!=NULL) {
143 new_dtf->AMDesignator=monostring_from_resource_index (subbundle, 0);
144 new_dtf->PMDesignator=monostring_from_resource_index (subbundle, 1);
146 ures_close (subbundle);
149 /* Date/Time patterns. Don't set FullDateTimePattern. As it
150 * seems to always default to LongDatePattern + " " +
151 * LongTimePattern, let the property accessor deal with it.
153 subbundle=open_subbundle (bundle, "DateTimePatterns", 9);
154 if(subbundle!=NULL) {
155 new_dtf->ShortDatePattern=monostring_from_resource_index (subbundle, 7);
156 new_dtf->LongDatePattern=monostring_from_resource_index (subbundle, 5);
157 new_dtf->ShortTimePattern=monostring_from_resource_index (subbundle, 3);
158 new_dtf->LongTimePattern=monostring_from_resource_index (subbundle, 2);
160 /* RFC1123Pattern, SortableDateTimePattern and
161 * UniversalSortableDateTimePattern all seem to be
162 * constant, and all the same as the invariant default
166 ures_close (subbundle);
170 /* Not sure what to do with these yet, so leave them set to
171 * the invariant default
173 set_field_string (new_dtf, "_DateSeparator", str);
174 set_field_string (new_dtf, "_TimeSeparator", str);
175 set_field_string (new_dtf, "_MonthDayPattern", str);
176 set_field_string (new_dtf, "_YearMonthPattern", str);
179 /* Day names. Luckily both ICU and .NET start with Sunday at index 0 */
180 new_dtf->DayNames=build_array (bundle, "DayNames", 7);
182 /* Abbreviated day names */
183 new_dtf->AbbreviatedDayNames=build_array (bundle, "DayAbbreviations",
187 new_dtf->MonthNames=build_array (bundle, "MonthNames", 12);
189 /* Abbreviated month names */
190 new_dtf->AbbreviatedMonthNames=build_array (bundle,
191 "MonthAbbreviations", 12);
193 /* TODO: DayOfWeek _FirstDayOfWeek, Calendar _Calendar, CalendarWeekRule _CalendarWeekRule */
200 static MonoNumberFormatInfo *create_NumberFormat (const char *locale)
202 MonoNumberFormatInfo *new_nf;
204 MonoMethodDesc* methodDesc;
206 UResourceBundle *bundle, *subbundle, *table_entries;
209 static char country [7]; //FIXME
210 const UChar *res_str;
213 class=mono_class_from_name (mono_defaults.corlib,
214 "System.Globalization",
216 new_nf=(MonoNumberFormatInfo *)mono_object_new (mono_domain_get (),
218 mono_runtime_object_init ((MonoObject *)new_nf);
222 bundle=ures_open (NULL, locale, &ec);
227 /* Number Elements */
229 subbundle=ures_getByKey (bundle, "NumberElements", NULL, &ec);
231 /* Couldn't find the subbundle */
235 count=ures_countArrayItems (bundle, "NumberElements", &ec);
237 /* Couldn't count the subbundle */
238 ures_close (subbundle);
242 if(subbundle!=NULL) {
243 new_nf->numberDecimalSeparator=monostring_from_resource_index (subbundle, 0);
244 new_nf->numberGroupSeparator=monostring_from_resource_index (subbundle, 1);
245 new_nf->percentDecimalSeparator=monostring_from_resource_index (subbundle, 0);
246 new_nf->percentGroupSeparator=monostring_from_resource_index (subbundle, 1);
247 new_nf->percentSymbol=monostring_from_resource_index (subbundle, 3);
248 new_nf->zeroPattern=monostring_from_resource_index (subbundle, 4);
249 new_nf->digitPattern=monostring_from_resource_index (subbundle, 5);
250 new_nf->negativeSign=monostring_from_resource_index (subbundle, 6);
251 new_nf->perMilleSymbol=monostring_from_resource_index (subbundle, 8);
252 new_nf->positiveInfinitySymbol=monostring_from_resource_index (subbundle, 9);
253 /* we don't have this in CLDR, so copy it from positiveInfinitySymbol */
254 new_nf->negativeInfinitySymbol=monostring_from_resource_index (subbundle, 9);
255 new_nf->naNSymbol=monostring_from_resource_index (subbundle, 10);
256 new_nf->currencyDecimalSeparator=monostring_from_resource_index (subbundle, 0);
257 new_nf->currencyGroupSeparator=monostring_from_resource_index (subbundle, 1);
259 ures_close (subbundle);
262 /* get country name */
264 uloc_getCountry (locale, country, sizeof (country), &ec);
265 if (U_SUCCESS (ec)) {
267 /* find country name in root.CurrencyMap */
268 subbundle = ures_getByKey (bundle, "CurrencyMap", NULL, &ec);
269 if (U_SUCCESS (ec)) {
271 /* get currency id for specified country */
272 table_entries = ures_getByKey (subbundle, country, NULL, &ec);
273 if (U_SUCCESS (ec)) {
274 ures_close (subbundle);
277 res_str = ures_getStringByIndex (
278 table_entries, 0, &res_strlen, &ec);
280 /* now we have currency id string */
281 ures_close (table_entries);
283 u_UCharsToChars (res_str, country,
287 /* find currency string in locale data */
288 subbundle = ures_getByKey (
289 bundle, "Currencies",
292 if (U_SUCCESS (ec)) {
294 /* find currency symbol under specified currency id */
295 table_entries = ures_getByKey (subbundle, country, NULL, &ec);
296 if (U_SUCCESS (ec)) {
297 /* get the first string only,
298 * the second is international currency symbol (not used)*/
299 new_nf->currencySymbol=monostring_from_resource_index (table_entries, 0);
300 ures_close (table_entries);
302 ures_close (subbundle);
310 subbundle=open_subbundle (bundle, "NumberPatterns", 4);
311 if(subbundle!=NULL) {
312 new_nf->decimalFormats=monostring_from_resource_index (subbundle, 0);
313 new_nf->currencyFormats=monostring_from_resource_index (subbundle, 1);
314 new_nf->percentFormats=monostring_from_resource_index (subbundle, 2);
315 ures_close (subbundle);
317 /* calls InitPatterns to parse the patterns
319 methodDesc = mono_method_desc_new (
320 "System.Globalization.NumberFormatInfo:InitPatterns()",
322 method = mono_method_desc_search_in_class (methodDesc, class);
324 mono_runtime_invoke (method, new_nf, NULL, NULL);
326 g_warning (G_GNUC_PRETTY_FUNCTION ": Runtime mismatch with class lib! (Looking for System.Globalization.NumberFormatInfo:InitPatterns())");
335 static char *mono_string_to_icu_locale (MonoString *locale)
338 char *passed_locale, *icu_locale=NULL;
339 int32_t loc_len, ret;
341 passed_locale=mono_string_to_utf8 (locale);
344 ret=uloc_getName (passed_locale, NULL, 0, &ec);
345 if(ec==U_BUFFER_OVERFLOW_ERROR) {
348 icu_locale=(char *)g_malloc0 (sizeof(char)*loc_len);
349 ret=uloc_getName (passed_locale, icu_locale, loc_len, &ec);
351 g_free (passed_locale);
356 void ves_icall_System_Globalization_CultureInfo_construct_internal_locale (MonoCultureInfo *this, MonoString *locale)
362 int32_t str_len, ret;
366 icu_locale=mono_string_to_icu_locale (locale);
367 if(icu_locale==NULL) {
368 /* Something went wrong */
369 mono_raise_exception((MonoException *)mono_exception_from_name(mono_defaults.corlib, "System", "SystemException"));
373 /* Fill in the static fields */
375 /* TODO: Calendar, InstalledUICulture, OptionalCalendars,
379 str_len=256; /* Should be big enough for anything */
380 str=(char *)g_malloc0 (sizeof(char)*str_len);
381 ustr=(UChar *)g_malloc0 (sizeof(UChar)*str_len);
385 ret=uloc_getDisplayName (icu_locale, "en", ustr, str_len, &ec);
386 if(U_SUCCESS (ec) && ret<str_len) {
387 this->englishname=mono_string_from_utf16 ((gunichar2 *)ustr);
390 ret=uloc_getDisplayName (icu_locale, uloc_getDefault (), ustr, str_len,
392 if(U_SUCCESS (ec) && ret<str_len) {
393 this->displayname=mono_string_from_utf16 ((gunichar2 *)ustr);
396 ret=uloc_getDisplayName (icu_locale, icu_locale, ustr, str_len, &ec);
397 if(U_SUCCESS (ec) && ret<str_len) {
398 this->nativename=mono_string_from_utf16 ((gunichar2 *)ustr);
401 this->iso3lang=mono_string_new_wrapper (uloc_getISO3Language (icu_locale));
403 ret=uloc_getLanguage (icu_locale, str, str_len, &ec);
404 if(U_SUCCESS (ec) && ret<str_len) {
405 this->iso2lang=mono_string_new_wrapper (str);
408 this->datetime_format=create_DateTimeFormat (icu_locale);
409 this->number_format=create_NumberFormat (icu_locale);
416 void ves_icall_System_Globalization_CompareInfo_construct_compareinfo (MonoCompareInfo *comp, MonoString *locale)
425 g_message (G_GNUC_PRETTY_FUNCTION ": Constructing collator for locale [%s]", mono_string_to_utf8 (locale));
428 icu_locale=mono_string_to_icu_locale (locale);
429 if(icu_locale==NULL) {
430 /* Something went wrong */
431 mono_raise_exception((MonoException *)mono_exception_from_name(mono_defaults.corlib, "System", "SystemException"));
436 coll=ucol_open (icu_locale, &ec);
438 comp->ICU_collator=coll;
440 comp->ICU_collator=NULL;
446 /* Set up the collator to reflect the options required. Some of these
447 * options clash, as they adjust the collator strength level. Try to
448 * make later checks reduce the strength level, and attempt to take
449 * previous options into account.
451 * Don't bother to check the error returns when setting the
452 * attributes, as a failure here is hardly grounds to error out.
454 static void set_collator_options (UCollator *coll, gint32 options)
456 UErrorCode ec=U_ZERO_ERROR;
458 /* Set up the defaults */
459 ucol_setAttribute (coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
461 ucol_setAttribute (coll, UCOL_CASE_LEVEL, UCOL_OFF, &ec);
463 /* Do this first so other options will override the quaternary
464 * level strength setting if necessary
466 if(!(options & CompareOptions_IgnoreKanaType)) {
467 ucol_setAttribute (coll, UCOL_HIRAGANA_QUATERNARY_MODE,
469 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_QUATERNARY, &ec);
472 /* Word sort, the default */
473 if(!(options & CompareOptions_StringSort)) {
474 ucol_setAttribute (coll, UCOL_ALTERNATE_HANDLING,
476 /* Tertiary strength is the default, but it might have
477 * been set to quaternary above. (We don't want that
478 * here, because that will order all the punctuation
479 * first instead of just ignoring it.)
481 * Unfortunately, tertiary strength with
482 * ALTERNATE_HANDLING==SHIFTED means that '/' and '@'
483 * compare to equal, which has the nasty side effect
484 * of killing mcs :-( (We can't specify a
485 * culture-insensitive compare, because
486 * String.StartsWith doesn't have that option.)
488 * ALTERNATE_HANDLING==SHIFTED is needed to accomplish
489 * the word-sorting-ignoring-punctuation feature. So
490 * we have to live with the slightly mis-ordered
491 * punctuation and a working mcs...
493 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_QUATERNARY, &ec);
496 if(options & CompareOptions_IgnoreCase) {
497 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_SECONDARY, &ec);
500 if(options & CompareOptions_IgnoreWidth) {
501 /* Kana width is a tertiary strength difference. This
502 * will totally break the !IgnoreKanaType option
504 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_SECONDARY, &ec);
507 if(options & CompareOptions_IgnoreNonSpace) {
508 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_PRIMARY, &ec);
509 /* We can still compare case even when just checking
512 if(!(options & CompareOptions_IgnoreCase) ||
513 !(options & CompareOptions_IgnoreWidth)) {
514 /* Not sure if CASE_LEVEL handles kana width
516 ucol_setAttribute (coll, UCOL_CASE_LEVEL, UCOL_ON,
521 if(options & CompareOptions_IgnoreSymbols) {
522 /* Don't know what to do here */
525 if(options == CompareOptions_Ordinal) {
526 /* This one is handled elsewhere */
530 gint32 ves_icall_System_Globalization_CompareInfo_internal_compare (MonoCompareInfo *this, MonoString *str1, gint32 off1, gint32 len1, MonoString *str2, gint32 off2, gint32 len2, gint32 options)
533 UCollationResult result;
538 g_message (G_GNUC_PRETTY_FUNCTION ": Comparing [%s] and [%s]", mono_string_to_utf8 (str1), mono_string_to_utf8 (str2));
541 coll=this->ICU_collator;
544 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", this->lcid);
547 if(coll==NULL || this->lcid==0x007F ||
548 options & CompareOptions_Ordinal) {
550 g_message (G_GNUC_PRETTY_FUNCTION ": No collator or invariant, using shortcut");
553 return(string_invariant_compare (str1, off1, len1, str2, off2,
557 mono_monitor_try_enter ((MonoObject *)this, INFINITE);
559 set_collator_options (coll, options);
561 result=ucol_strcoll (coll, mono_string_chars (str1)+off1, len1,
562 mono_string_chars (str2)+off2, len2);
564 mono_monitor_exit ((MonoObject *)this);
567 g_message (G_GNUC_PRETTY_FUNCTION ": Comparison of [%s] and [%s] returning %d", mono_string_to_utf8 (str1), mono_string_to_utf8 (str2), result);
573 void ves_icall_System_Globalization_CompareInfo_free_internal_collator (MonoCompareInfo *this)
579 coll=this->ICU_collator;
585 void ves_icall_System_Globalization_CompareInfo_assign_sortkey (MonoCompareInfo *this, MonoSortKey *key, MonoString *source, gint32 options)
594 coll=this->ICU_collator;
596 mono_raise_exception((MonoException *)mono_exception_from_name(mono_defaults.corlib, "System", "SystemException"));
600 mono_monitor_try_enter ((MonoObject *)this, INFINITE);
602 set_collator_options (coll, options);
604 keylen=ucol_getSortKey (coll, mono_string_chars (source), -1, NULL, 0);
605 keybuf=g_malloc (sizeof(char)* keylen);
606 ucol_getSortKey (coll, mono_string_chars (source), -1, keybuf, keylen);
608 mono_monitor_exit ((MonoObject *)this);
610 arr=mono_array_new (mono_domain_get (), mono_defaults.byte_class,
612 for(i=0; i<keylen; i++) {
613 mono_array_set (arr, guint8, i, keybuf[i]);
621 int ves_icall_System_Globalization_CompareInfo_internal_index (MonoCompareInfo *this, MonoString *source, gint32 sindex, gint32 count, MonoString *value, gint32 options, MonoBoolean first)
626 UStringSearch *search;
632 g_message (G_GNUC_PRETTY_FUNCTION ": Finding %s [%s] in [%s] (sindex %d,count %d)", first?"first":"last", mono_string_to_utf8 (value), mono_string_to_utf8 (source), sindex, count);
635 coll=this->ICU_collator;
638 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", this->lcid);
641 if(coll==NULL || this->lcid==0x007F ||
642 options & CompareOptions_Ordinal) {
644 g_message (G_GNUC_PRETTY_FUNCTION ": No collator or invariant, using shortcut");
647 return(string_invariant_indexof (source, sindex, count, value,
651 usrcstr=g_malloc0 (sizeof(UChar)*(count+1));
653 memcpy (usrcstr, mono_string_chars (source)+sindex,
654 sizeof(UChar)*count);
656 memcpy (usrcstr, mono_string_chars (source)+sindex-count+1,
657 sizeof(UChar)*count);
660 mono_monitor_try_enter ((MonoObject *)this, INFINITE);
664 /* Need to set the collator to a fairly weak level, so that it
665 * treats characters that can be written differently as
666 * identical (eg "ß" and "ss", "æ" and "ae" or "ä" etc.) Note
667 * that this means that the search string and the original
668 * text might have differing lengths.
670 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_PRIMARY, &ec);
672 /* Still notice case differences though (normally a tertiary
675 ucol_setAttribute (coll, UCOL_CASE_LEVEL, UCOL_ON, &ec);
677 /* Don't ignore some codepoints */
678 ucol_setAttribute (coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
681 search=usearch_openFromCollator (mono_string_chars (value), -1, usrcstr, -1, coll, NULL,
685 pos=usearch_first (search, &ec);
687 pos=usearch_last (search, &ec);
690 if(pos!=USEARCH_DONE) {
692 g_message (G_GNUC_PRETTY_FUNCTION
693 ": Got match at %d (sindex %d) len %d", pos,
694 sindex, usearch_getMatchedLength (search));
700 pos+=(sindex-count+1);
705 g_message (G_GNUC_PRETTY_FUNCTION ": usearch_open error: %s",
709 usearch_close (search);
711 mono_monitor_exit ((MonoObject *)this);
718 int ves_icall_System_Globalization_CompareInfo_internal_index_char (MonoCompareInfo *this, MonoString *source, gint32 sindex, gint32 count, gunichar2 value, gint32 options, MonoBoolean first)
721 UChar *usrcstr, uvalstr[2]={0, 0};
723 UStringSearch *search;
729 g_message (G_GNUC_PRETTY_FUNCTION ": Finding %s 0x%0x in [%s] (sindex %d,count %d)", first?"first":"last", value, mono_string_to_utf8 (source), sindex, count);
732 coll=this->ICU_collator;
735 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", this->lcid);
738 if(coll==NULL || this->lcid==0x007F ||
739 options & CompareOptions_Ordinal) {
741 g_message (G_GNUC_PRETTY_FUNCTION ": No collator or invariant, using shortcut");
744 return(string_invariant_indexof_char (source, sindex, count,
748 usrcstr=g_malloc0 (sizeof(UChar)*(count+1));
750 memcpy (usrcstr, mono_string_chars (source)+sindex,
751 sizeof(UChar)*count);
753 memcpy (usrcstr, mono_string_chars (source)+sindex-count+1,
754 sizeof(UChar)*count);
758 mono_monitor_try_enter ((MonoObject *)this, INFINITE);
762 /* Need to set the collator to a fairly weak level, so that it
763 * treats characters that can be written differently as
764 * identical (eg "ß" and "ss", "æ" and "ae" or "ä" etc.) Note
765 * that this means that the search string and the original
766 * text might have differing lengths.
768 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_PRIMARY, &ec);
770 /* Still notice case differences though (normally a tertiary
773 ucol_setAttribute (coll, UCOL_CASE_LEVEL, UCOL_ON, &ec);
775 /* Don't ignore some codepoints */
776 ucol_setAttribute (coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
779 search=usearch_openFromCollator (uvalstr, -1, usrcstr, -1, coll, NULL,
783 pos=usearch_first (search, &ec);
785 pos=usearch_last (search, &ec);
788 if(pos!=USEARCH_DONE) {
790 g_message (G_GNUC_PRETTY_FUNCTION
791 ": Got match at %d (sindex %d) len %d", pos,
792 sindex, usearch_getMatchedLength (search));
798 pos+=(sindex-count+1);
803 g_message (G_GNUC_PRETTY_FUNCTION ": usearch_open error: %s",
807 usearch_close (search);
809 mono_monitor_exit ((MonoObject *)this);
816 int ves_icall_System_Threading_Thread_current_lcid (void)
820 return(uloc_getLCID (uloc_getDefault ()));
823 MonoString *ves_icall_System_String_InternalReplace_Str_Comp (MonoString *this, MonoString *old, MonoString *new, MonoCompareInfo *comp)
825 MonoString *ret=NULL;
828 UStringSearch *search;
833 g_message (G_GNUC_PRETTY_FUNCTION ": Replacing [%s] with [%s] in [%s]", mono_string_to_utf8 (old), mono_string_to_utf8 (new), mono_string_to_utf8 (this));
836 coll=comp->ICU_collator;
839 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", comp->lcid);
842 if(coll==NULL || comp->lcid==0x007F) {
844 g_message (G_GNUC_PRETTY_FUNCTION ": No collator or invariant, using shortcut");
847 return(string_invariant_replace (this, old, new));
850 mono_monitor_try_enter ((MonoObject *)comp, INFINITE);
854 /* Need to set the collator to a fairly weak level, so that it
855 * treats characters that can be written differently as
856 * identical (eg "ß" and "ss", "æ" and "ae" or "ä" etc.) Note
857 * that this means that the search string and the original
858 * text might have differing lengths.
860 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_PRIMARY, &ec);
862 /* Still notice case differences though (normally a tertiary
865 ucol_setAttribute (coll, UCOL_CASE_LEVEL, UCOL_ON, &ec);
867 /* Don't ignore some codepoints */
868 ucol_setAttribute (coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
871 search=usearch_openFromCollator (mono_string_chars (old), -1,
872 mono_string_chars (this), -1, coll,
875 int pos, oldpos, len_delta=0;
876 int32_t newstr_len=mono_string_length (new), match_len;
879 for(pos=usearch_first (search, &ec);
881 pos=usearch_next (search, &ec)) {
882 /* ICU usearch currently ignores most of the collator
885 * Check the returned match to see if it really
886 * does match properly...
888 match_len = usearch_getMatchedLength (search);
889 match=(UChar *)g_malloc0 (sizeof(UChar) * (match_len + 1));
890 usearch_getMatchedText (search, match, match_len, &ec);
892 if (ucol_strcoll (coll, match, -1, mono_string_chars (old), -1) == UCOL_EQUAL) {
893 /* OK, we really did get a match */
895 g_message (G_GNUC_PRETTY_FUNCTION
896 ": Got match at %d len %d", pos,
900 len_delta += (newstr_len - match_len);
904 g_message (G_GNUC_PRETTY_FUNCTION
905 ": Got false match at %d len %d",
912 g_message (G_GNUC_PRETTY_FUNCTION
913 ": New string length is %d (delta %d)",
914 mono_string_length (this)+len_delta, len_delta);
917 uret=(UChar *)g_malloc0 (sizeof(UChar) * (mono_string_length (this)+len_delta+2));
919 for(oldpos=0, pos=usearch_first (search, &ec);
921 pos=usearch_next (search, &ec)) {
922 match_len = usearch_getMatchedLength (search);
923 match=(UChar *)g_malloc0 (sizeof(UChar) * (match_len + 1));
924 usearch_getMatchedText (search, match, match_len, &ec);
926 /* Add the unmatched text */
927 u_strncat (uret, mono_string_chars (this)+oldpos,
929 if (ucol_strcoll (coll, match, -1, mono_string_chars (old), -1) == UCOL_EQUAL) {
930 /* Then the replacement */
931 u_strcat (uret, mono_string_chars (new));
933 /* Then the original, because this is a
936 u_strncat (uret, mono_string_chars (this)+pos,
939 oldpos=pos+match_len;
943 /* Finish off with the trailing unmatched text */
944 u_strcat (uret, mono_string_chars (this)+oldpos);
946 ret=mono_string_from_utf16 ((gunichar2 *)uret);
948 g_message (G_GNUC_PRETTY_FUNCTION ": usearch_open error: %s",
952 usearch_close (search);
954 mono_monitor_exit ((MonoObject *)comp);
957 g_message (G_GNUC_PRETTY_FUNCTION ": Replacing [%s] with [%s] in [%s] returns [%s]", mono_string_to_utf8 (old), mono_string_to_utf8 (new), mono_string_to_utf8 (this), mono_string_to_utf8 (ret));
963 MonoString *ves_icall_System_String_InternalToLower_Comp (MonoString *this, MonoCultureInfo *cult)
972 g_message (G_GNUC_PRETTY_FUNCTION ": [%s]",
973 mono_string_to_utf8 (this));
977 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", cult->lcid);
980 if(cult->lcid==0x007F) {
982 g_message (G_GNUC_PRETTY_FUNCTION
983 ": Invariant, using shortcut");
986 return(string_invariant_tolower (this));
989 icu_loc=mono_string_to_icu_locale (cult->icu_name);
991 mono_raise_exception ((MonoException *)mono_exception_from_name (mono_defaults.corlib, "System", "SystemException"));
995 udest=(UChar *)g_malloc0 (sizeof(UChar)*(mono_string_length (this)+1));
997 /* According to the docs, this might result in a longer or
998 * shorter string than we started with...
1002 len=u_strToLower (udest, mono_string_length (this)+1,
1003 mono_string_chars (this), -1, icu_loc, &ec);
1004 if(ec==U_BUFFER_OVERFLOW_ERROR ||
1005 ec==U_STRING_NOT_TERMINATED_WARNING) {
1007 udest=(UChar *)g_malloc0 (sizeof(UChar)*(len+1));
1008 len=u_strToLower (udest, len+1, mono_string_chars (this), -1,
1012 if(U_SUCCESS (ec)) {
1013 ret=mono_string_from_utf16 ((gunichar2 *)udest);
1015 g_message (G_GNUC_PRETTY_FUNCTION ": u_strToLower error: %s",
1017 /* return something */
1025 g_message (G_GNUC_PRETTY_FUNCTION ": returning [%s]",
1026 mono_string_to_utf8 (ret));
1032 MonoString *ves_icall_System_String_InternalToUpper_Comp (MonoString *this, MonoCultureInfo *cult)
1041 g_message (G_GNUC_PRETTY_FUNCTION ": [%s]",
1042 mono_string_to_utf8 (this));
1046 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", cult->lcid);
1049 if(cult->lcid==0x007F) {
1051 g_message (G_GNUC_PRETTY_FUNCTION
1052 ": Invariant, using shortcut");
1055 return(string_invariant_toupper (this));
1058 icu_loc=mono_string_to_icu_locale (cult->icu_name);
1060 mono_raise_exception ((MonoException *)mono_exception_from_name (mono_defaults.corlib, "System", "SystemException"));
1064 udest=(UChar *)g_malloc0 (sizeof(UChar)*(mono_string_length (this)+1));
1066 /* According to the docs, this might result in a longer or
1067 * shorter string than we started with...
1071 len=u_strToUpper (udest, mono_string_length (this)+1,
1072 mono_string_chars (this), -1, icu_loc, &ec);
1073 if(ec==U_BUFFER_OVERFLOW_ERROR ||
1074 ec==U_STRING_NOT_TERMINATED_WARNING) {
1076 udest=(UChar *)g_malloc0 (sizeof(UChar)*(len+1));
1077 len=u_strToUpper (udest, len+1, mono_string_chars (this), -1,
1081 if(U_SUCCESS (ec)) {
1082 ret=mono_string_from_utf16 ((gunichar2 *)udest);
1084 g_message (G_GNUC_PRETTY_FUNCTION ": u_strToUpper error: %s",
1086 /* return something */
1094 g_message (G_GNUC_PRETTY_FUNCTION ": returning [%s]",
1095 mono_string_to_utf8 (ret));
1101 #else /* HAVE_ICU */
1102 void ves_icall_System_Globalization_CultureInfo_construct_internal_locale (MonoCultureInfo *this, MonoString *locale)
1104 MONO_ARCH_SAVE_REGS;
1106 /* Always claim "unknown locale" if we don't have ICU (only
1107 * called for non-invariant locales)
1109 mono_raise_exception((MonoException *)mono_exception_from_name(mono_defaults.corlib, "System", "ArgumentException"));
1112 void ves_icall_System_Globalization_CompareInfo_construct_compareinfo (MonoCompareInfo *comp, MonoString *locale)
1114 /* Nothing to do here */
1117 int ves_icall_System_Globalization_CompareInfo_internal_compare (MonoCompareInfo *this, MonoString *str1, gint32 off1, gint32 len1, MonoString *str2, gint32 off2, gint32 len2, gint32 options)
1119 MONO_ARCH_SAVE_REGS;
1121 /* Do a normal ASCII string compare, as we only know the
1122 * invariant locale if we don't have ICU
1124 return(string_invariant_compare (str1, off1, len1, str2, off2, len2,
1128 void ves_icall_System_Globalization_CompareInfo_free_internal_collator (MonoCompareInfo *this)
1130 /* Nothing to do here */
1133 void ves_icall_System_Globalization_CompareInfo_assign_sortkey (MonoCompareInfo *this, MonoSortKey *key, MonoString *source, gint32 options)
1138 MONO_ARCH_SAVE_REGS;
1140 keylen=mono_string_length (source);
1142 arr=mono_array_new (mono_domain_get (), mono_defaults.byte_class,
1144 for(i=0; i<keylen; i++) {
1145 mono_array_set (arr, guint8, i, mono_string_chars (source)[i]);
1151 int ves_icall_System_Globalization_CompareInfo_internal_index (MonoCompareInfo *this, MonoString *source, gint32 sindex, gint32 count, MonoString *value, gint32 options, MonoBoolean first)
1153 MONO_ARCH_SAVE_REGS;
1155 return(string_invariant_indexof (source, sindex, count, value, first));
1158 int ves_icall_System_Globalization_CompareInfo_internal_index_char (MonoCompareInfo *this, MonoString *source, gint32 sindex, gint32 count, gunichar2 value, gint32 options, MonoBoolean first)
1160 MONO_ARCH_SAVE_REGS;
1162 return(string_invariant_indexof_char (source, sindex, count, value,
1166 int ves_icall_System_Threading_Thread_current_lcid (void)
1168 MONO_ARCH_SAVE_REGS;
1174 MonoString *ves_icall_System_String_InternalReplace_Str_Comp (MonoString *this, MonoString *old, MonoString *new, MonoCompareInfo *comp)
1176 MONO_ARCH_SAVE_REGS;
1178 /* Do a normal ASCII string compare and replace, as we only
1179 * know the invariant locale if we don't have ICU
1181 return(string_invariant_replace (this, old, new));
1184 MonoString *ves_icall_System_String_InternalToLower_Comp (MonoString *this, MonoCultureInfo *cult)
1186 MONO_ARCH_SAVE_REGS;
1188 return(string_invariant_tolower (this));
1191 MonoString *ves_icall_System_String_InternalToUpper_Comp (MonoString *this, MonoCultureInfo *cult)
1193 MONO_ARCH_SAVE_REGS;
1195 return(string_invariant_toupper (this));
1198 #endif /* HAVE_ICU */
1200 static gint32 string_invariant_compare_char (gunichar2 c1, gunichar2 c2,
1204 GUnicodeType c1type, c2type;
1206 c1type = g_unichar_type (c1);
1207 c2type = g_unichar_type (c2);
1209 if (options & CompareOptions_IgnoreCase) {
1210 result = (gint32) (c1type != G_UNICODE_LOWERCASE_LETTER ? g_unichar_tolower(c1) : c1) - (c2type != G_UNICODE_LOWERCASE_LETTER ? g_unichar_tolower(c2) : c2);
1211 } else if (options & CompareOptions_Ordinal) {
1212 // Rotor/MS return the full value, not just -1 and 1
1213 return (gint32) c1 - c2;
1215 /* No options. Kana, symbol and spacing options don't
1216 * apply to the invariant culture.
1218 if (c1type == G_UNICODE_UPPERCASE_LETTER &&
1219 c2type == G_UNICODE_LOWERCASE_LETTER) {
1223 if (c1type == G_UNICODE_LOWERCASE_LETTER &&
1224 c2type == G_UNICODE_UPPERCASE_LETTER) {
1228 result = (gint32) c1 - c2;
1231 return ((result < 0) ? -1 : (result > 0) ? 1 : 0);
1234 static gint32 string_invariant_compare (MonoString *str1, gint32 off1,
1235 gint32 len1, MonoString *str2,
1236 gint32 off2, gint32 len2,
1239 /* C translation of the C# code from the old string.cs :) */
1252 ustr1 = mono_string_chars(str1)+off1;
1253 ustr2 = mono_string_chars(str2)+off2;
1257 for (pos = 0; pos != length; pos++) {
1258 if (pos >= len1 || pos >= len2)
1261 charcmp = string_invariant_compare_char(ustr1[pos], ustr2[pos],
1268 /* the lesser wins, so if we have looped until length we just
1269 * need to check the last char
1271 if (pos == length) {
1272 return(string_invariant_compare_char(ustr1[pos - 1],
1273 ustr2[pos - 1], options));
1276 /* Test if one of the strings has been compared to the end */
1283 } else if (pos >= len2) {
1287 /* if not, check our last char only.. (can this happen?) */
1288 return(string_invariant_compare_char(ustr1[pos], ustr2[pos], options));
1291 static MonoString *string_invariant_replace (MonoString *me,
1292 MonoString *oldValue,
1293 MonoString *newValue)
1297 gunichar2 *dest=NULL; /* shut gcc up */
1299 gunichar2 *newstr=NULL; /* shut gcc up here too */
1310 oldstr = mono_string_chars(oldValue);
1311 oldstrlen = mono_string_length(oldValue);
1313 if (NULL != newValue) {
1314 newstr = mono_string_chars(newValue);
1315 newstrlen = mono_string_length(newValue);
1319 src = mono_string_chars(me);
1320 srclen = mono_string_length(me);
1322 if (oldstrlen != newstrlen) {
1323 for (i = 0; i <= srclen - oldstrlen; i++)
1324 if (0 == memcmp(src + i, oldstr, oldstrlen * sizeof(gunichar2)))
1328 newsize = srclen + ((newstrlen - oldstrlen) * occurr);
1334 while (i < srclen) {
1335 if (0 == memcmp(src + i, oldstr, oldstrlen * sizeof(gunichar2))) {
1337 ret = mono_string_new_size( mono_domain_get (), newsize);
1338 dest = mono_string_chars(ret);
1339 memcpy (dest, src, i * sizeof(gunichar2));
1341 if (newstrlen > 0) {
1342 memcpy(dest + destpos, newstr, newstrlen * sizeof(gunichar2));
1343 destpos += newstrlen;
1347 } else if (ret != NULL) {
1348 dest[destpos] = src[i];
1360 static gint32 string_invariant_indexof (MonoString *source, gint32 sindex,
1361 gint32 count, MonoString *value,
1369 lencmpstr = mono_string_length(value);
1371 src = mono_string_chars(source);
1372 cmpstr = mono_string_chars(value);
1376 for(pos=sindex;pos <= sindex+count;pos++) {
1377 for(i=0;src[pos+i]==cmpstr[i];) {
1378 if(++i==lencmpstr) {
1386 for(pos=sindex-lencmpstr+1;pos>sindex-count;pos--) {
1387 if(memcmp (src+pos, cmpstr,
1388 lencmpstr*sizeof(gunichar2))==0) {
1397 static gint32 string_invariant_indexof_char (MonoString *source, gint32 sindex,
1398 gint32 count, gunichar2 value,
1404 src = mono_string_chars(source);
1406 for (pos = sindex; pos != count + sindex; pos++) {
1407 if (src [pos] == value) {
1414 for (pos = sindex; pos > sindex - count; pos--) {
1415 if (src [pos] == value)
1423 static MonoString *string_invariant_tolower (MonoString *this)
1430 ret = mono_string_new_size(mono_domain_get (),
1431 mono_string_length(this));
1433 src = mono_string_chars (this);
1434 dest = mono_string_chars (ret);
1436 for (i = 0; i < mono_string_length (this); ++i) {
1437 dest[i] = g_unichar_tolower(src[i]);
1443 static MonoString *string_invariant_toupper (MonoString *this)
1450 ret = mono_string_new_size(mono_domain_get (),
1451 mono_string_length(this));
1453 src = mono_string_chars (this);
1454 dest = mono_string_chars (ret);
1456 for (i = 0; i < mono_string_length (this); ++i) {
1457 dest[i] = g_unichar_toupper(src[i]);