2 * locales.c: Culture-sensitive handling
5 * Dick Porter (dick@ximian.com)
6 * Mohammad DAMT (mdamt@cdl2000.com)
8 * (C) 2003 Ximian, Inc.
9 * (C) 2003 PT Cakram Datalingga Duaribu http://www.cdl2000.com
16 #include <mono/metadata/debug-helpers.h>
17 #include <mono/metadata/object.h>
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/exception.h>
20 #include <mono/metadata/monitor.h>
21 #include <mono/metadata/locales.h>
/* Forward declarations for the invariant-culture helpers whose
 * definitions appear near the end of this file.
 */
25 static gint32 string_invariant_compare_char (gunichar2 c1, gunichar2 c2,
27 static gint32 string_invariant_compare (MonoString *str1, gint32 off1,
28 gint32 len1, MonoString *str2,
29 gint32 off2, gint32 len2,
31 static MonoString *string_invariant_replace (MonoString *me,
33 MonoString *newValue);
34 static gint32 string_invariant_indexof (MonoString *source, gint32 sindex,
35 gint32 count, MonoString *value,
37 static gint32 string_invariant_indexof_char (MonoString *source, gint32 sindex,
38 gint32 count, gunichar2 value,
40 static MonoString *string_invariant_tolower (MonoString *this);
41 static MonoString *string_invariant_toupper (MonoString *this);
45 #include <unicode/utypes.h>
46 #include <unicode/ustring.h>
47 #include <unicode/ures.h>
48 #include <unicode/ucol.h>
49 #include <unicode/usearch.h>
/* Fetch the string stored at position idx of an ICU resource bundle
 * (ures_getStringByIndex) and wrap it in a managed string with
 * mono_string_from_utf16.
 *
 * NOTE(review): the handling of a failed lookup is not visible in this
 * listing; confirm what is returned when ICU reports an error.
 */
51 static MonoString *monostring_from_resource_index (const UResourceBundle *bundle, int32_t idx)
58 res_str=(gunichar2 *)ures_getStringByIndex (bundle, idx, &res_strlen,
64 return(mono_string_from_utf16 (res_str));
/* Open the sub-resource called name inside bundle and check that it
 * holds exactly req_count items.
 *
 * The sub-resource is looked up with ures_getByKey and its items are
 * counted with ures_countArrayItems.  It is closed again when the count
 * cannot be obtained or differs from req_count.  Callers in this file
 * test the result against NULL and close a non-NULL result themselves.
 *
 * NOTE(review): the return statements are not visible here; presumably
 * NULL is returned on each failure path - confirm.
 */
67 static UResourceBundle *open_subbundle (const UResourceBundle *bundle,
68 const char *name, int32_t req_count)
70 UResourceBundle *subbundle;
75 subbundle=ures_getByKey (bundle, name, NULL, &ec);
77 /* Couldn't find the subbundle */
81 count=ures_countArrayItems (bundle, name, &ec);
83 /* Couldn't count the subbundle */
84 ures_close (subbundle);
/* Unexpected number of entries: release the subbundle again */
88 if(count!=req_count) {
90 ures_close (subbundle);
/* Build a managed string array from the ICU resource array resname.
 *
 * The resource is opened through open_subbundle, so the array is only
 * created when the resource exists with exactly req_count entries.
 * Element i of the result is the string at index i of the resource.
 * The subbundle is closed once the array has been filled.
 *
 * NOTE(review): the value returned when the subbundle cannot be opened
 * is not visible in this listing - confirm.
 */
97 static MonoArray *build_array (const UResourceBundle *bundle,
98 const char *resname, int32_t req_count)
101 UResourceBundle *subbundle;
104 subbundle=open_subbundle (bundle, resname, req_count);
105 if(subbundle!=NULL) {
106 arr=mono_array_new(mono_domain_get (),
107 mono_defaults.string_class, req_count);
109 for(i=0; i<req_count; i++) {
110 mono_array_set(arr, MonoString *, i, monostring_from_resource_index (subbundle, i));
113 ures_close (subbundle);
/* Create a System.Globalization.DateTimeFormatInfo object and fill it
 * from the ICU resource bundle of locale.
 *
 * The object is allocated in the current domain and its default
 * constructor is run (mono_runtime_object_init) before ICU data is
 * applied.  The fields set here are the AM/PM designators, the short
 * and long date and time patterns, and the full and abbreviated day and
 * month names.
 *
 * NOTE(review): the handling of a failed ures_open, the closing of
 * bundle and the return statement are not visible in this listing.
 */
119 static MonoDateTimeFormatInfo *create_DateTimeFormat (const char *locale)
121 MonoDateTimeFormatInfo *new_dtf;
123 UResourceBundle *bundle, *subbundle;
126 class=mono_class_from_name (mono_defaults.corlib,
127 "System.Globalization",
128 "DateTimeFormatInfo");
129 new_dtf=(MonoDateTimeFormatInfo *)mono_object_new (mono_domain_get (),
131 mono_runtime_object_init ((MonoObject *)new_dtf);
135 bundle=ures_open (NULL, locale, &ec);
/* AmPmMarkers must hold exactly two entries: AM at 0, PM at 1 */
141 subbundle=open_subbundle (bundle, "AmPmMarkers", 2);
142 if(subbundle!=NULL) {
143 new_dtf->AMDesignator=monostring_from_resource_index (subbundle, 0);
144 new_dtf->PMDesignator=monostring_from_resource_index (subbundle, 1);
146 ures_close (subbundle);
149 /* Date/Time patterns. Don't set FullDateTimePattern. As it
150 * seems to always default to LongDatePattern + " " +
151 * LongTimePattern, let the property accessor deal with it.
153 subbundle=open_subbundle (bundle, "DateTimePatterns", 9);
154 if(subbundle!=NULL) {
155 new_dtf->ShortDatePattern=monostring_from_resource_index (subbundle, 7);
156 new_dtf->LongDatePattern=monostring_from_resource_index (subbundle, 5);
157 new_dtf->ShortTimePattern=monostring_from_resource_index (subbundle, 3);
158 new_dtf->LongTimePattern=monostring_from_resource_index (subbundle, 2);
160 /* RFC1123Pattern, SortableDateTimePattern and
161 * UniversalSortableDateTimePattern all seem to be
162 * constant, and all the same as the invariant default
166 ures_close (subbundle);
170 /* Not sure what to do with these yet, so leave them set to
171 * the invariant default
173 set_field_string (new_dtf, "_DateSeparator", str);
174 set_field_string (new_dtf, "_TimeSeparator", str);
175 set_field_string (new_dtf, "_MonthDayPattern", str);
176 set_field_string (new_dtf, "_YearMonthPattern", str);
179 /* Day names. Luckily both ICU and .net start Sunday at index 0 */
180 new_dtf->DayNames=build_array (bundle, "DayNames", 7);
182 /* Abbreviated day names */
183 new_dtf->AbbreviatedDayNames=build_array (bundle, "DayAbbreviations",
187 new_dtf->MonthNames=build_array (bundle, "MonthNames", 12);
189 /* Abbreviated month names */
190 new_dtf->AbbreviatedMonthNames=build_array (bundle,
191 "MonthAbbreviations", 12);
193 /* TODO: DayOfWeek _FirstDayOfWeek, Calendar _Calendar, CalendarWeekRule _CalendarWeekRule */
/* Create a System.Globalization.NumberFormatInfo object and fill it
 * from ICU data for locale.
 *
 * Steps visible here:
 *  - separators and symbols are read by index from "NumberElements";
 *  - the currency symbol is found by mapping the locale's country code
 *    to a currency id through "CurrencyMap" and then looking that id up
 *    under "Currencies";
 *  - the decimal, currency and percent patterns come from
 *    "NumberPatterns" (which must hold exactly 4 entries);
 *  - the managed method NumberFormatInfo:InitPatterns() is then invoked
 *    on the new object, with a warning logged if it cannot be found.
 *
 * The country buffer is function-static and is reused: it first
 * receives the country code and is later overwritten with the currency
 * id, so concurrent calls share it (see the FIXME on its declaration).
 *
 * NOTE(review): res_str is obtained from table_entries and read by
 * u_UCharsToChars after table_entries has been closed; confirm the
 * pointer remains valid at that point.
 * NOTE(review): several error checks and the return statement are not
 * visible in this listing.
 */
200 static MonoNumberFormatInfo *create_NumberFormat (const char *locale)
202 MonoNumberFormatInfo *new_nf;
204 MonoMethodDesc* methodDesc;
206 UResourceBundle *bundle, *subbundle, *table_entries;
209 static char country [7]; //FIXME
210 const UChar *res_str;
213 class=mono_class_from_name (mono_defaults.corlib,
214 "System.Globalization",
216 new_nf=(MonoNumberFormatInfo *)mono_object_new (mono_domain_get (),
218 mono_runtime_object_init ((MonoObject *)new_nf);
222 bundle=ures_open (NULL, locale, &ec);
227 /* Number Elements */
229 subbundle=ures_getByKey (bundle, "NumberElements", NULL, &ec);
231 /* Couldn't find the subbundle */
235 count=ures_countArrayItems (bundle, "NumberElements", &ec);
237 /* Couldn't count the subbundle */
238 ures_close (subbundle);
242 if(subbundle!=NULL) {
/* The percent and currency separators reuse the number separators
 * found at indices 0 and 1
 */
243 new_nf->numberDecimalSeparator=monostring_from_resource_index (subbundle, 0);
244 new_nf->numberGroupSeparator=monostring_from_resource_index (subbundle, 1);
245 new_nf->percentDecimalSeparator=monostring_from_resource_index (subbundle, 0);
246 new_nf->percentGroupSeparator=monostring_from_resource_index (subbundle, 1);
247 new_nf->percentSymbol=monostring_from_resource_index (subbundle, 3);
248 new_nf->zeroPattern=monostring_from_resource_index (subbundle, 4);
249 new_nf->digitPattern=monostring_from_resource_index (subbundle, 5);
250 new_nf->negativeSign=monostring_from_resource_index (subbundle, 6);
251 new_nf->perMilleSymbol=monostring_from_resource_index (subbundle, 8);
252 new_nf->positiveInfinitySymbol=monostring_from_resource_index (subbundle, 9);
253 /* we don't have this in CLDR, so copy it from positiveInfinitySymbol */
254 new_nf->negativeInfinitySymbol=monostring_from_resource_index (subbundle, 9);
255 new_nf->naNSymbol=monostring_from_resource_index (subbundle, 10);
256 new_nf->currencyDecimalSeparator=monostring_from_resource_index (subbundle, 0);
257 new_nf->currencyGroupSeparator=monostring_from_resource_index (subbundle, 1);
259 ures_close (subbundle);
262 /* get country name */
264 uloc_getCountry (locale, country, sizeof (country), &ec);
265 if (U_SUCCESS (ec)) {
267 /* find country name in root.CurrencyMap */
268 subbundle = ures_getByKey (bundle, "CurrencyMap", NULL, &ec);
269 if (U_SUCCESS (ec)) {
271 /* get currency id for specified country */
272 table_entries = ures_getByKey (subbundle, country, NULL, &ec);
273 if (U_SUCCESS (ec)) {
274 ures_close (subbundle);
277 res_str = ures_getStringByIndex (
278 table_entries, 0, &res_strlen, &ec);
280 /* now we have currency id string */
281 ures_close (table_entries);
283 u_UCharsToChars (res_str, country,
287 /* find currency string in locale data */
288 subbundle = ures_getByKey (
289 bundle, "Currencies",
292 if (U_SUCCESS (ec)) {
294 /* find currency symbol under specified currency id */
295 table_entries = ures_getByKey (subbundle, country, NULL, &ec);
296 if (U_SUCCESS (ec)) {
297 /* get the first string only,
298 * the second is international currency symbol (not used)*/
299 new_nf->currencySymbol=monostring_from_resource_index (table_entries, 0);
300 ures_close (table_entries);
302 ures_close (subbundle);
310 subbundle=open_subbundle (bundle, "NumberPatterns", 4);
311 if(subbundle!=NULL) {
312 new_nf->decimalFormats=monostring_from_resource_index (subbundle, 0);
313 new_nf->currencyFormats=monostring_from_resource_index (subbundle, 1);
314 new_nf->percentFormats=monostring_from_resource_index (subbundle, 2);
315 ures_close (subbundle);
317 /* calls InitPatterns to parse the patterns
319 methodDesc = mono_method_desc_new (
320 "System.Globalization.NumberFormatInfo:InitPatterns()",
322 method = mono_method_desc_search_in_class (methodDesc, class);
324 mono_runtime_invoke (method, new_nf, NULL, NULL);
326 g_warning (G_GNUC_PRETTY_FUNCTION ": Runtime mismatch with class lib! (Looking for System.Globalization.NumberFormatInfo:InitPatterns())");
/* Convert a managed locale name into the name ICU uses for it.
 *
 * The managed string is converted to UTF-8, then uloc_getName is called
 * twice: first with a NULL, zero-length buffer, where
 * U_BUFFER_OVERFLOW_ERROR is the expected outcome, and then again with a
 * freshly allocated, zero-filled buffer of loc_len bytes.  The UTF-8
 * copy of the input is freed before returning.
 *
 * icu_locale starts out NULL and is only allocated inside the overflow
 * branch; callers in this file treat a NULL result as failure.
 *
 * NOTE(review): the assignment of loc_len and the return statement are
 * not visible here; presumably the caller owns the returned buffer and
 * must g_free it - confirm.
 */
335 static char *mono_string_to_icu_locale (MonoString *locale)
338 char *passed_locale, *icu_locale=NULL;
339 int32_t loc_len, ret;
341 passed_locale=mono_string_to_utf8 (locale);
344 ret=uloc_getName (passed_locale, NULL, 0, &ec);
345 if(ec==U_BUFFER_OVERFLOW_ERROR) {
348 icu_locale=(char *)g_malloc0 (sizeof(char)*loc_len);
349 ret=uloc_getName (passed_locale, icu_locale, loc_len, &ec);
351 g_free (passed_locale);
/* Internal call that fills in a CultureInfo object for locale.
 *
 * Raises System.SystemException when the locale name cannot be
 * converted to an ICU locale.  Otherwise it sets:
 *  - englishname, displayname and nativename from uloc_getDisplayName,
 *    rendered in "en", in ICU's default locale and in the locale itself;
 *  - iso3lang and iso2lang from uloc_getISO3Language / uloc_getLanguage;
 *  - datetime_format and number_format from the create_* helpers.
 *
 * Each name obtained through the 256-element scratch buffers is stored
 * only when ICU reports success and the result is shorter than the
 * buffer (ret<str_len).
 *
 * NOTE(review): the release of str, ustr and icu_locale is not visible
 * in this listing - confirm they are freed.
 */
356 void ves_icall_System_Globalization_CultureInfo_construct_internal_locale (MonoCultureInfo *this, MonoString *locale)
362 int32_t str_len, ret;
366 icu_locale=mono_string_to_icu_locale (locale);
367 if(icu_locale==NULL) {
368 /* Something went wrong */
369 mono_raise_exception((MonoException *)mono_exception_from_name(mono_defaults.corlib, "System", "SystemException"));
373 /* Fill in the static fields */
375 /* TODO: Calendar, InstalledUICulture, OptionalCalendars,
379 str_len=256; /* Should be big enough for anything */
380 str=(char *)g_malloc0 (sizeof(char)*str_len);
381 ustr=(UChar *)g_malloc0 (sizeof(UChar)*str_len);
385 ret=uloc_getDisplayName (icu_locale, "en", ustr, str_len, &ec);
386 if(U_SUCCESS (ec) && ret<str_len) {
387 this->englishname=mono_string_from_utf16 ((gunichar2 *)ustr);
390 ret=uloc_getDisplayName (icu_locale, uloc_getDefault (), ustr, str_len,
392 if(U_SUCCESS (ec) && ret<str_len) {
393 this->displayname=mono_string_from_utf16 ((gunichar2 *)ustr);
396 ret=uloc_getDisplayName (icu_locale, icu_locale, ustr, str_len, &ec);
397 if(U_SUCCESS (ec) && ret<str_len) {
398 this->nativename=mono_string_from_utf16 ((gunichar2 *)ustr);
401 this->iso3lang=mono_string_new_wrapper (uloc_getISO3Language (icu_locale));
403 ret=uloc_getLanguage (icu_locale, str, str_len, &ec);
404 if(U_SUCCESS (ec) && ret<str_len) {
405 this->iso2lang=mono_string_new_wrapper (str);
408 this->datetime_format=create_DateTimeFormat (icu_locale);
409 this->number_format=create_NumberFormat (icu_locale);
/* Internal call that attaches an ICU collator to a CompareInfo object.
 *
 * Raises System.SystemException when the locale name cannot be
 * converted to an ICU locale.  Otherwise ucol_open is called for the
 * ICU locale and comp->ICU_collator receives either the collator or
 * NULL.
 *
 * NOTE(review): the condition selecting between the two assignments is
 * not visible; presumably NULL is stored when ucol_open fails - confirm.
 */
416 void ves_icall_System_Globalization_CompareInfo_construct_compareinfo (MonoCompareInfo *comp, MonoString *locale)
425 g_message (G_GNUC_PRETTY_FUNCTION ": Constructing collator for locale [%s]", mono_string_to_utf8 (locale));
428 icu_locale=mono_string_to_icu_locale (locale);
429 if(icu_locale==NULL) {
430 /* Something went wrong */
431 mono_raise_exception((MonoException *)mono_exception_from_name(mono_defaults.corlib, "System", "SystemException"));
436 coll=ucol_open (icu_locale, &ec);
438 comp->ICU_collator=coll;
440 comp->ICU_collator=NULL;
446 /* Set up the collator to reflect the options required. Some of these
447 * options clash, as they adjust the collator strength level. Try to
448 * make later checks reduce the strength level, and attempt to take
449 * previous options into account.
451 * Don't bother to check the error returns when setting the
452 * attributes, as a failure here is hardly grounds to error out.
/* coll:    collator whose attributes are overwritten in place.
 * options: CompareOptions_* bit flags.
 *
 * Order of application as visible below:
 *  1. defaults: alternate handling NON_IGNORABLE, case level off;
 *  2. IgnoreKanaType clear: hiragana quaternary mode is configured and
 *     the strength is set to QUATERNARY;
 *  3. StringSort clear (word sort): alternate handling is changed and
 *     the strength is set to QUATERNARY;
 *  4. IgnoreCase or IgnoreWidth: strength SECONDARY;
 *  5. IgnoreNonSpace: strength PRIMARY, with the case level switched on
 *     unless both IgnoreCase and IgnoreWidth are set;
 *  6. IgnoreSymbols and Ordinal: no attribute change is visible here.
 */
454 static void set_collator_options (UCollator *coll, gint32 options)
456 UErrorCode ec=U_ZERO_ERROR;
458 /* Set up the defaults */
459 ucol_setAttribute (coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
461 ucol_setAttribute (coll, UCOL_CASE_LEVEL, UCOL_OFF, &ec);
463 /* Do this first so other options will override the quaternary
464 * level strength setting if necessary
466 if(!(options & CompareOptions_IgnoreKanaType)) {
467 ucol_setAttribute (coll, UCOL_HIRAGANA_QUATERNARY_MODE,
469 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_QUATERNARY, &ec);
472 /* Word sort, the default */
473 if(!(options & CompareOptions_StringSort)) {
474 ucol_setAttribute (coll, UCOL_ALTERNATE_HANDLING,
476 /* Tertiary strength is the default, but it might have
477 * been set to quaternary above. (We don't want that
478 * here, because that will order all the punctuation
479 * first instead of just ignoring it.)
481 * Unfortunately, tertiary strength with
482 * ALTERNATE_HANDLING==SHIFTED means that '/' and '@'
483 * compare to equal, which has the nasty side effect
484 * of killing mcs :-( (We can't specify a
485 * culture-insensitive compare, because
486 * String.StartsWith doesn't have that option.)
488 * ALTERNATE_HANDLING==SHIFTED is needed to accomplish
489 * the word-sorting-ignoring-punctuation feature. So
490 * we have to live with the slightly mis-ordered
491 * punctuation and a working mcs...
493 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_QUATERNARY, &ec);
496 if(options & CompareOptions_IgnoreCase) {
497 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_SECONDARY, &ec);
500 if(options & CompareOptions_IgnoreWidth) {
501 /* Kana width is a tertiary strength difference. This
502 * will totally break the !IgnoreKanaType option
504 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_SECONDARY, &ec);
507 if(options & CompareOptions_IgnoreNonSpace) {
508 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_PRIMARY, &ec);
509 /* We can still compare case even when just checking
512 if(!(options & CompareOptions_IgnoreCase) ||
513 !(options & CompareOptions_IgnoreWidth)) {
514 /* Not sure if CASE_LEVEL handles kana width
516 ucol_setAttribute (coll, UCOL_CASE_LEVEL, UCOL_ON,
521 if(options & CompareOptions_IgnoreSymbols) {
522 /* Don't know what to do here */
525 if(options == CompareOptions_Ordinal) {
526 /* This one is handled elsewhere */
/* Internal call comparing len1 UTF-16 units of str1 starting at off1
 * with len2 units of str2 starting at off2.
 *
 * Falls back to string_invariant_compare when there is no collator,
 * when the culture is the invariant one (lcid 0x007F) or when the
 * Ordinal bit is set in options.  Otherwise the CompareInfo object's
 * monitor is entered, because set_collator_options mutates the
 * collator stored on that object, and ucol_strcoll does the comparison.
 *
 * NOTE(review): the return statement of the ICU path is not visible;
 * presumably the UCollationResult is returned as is - confirm.
 */
530 gint32 ves_icall_System_Globalization_CompareInfo_internal_compare (MonoCompareInfo *this, MonoString *str1, gint32 off1, gint32 len1, MonoString *str2, gint32 off2, gint32 len2, gint32 options)
533 UCollationResult result;
538 g_message (G_GNUC_PRETTY_FUNCTION ": Comparing [%s] and [%s]", mono_string_to_utf8 (str1), mono_string_to_utf8 (str2));
541 coll=this->ICU_collator;
544 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", this->lcid);
547 if(coll==NULL || this->lcid==0x007F ||
548 options & CompareOptions_Ordinal) {
550 g_message (G_GNUC_PRETTY_FUNCTION ": No collator or invariant, using shortcut");
553 return(string_invariant_compare (str1, off1, len1, str2, off2,
557 mono_monitor_try_enter ((MonoObject *)this, INFINITE);
559 set_collator_options (coll, options);
561 result=ucol_strcoll (coll, mono_string_chars (str1)+off1, len1,
562 mono_string_chars (str2)+off2, len2);
564 mono_monitor_exit ((MonoObject *)this);
567 g_message (G_GNUC_PRETTY_FUNCTION ": Comparison of [%s] and [%s] returning %d", mono_string_to_utf8 (str1), mono_string_to_utf8 (str2), result);
/* Internal call that reads the collator stored in this->ICU_collator.
 *
 * NOTE(review): only the read of the field is visible in this listing;
 * presumably the collator is then closed when non-NULL - confirm.
 */
573 void ves_icall_System_Globalization_CompareInfo_free_internal_collator (MonoCompareInfo *this)
579 coll=this->ICU_collator;
/* Internal call that computes the ICU sort key of source under the
 * given CompareOptions and copies it into a managed byte array.
 *
 * With the CompareInfo object's monitor held, the collator options are
 * applied and ucol_getSortKey is called twice: once with a NULL buffer
 * to learn the key length, then again to fill a buffer of that size.
 * The key bytes are then copied one by one into the managed array.
 *
 * NOTE(review): the condition guarding the SystemException, the store
 * of the array into key and the release of keybuf are not visible in
 * this listing - confirm.
 */
585 void ves_icall_System_Globalization_CompareInfo_assign_sortkey (MonoCompareInfo *this, MonoSortKey *key, MonoString *source, gint32 options)
594 coll=this->ICU_collator;
596 mono_raise_exception((MonoException *)mono_exception_from_name(mono_defaults.corlib, "System", "SystemException"));
600 mono_monitor_try_enter ((MonoObject *)this, INFINITE);
602 set_collator_options (coll, options);
604 keylen=ucol_getSortKey (coll, mono_string_chars (source), -1, NULL, 0);
605 keybuf=g_malloc (sizeof(char)* keylen);
606 ucol_getSortKey (coll, mono_string_chars (source), -1, keybuf, keylen);
608 mono_monitor_exit ((MonoObject *)this);
610 arr=mono_array_new (mono_domain_get (), mono_defaults.byte_class,
612 for(i=0; i<keylen; i++) {
613 mono_array_set (arr, guint8, i, keybuf[i]);
/* Internal call that searches for value inside a window of count
 * UTF-16 units of source, anchored at sindex.
 *
 * Falls back to string_invariant_indexof when there is no collator,
 * when the culture is the invariant one (lcid 0x007F) or when the
 * Ordinal bit is set.  Otherwise the window is copied into a
 * zero-filled buffer of count+1 units so it can be passed to ICU with
 * length -1.  The copy starts either at sindex or at sindex-count+1.
 * The collator is set to primary strength with the case level on and
 * non-ignorable alternate handling, and usearch_first or usearch_last
 * locates the match.  In one branch the match position is shifted by
 * sindex-count+1.
 *
 * NOTE(review): the conditions selecting between the two copies and
 * between usearch_first/usearch_last are not visible; presumably they
 * test first - confirm.  The release of usrcstr and the return
 * statement are not visible either.
 */
621 int ves_icall_System_Globalization_CompareInfo_internal_index (MonoCompareInfo *this, MonoString *source, gint32 sindex, gint32 count, MonoString *value, gint32 options, MonoBoolean first)
626 UStringSearch *search;
632 g_message (G_GNUC_PRETTY_FUNCTION ": Finding %s [%s] in [%s] (sindex %d,count %d)", first?"first":"last", mono_string_to_utf8 (value), mono_string_to_utf8 (source), sindex, count);
635 coll=this->ICU_collator;
638 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", this->lcid);
641 if(coll==NULL || this->lcid==0x007F ||
642 options & CompareOptions_Ordinal) {
644 g_message (G_GNUC_PRETTY_FUNCTION ": No collator or invariant, using shortcut");
647 return(string_invariant_indexof (source, sindex, count, value,
651 usrcstr=g_malloc0 (sizeof(UChar)*(count+1));
653 memcpy (usrcstr, mono_string_chars (source)+sindex,
654 sizeof(UChar)*count);
656 memcpy (usrcstr, mono_string_chars (source)+sindex-count+1,
657 sizeof(UChar)*count);
660 mono_monitor_try_enter ((MonoObject *)this, INFINITE);
664 /* Need to set the collator to a fairly weak level, so that it
665 * treats characters that can be written differently as
666 * identical (eg "ß" and "ss", "æ" and "ae" or "ä" etc.) Note
667 * that this means that the search string and the original
668 * text might have differing lengths.
670 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_PRIMARY, &ec);
672 /* Still notice case differences though (normally a tertiary
675 ucol_setAttribute (coll, UCOL_CASE_LEVEL, UCOL_ON, &ec);
677 /* Don't ignore some codepoints */
678 ucol_setAttribute (coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
681 search=usearch_openFromCollator (mono_string_chars (value), -1, usrcstr, -1, coll, NULL,
685 pos=usearch_first (search, &ec);
687 pos=usearch_last (search, &ec);
690 if(pos!=USEARCH_DONE) {
692 g_message (G_GNUC_PRETTY_FUNCTION
693 ": Got match at %d (sindex %d) len %d", pos,
694 sindex, usearch_getMatchedLength (search));
700 pos+=(sindex-count+1);
705 g_message (G_GNUC_PRETTY_FUNCTION ": usearch_open error: %s",
709 usearch_close (search);
711 mono_monitor_exit ((MonoObject *)this);
/* Internal call that searches for a single UTF-16 unit, value, inside
 * a window of count units of source, anchored at sindex.
 *
 * Falls back to string_invariant_indexof_char when there is no
 * collator, when the culture is the invariant one (lcid 0x007F) or
 * when the Ordinal bit is set.  Otherwise it follows the same scheme as
 * the string variant: the window is copied into a zero-filled buffer of
 * count+1 units, the collator is set to primary strength with the case
 * level on and non-ignorable alternate handling, and usearch_first or
 * usearch_last locates the match.  The pattern handed to ICU is the
 * two-element, zero-initialised array uvalstr.
 *
 * NOTE(review): the store of value into uvalstr, the conditions on
 * first, the release of usrcstr and the return statement are not
 * visible in this listing - confirm.
 */
718 int ves_icall_System_Globalization_CompareInfo_internal_index_char (MonoCompareInfo *this, MonoString *source, gint32 sindex, gint32 count, gunichar2 value, gint32 options, MonoBoolean first)
721 UChar *usrcstr, uvalstr[2]={0, 0};
723 UStringSearch *search;
729 g_message (G_GNUC_PRETTY_FUNCTION ": Finding %s 0x%0x in [%s] (sindex %d,count %d)", first?"first":"last", value, mono_string_to_utf8 (source), sindex, count);
732 coll=this->ICU_collator;
735 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", this->lcid);
738 if(coll==NULL || this->lcid==0x007F ||
739 options & CompareOptions_Ordinal) {
741 g_message (G_GNUC_PRETTY_FUNCTION ": No collator or invariant, using shortcut");
744 return(string_invariant_indexof_char (source, sindex, count,
748 usrcstr=g_malloc0 (sizeof(UChar)*(count+1));
750 memcpy (usrcstr, mono_string_chars (source)+sindex,
751 sizeof(UChar)*count);
753 memcpy (usrcstr, mono_string_chars (source)+sindex-count+1,
754 sizeof(UChar)*count);
758 mono_monitor_try_enter ((MonoObject *)this, INFINITE);
762 /* Need to set the collator to a fairly weak level, so that it
763 * treats characters that can be written differently as
764 * identical (eg "ß" and "ss", "æ" and "ae" or "ä" etc.) Note
765 * that this means that the search string and the original
766 * text might have differing lengths.
768 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_PRIMARY, &ec);
770 /* Still notice case differences though (normally a tertiary
773 ucol_setAttribute (coll, UCOL_CASE_LEVEL, UCOL_ON, &ec);
775 /* Don't ignore some codepoints */
776 ucol_setAttribute (coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
779 search=usearch_openFromCollator (uvalstr, -1, usrcstr, -1, coll, NULL,
783 pos=usearch_first (search, &ec);
785 pos=usearch_last (search, &ec);
788 if(pos!=USEARCH_DONE) {
790 g_message (G_GNUC_PRETTY_FUNCTION
791 ": Got match at %d (sindex %d) len %d", pos,
792 sindex, usearch_getMatchedLength (search));
798 pos+=(sindex-count+1);
803 g_message (G_GNUC_PRETTY_FUNCTION ": usearch_open error: %s",
807 usearch_close (search);
809 mono_monitor_exit ((MonoObject *)this);
/* Internal call returning the LCID that ICU reports (uloc_getLCID) for
 * its default locale (uloc_getDefault).
 */
816 int ves_icall_System_Threading_Thread_current_lcid (void)
820 return(uloc_getLCID (uloc_getDefault ()));
/* Internal call implementing a culture-sensitive String.Replace: the
 * matches of old inside this are replaced with new, using the collator
 * of comp.
 *
 * Falls back to string_invariant_replace when comp has no collator or
 * is the invariant culture (lcid 0x007F).  Otherwise, with the monitor
 * of comp held, the collator is set to primary strength with the case
 * level on and non-ignorable alternate handling, and the matches are
 * walked twice:
 *  1. to sum up, per match, the difference between the replacement
 *     length and the matched length, which sizes the result buffer;
 *  2. to build the result by appending the unmatched text before each
 *     match followed by the replacement, and finally the trailing
 *     unmatched text.
 * Matched lengths come from usearch_getMatchedLength, so a match need
 * not have the same length as old.
 *
 * NOTE(review): the loop termination conditions, the release of uret
 * and the return statement are not visible in this listing.
 */
823 MonoString *ves_icall_System_String_InternalReplace_Str_Comp (MonoString *this, MonoString *old, MonoString *new, MonoCompareInfo *comp)
825 MonoString *ret=NULL;
828 UStringSearch *search;
833 g_message (G_GNUC_PRETTY_FUNCTION ": Replacing [%s] with [%s] in [%s]", mono_string_to_utf8 (old), mono_string_to_utf8 (new), mono_string_to_utf8 (this));
836 coll=comp->ICU_collator;
839 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", comp->lcid);
842 if(coll==NULL || comp->lcid==0x007F) {
844 g_message (G_GNUC_PRETTY_FUNCTION ": No collator or invariant, using shortcut");
847 return(string_invariant_replace (this, old, new));
850 mono_monitor_try_enter ((MonoObject *)comp, INFINITE);
854 /* Need to set the collator to a fairly weak level, so that it
855 * treats characters that can be written differently as
856 * identical (eg "ß" and "ss", "æ" and "ae" or "ä" etc.) Note
857 * that this means that the search string and the original
858 * text might have differing lengths.
860 ucol_setAttribute (coll, UCOL_STRENGTH, UCOL_PRIMARY, &ec);
862 /* Still notice case differences though (normally a tertiary
865 ucol_setAttribute (coll, UCOL_CASE_LEVEL, UCOL_ON, &ec);
867 /* Don't ignore some codepoints */
868 ucol_setAttribute (coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
871 search=usearch_openFromCollator (mono_string_chars (old), -1,
872 mono_string_chars (this), -1, coll,
875 int pos, oldpos, len_delta=0;
876 int32_t newstr_len=mono_string_length (new);
879 for(pos=usearch_first (search, &ec);
881 pos=usearch_next (search, &ec)) {
883 g_message (G_GNUC_PRETTY_FUNCTION
884 ": Got match at %d len %d", pos,
885 usearch_getMatchedLength (search));
888 len_delta += (newstr_len -
889 usearch_getMatchedLength (search));
892 g_message (G_GNUC_PRETTY_FUNCTION
893 ": New string length is %d (delta %d)",
894 mono_string_length (this)+len_delta, len_delta);
897 uret=(UChar *)g_malloc0 (sizeof(UChar) * (mono_string_length (this)+len_delta+2));
899 for(oldpos=0, pos=usearch_first (search, &ec);
901 pos=usearch_next (search, &ec)) {
902 /* Add the unmatched text */
903 u_strncat (uret, mono_string_chars (this)+oldpos,
905 /* Then the replacement */
906 u_strcat (uret, mono_string_chars (new));
907 oldpos=pos+usearch_getMatchedLength (search);
910 /* Finish off with the trailing unmatched text */
911 u_strcat (uret, mono_string_chars (this)+oldpos);
913 ret=mono_string_from_utf16 ((gunichar2 *)uret);
915 g_message (G_GNUC_PRETTY_FUNCTION ": usearch_open error: %s",
919 usearch_close (search);
921 mono_monitor_exit ((MonoObject *)comp);
924 g_message (G_GNUC_PRETTY_FUNCTION ": Replacing [%s] with [%s] in [%s] returns [%s]", mono_string_to_utf8 (old), mono_string_to_utf8 (new), mono_string_to_utf8 (this), mono_string_to_utf8 (ret));
/* Internal call implementing a culture-sensitive String.ToLower.
 *
 * The invariant culture (lcid 0x007F) is handled by
 * string_invariant_tolower.  Otherwise the ICU locale name is derived
 * from cult->icu_name and u_strToLower is run into a buffer one unit
 * longer than the input.  If ICU reports U_BUFFER_OVERFLOW_ERROR or
 * U_STRING_NOT_TERMINATED_WARNING, a buffer of len+1 units is allocated
 * and the conversion is repeated.
 *
 * NOTE(review): the condition guarding the SystemException, the release
 * of the first udest buffer before it is reassigned for the retry, and
 * the release of icu_loc are not visible in this listing - confirm.
 */
930 MonoString *ves_icall_System_String_InternalToLower_Comp (MonoString *this, MonoCultureInfo *cult)
939 g_message (G_GNUC_PRETTY_FUNCTION ": [%s]",
940 mono_string_to_utf8 (this));
944 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", cult->lcid);
947 if(cult->lcid==0x007F) {
949 g_message (G_GNUC_PRETTY_FUNCTION
950 ": Invariant, using shortcut");
953 return(string_invariant_tolower (this));
956 icu_loc=mono_string_to_icu_locale (cult->icu_name);
958 mono_raise_exception ((MonoException *)mono_exception_from_name (mono_defaults.corlib, "System", "SystemException"));
962 udest=(UChar *)g_malloc0 (sizeof(UChar)*(mono_string_length (this)+1));
964 /* According to the docs, this might result in a longer or
965 * shorter string than we started with...
969 len=u_strToLower (udest, mono_string_length (this)+1,
970 mono_string_chars (this), -1, icu_loc, &ec);
971 if(ec==U_BUFFER_OVERFLOW_ERROR ||
972 ec==U_STRING_NOT_TERMINATED_WARNING) {
974 udest=(UChar *)g_malloc0 (sizeof(UChar)*(len+1));
975 len=u_strToLower (udest, len+1, mono_string_chars (this), -1,
980 ret=mono_string_from_utf16 ((gunichar2 *)udest);
982 g_message (G_GNUC_PRETTY_FUNCTION ": u_strToLower error: %s",
984 /* return something */
992 g_message (G_GNUC_PRETTY_FUNCTION ": returning [%s]",
993 mono_string_to_utf8 (ret));
/* Internal call implementing a culture-sensitive String.ToUpper.
 *
 * The invariant culture (lcid 0x007F) is handled by
 * string_invariant_toupper.  Otherwise the ICU locale name is derived
 * from cult->icu_name and u_strToUpper is run into a buffer one unit
 * longer than the input.  If ICU reports U_BUFFER_OVERFLOW_ERROR or
 * U_STRING_NOT_TERMINATED_WARNING, a buffer of len+1 units is allocated
 * and the conversion is repeated.  The managed result is built from the
 * buffer only when ICU reports success.
 *
 * NOTE(review): the condition guarding the SystemException, the release
 * of the first udest buffer before it is reassigned for the retry, and
 * the release of icu_loc are not visible in this listing - confirm.
 */
999 MonoString *ves_icall_System_String_InternalToUpper_Comp (MonoString *this, MonoCultureInfo *cult)
1008 g_message (G_GNUC_PRETTY_FUNCTION ": [%s]",
1009 mono_string_to_utf8 (this));
1013 g_message (G_GNUC_PRETTY_FUNCTION ": LCID is %d", cult->lcid);
1016 if(cult->lcid==0x007F) {
1018 g_message (G_GNUC_PRETTY_FUNCTION
1019 ": Invariant, using shortcut");
1022 return(string_invariant_toupper (this));
1025 icu_loc=mono_string_to_icu_locale (cult->icu_name);
1027 mono_raise_exception ((MonoException *)mono_exception_from_name (mono_defaults.corlib, "System", "SystemException"));
1031 udest=(UChar *)g_malloc0 (sizeof(UChar)*(mono_string_length (this)+1));
1033 /* According to the docs, this might result in a longer or
1034 * shorter string than we started with...
1038 len=u_strToUpper (udest, mono_string_length (this)+1,
1039 mono_string_chars (this), -1, icu_loc, &ec);
1040 if(ec==U_BUFFER_OVERFLOW_ERROR ||
1041 ec==U_STRING_NOT_TERMINATED_WARNING) {
1043 udest=(UChar *)g_malloc0 (sizeof(UChar)*(len+1));
1044 len=u_strToUpper (udest, len+1, mono_string_chars (this), -1,
1048 if(U_SUCCESS (ec)) {
1049 ret=mono_string_from_utf16 ((gunichar2 *)udest);
1051 g_message (G_GNUC_PRETTY_FUNCTION ": u_strToUpper error: %s",
1053 /* return something */
1061 g_message (G_GNUC_PRETTY_FUNCTION ": returning [%s]",
1062 mono_string_to_utf8 (ret));
1068 #else /* HAVE_ICU */
/* Variant used when ICU is not available: no locale data can be
 * supplied, so System.ArgumentException is raised.
 */
1069 void ves_icall_System_Globalization_CultureInfo_construct_internal_locale (MonoCultureInfo *this, MonoString *locale)
1071 MONO_ARCH_SAVE_REGS;
1073 /* Always claim "unknown locale" if we don't have ICU (only
1074 * called for non-invariant locales)
1076 mono_raise_exception((MonoException *)mono_exception_from_name(mono_defaults.corlib, "System", "ArgumentException"));
/* Variant used when ICU is not available: no collator is created. */
1079 void ves_icall_System_Globalization_CompareInfo_construct_compareinfo (MonoCompareInfo *comp, MonoString *locale)
1081 /* Nothing to do here */
/* Variant used when ICU is not available: the comparison is delegated
 * to string_invariant_compare with the same offsets and lengths.
 */
1084 int ves_icall_System_Globalization_CompareInfo_internal_compare (MonoCompareInfo *this, MonoString *str1, gint32 off1, gint32 len1, MonoString *str2, gint32 off2, gint32 len2, gint32 options)
1086 MONO_ARCH_SAVE_REGS;
1088 /* Do a normal ascii string compare, as we only know the
1089 * invariant locale if we dont have ICU
1091 return(string_invariant_compare (str1, off1, len1, str2, off2, len2,
/* Variant used when ICU is not available: there is no collator to
 * release.
 */
1095 void ves_icall_System_Globalization_CompareInfo_free_internal_collator (MonoCompareInfo *this)
1097 /* Nothing to do here */
/* Variant used when ICU is not available: the sort key is a managed
 * byte array with one element per UTF-16 unit of source.  Each unit is
 * stored through a guint8 element, so only its low 8 bits are kept.
 *
 * NOTE(review): the store of the array into key is not visible in this
 * listing - confirm.
 */
1100 void ves_icall_System_Globalization_CompareInfo_assign_sortkey (MonoCompareInfo *this, MonoSortKey *key, MonoString *source, gint32 options)
1105 MONO_ARCH_SAVE_REGS;
1107 keylen=mono_string_length (source);
1109 arr=mono_array_new (mono_domain_get (), mono_defaults.byte_class,
1111 for(i=0; i<keylen; i++) {
1112 mono_array_set (arr, guint8, i, mono_string_chars (source)[i]);
/* Variant used when ICU is not available: delegates to
 * string_invariant_indexof; options is not consulted.
 */
1118 int ves_icall_System_Globalization_CompareInfo_internal_index (MonoCompareInfo *this, MonoString *source, gint32 sindex, gint32 count, MonoString *value, gint32 options, MonoBoolean first)
1120 MONO_ARCH_SAVE_REGS;
1122 return(string_invariant_indexof (source, sindex, count, value, first));
/* Variant used when ICU is not available: delegates to
 * string_invariant_indexof_char.
 */
1125 int ves_icall_System_Globalization_CompareInfo_internal_index_char (MonoCompareInfo *this, MonoString *source, gint32 sindex, gint32 count, gunichar2 value, gint32 options, MonoBoolean first)
1127 MONO_ARCH_SAVE_REGS;
1129 return(string_invariant_indexof_char (source, sindex, count, value,
/* Variant used when ICU is not available.
 *
 * NOTE(review): the returned value is not visible in this listing;
 * presumably it is the invariant LCID - confirm.
 */
1133 int ves_icall_System_Threading_Thread_current_lcid (void)
1135 MONO_ARCH_SAVE_REGS;
/* Variant used when ICU is not available: delegates to
 * string_invariant_replace; comp is not consulted.
 */
1141 MonoString *ves_icall_System_String_InternalReplace_Str_Comp (MonoString *this, MonoString *old, MonoString *new, MonoCompareInfo *comp)
1143 MONO_ARCH_SAVE_REGS;
1145 /* Do a normal ascii string compare and replace, as we only
1146 * know the invariant locale if we dont have ICU
1148 return(string_invariant_replace (this, old, new));
/* Variant used when ICU is not available: delegates to
 * string_invariant_tolower; cult is not consulted.
 */
1151 MonoString *ves_icall_System_String_InternalToLower_Comp (MonoString *this, MonoCultureInfo *cult)
1153 MONO_ARCH_SAVE_REGS;
1155 return(string_invariant_tolower (this));
/* Variant used when ICU is not available: delegates to
 * string_invariant_toupper; cult is not consulted.
 */
1158 MonoString *ves_icall_System_String_InternalToUpper_Comp (MonoString *this, MonoCultureInfo *cult)
1160 MONO_ARCH_SAVE_REGS;
1162 return(string_invariant_toupper (this));
1165 #endif /* HAVE_ICU */
/* Compare two UTF-16 code units under the invariant culture.
 *
 *  - IgnoreCase: each unit is lower-cased with g_unichar_tolower unless
 *    it is already classified as a lowercase letter, and the difference
 *    of the two is taken.
 *  - Ordinal: the raw difference c1 - c2 is returned directly, without
 *    being normalised.
 *  - Otherwise: upper-versus-lower letter pairs are special-cased (the
 *    bodies of those branches are not visible in this listing) and the
 *    raw difference is used for the remaining cases.
 *
 * Except on the Ordinal path, the result is reduced to -1, 0 or 1.
 */
1167 static gint32 string_invariant_compare_char (gunichar2 c1, gunichar2 c2,
1171 GUnicodeType c1type, c2type;
1173 c1type = g_unichar_type (c1);
1174 c2type = g_unichar_type (c2);
1176 if (options & CompareOptions_IgnoreCase) {
1177 result = (gint32) (c1type != G_UNICODE_LOWERCASE_LETTER ? g_unichar_tolower(c1) : c1) - (c2type != G_UNICODE_LOWERCASE_LETTER ? g_unichar_tolower(c2) : c2);
1178 } else if (options & CompareOptions_Ordinal) {
1179 // Rotor/ms return the full value just not -1 and 1
1180 return (gint32) c1 - c2;
1182 /* No options. Kana, symbol and spacing options don't
1183 * apply to the invariant culture.
1185 if (c1type == G_UNICODE_UPPERCASE_LETTER &&
1186 c2type == G_UNICODE_LOWERCASE_LETTER) {
1190 if (c1type == G_UNICODE_LOWERCASE_LETTER &&
1191 c2type == G_UNICODE_UPPERCASE_LETTER) {
1195 result = (gint32) c1 - c2;
1198 return ((result < 0) ? -1 : (result > 0) ? 1 : 0);
/* Compare a slice of str1 (len1 units from off1) with a slice of str2
 * (len2 units from off2) unit by unit, using
 * string_invariant_compare_char with the given options.
 *
 * The scan stops early once pos reaches len1 or len2.  If the loop runs
 * all the way to length, the result is the comparison of the last pair
 * of units; otherwise the code checks which slice was exhausted.
 *
 * NOTE(review): the computation of length, the handling of a non-zero
 * charcmp inside the loop and the values returned when one slice is
 * exhausted are not visible in this listing - confirm.
 */
1201 static gint32 string_invariant_compare (MonoString *str1, gint32 off1,
1202 gint32 len1, MonoString *str2,
1203 gint32 off2, gint32 len2,
1206 /* c translation of C# code from old string.cs.. :) */
1219 ustr1 = mono_string_chars(str1)+off1;
1220 ustr2 = mono_string_chars(str2)+off2;
1224 for (pos = 0; pos != length; pos++) {
1225 if (pos >= len1 || pos >= len2)
1228 charcmp = string_invariant_compare_char(ustr1[pos], ustr2[pos],
1235 /* the lesser wins, so if we have looped until length we just
1236 * need to check the last char
1238 if (pos == length) {
1239 return(string_invariant_compare_char(ustr1[pos - 1],
1240 ustr2[pos - 1], options));
1243 /* Test if one of the strings has been compared to the end */
1250 } else if (pos >= len2) {
1254 /* if not, check our last char only.. (can this happen?) */
1255 return(string_invariant_compare_char(ustr1[pos], ustr2[pos], options));
/* Replace occurrences of oldValue inside me with newValue, matching by
 * exact UTF-16 unit equality (memcmp).
 *
 * newstr and newstrlen are only taken from newValue when it is not
 * NULL.  When the old and new lengths differ, a first pass counts the
 * occurrences so that the size of the result can be computed.  The
 * result string is allocated inside the match branch and non-matching
 * units are copied only once ret is non-NULL, which makes the
 * allocation lazy.
 *
 * NOTE(review): the value returned when nothing matches, the guard
 * around the allocation and the handling of an empty oldValue are not
 * visible in this listing - confirm.
 */
1258 static MonoString *string_invariant_replace (MonoString *me,
1259 MonoString *oldValue,
1260 MonoString *newValue)
1264 gunichar2 *dest=NULL; /* shut gcc up */
1266 gunichar2 *newstr=NULL; /* shut gcc up here too */
1277 oldstr = mono_string_chars(oldValue);
1278 oldstrlen = mono_string_length(oldValue);
1280 if (NULL != newValue) {
1281 newstr = mono_string_chars(newValue);
1282 newstrlen = mono_string_length(newValue);
1286 src = mono_string_chars(me);
1287 srclen = mono_string_length(me);
1289 if (oldstrlen != newstrlen) {
1290 for (i = 0; i <= srclen - oldstrlen; i++)
1291 if (0 == memcmp(src + i, oldstr, oldstrlen * sizeof(gunichar2)))
1295 newsize = srclen + ((newstrlen - oldstrlen) * occurr);
1301 while (i < srclen) {
1302 if (0 == memcmp(src + i, oldstr, oldstrlen * sizeof(gunichar2))) {
1304 ret = mono_string_new_size( mono_domain_get (), newsize);
1305 dest = mono_string_chars(ret);
1306 memcpy (dest, src, i * sizeof(gunichar2));
1308 if (newstrlen > 0) {
1309 memcpy(dest + destpos, newstr, newstrlen * sizeof(gunichar2));
1310 destpos += newstrlen;
1314 } else if (ret != NULL) {
1315 dest[destpos] = src[i];
/* Find value inside source by exact UTF-16 unit equality.
 *
 * Two scans are visible: a forward one that starts at sindex and
 * compares unit by unit, and a backward one that starts at
 * sindex-lencmpstr+1, moves towards lower indices and compares with
 * memcmp.
 *
 * NOTE(review): the forward scan lets pos reach sindex+count and reads
 * src[pos+i] with no bound check visible here; confirm it cannot read
 * past the end of the window or of the string.
 * NOTE(review): the selection between the two scans (presumably on
 * first) and the return values are not visible in this listing.
 */
1327 static gint32 string_invariant_indexof (MonoString *source, gint32 sindex,
1328 gint32 count, MonoString *value,
1336 lencmpstr = mono_string_length(value);
1338 src = mono_string_chars(source);
1339 cmpstr = mono_string_chars(value);
1343 for(pos=sindex;pos <= sindex+count;pos++) {
1344 for(i=0;src[pos+i]==cmpstr[i];) {
1345 if(++i==lencmpstr) {
1353 for(pos=sindex-lencmpstr+1;pos>sindex-count;pos--) {
1354 if(memcmp (src+pos, cmpstr,
1355 lencmpstr*sizeof(gunichar2))==0) {
1364 
/* Find a single UTF-16 unit, value, inside source by exact equality.
 *
 * The forward scan covers indices sindex .. sindex+count-1; the
 * backward scan starts at sindex and covers count units towards lower
 * indices.
 *
 * NOTE(review): the selection between the two scans (presumably on
 * first) and the return values are not visible in this listing.
 */
1364 static gint32 string_invariant_indexof_char (MonoString *source, gint32 sindex,
1365 gint32 count, gunichar2 value,
1371 src = mono_string_chars(source);
1373 for (pos = sindex; pos != count + sindex; pos++) {
1374 if (src [pos] == value) {
1381 for (pos = sindex; pos > sindex - count; pos--) {
1382 if (src [pos] == value)
/* Build a new managed string of the same length as this in which each
 * UTF-16 unit has been passed through g_unichar_tolower.  Units are
 * converted one at a time, so the length never changes.
 */
1390 static MonoString *string_invariant_tolower (MonoString *this)
1397 ret = mono_string_new_size(mono_domain_get (),
1398 mono_string_length(this));
1400 src = mono_string_chars (this);
1401 dest = mono_string_chars (ret);
1403 for (i = 0; i < mono_string_length (this); ++i) {
1404 dest[i] = g_unichar_tolower(src[i]);
/* Build a new managed string of the same length as this in which each
 * UTF-16 unit has been passed through g_unichar_toupper.  Units are
 * converted one at a time, so the length never changes.
 */
1410 static MonoString *string_invariant_toupper (MonoString *this)
1417 ret = mono_string_new_size(mono_domain_get (),
1418 mono_string_length(this));
1420 src = mono_string_chars (this);
1421 dest = mono_string_chars (ret);
1423 for (i = 0; i < mono_string_length (this); ++i) {
1424 dest[i] = g_unichar_toupper(src[i]);