1 /* src/vm/string.c - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.

*/
32 #include "vm/global.h"
34 #include "mm/memory.h"
36 #include "native/jni.h"
37 #include "native/llni.h"
39 #include "native/include/java_lang_String.h"
41 #include "threads/lock-common.h"
44 #include "vm/builtin.h"
45 #include "vm/exceptions.h"
46 #include "vm/primitive.h"
47 #include "vm/stringlocal.h"
50 #include "vmcore/options.h"
51 #include "vmcore/statistics.h"
52 #include "vmcore/utf8.h"
55 /* global variables ***********************************************************/
57 /* The table size must be a power of 2: a slot is selected by masking the
   hash key with (size - 1). */
59 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
61 hashtable hashtable_string; /* hashtable for javastrings */
/* Object whose monitor is entered around accesses to hashtable_string;
   only declared when threads are enabled. */
63 #if defined(ENABLE_THREADS)
64 static java_object_t *lock_hashtable_string;
/* XXX preliminary typedef, will be removed once string.c and utf8.c are
   unified. */
/* heapstring_t is the structure type used in this file to reach the fields
   of a java.lang.String directly (header, value, count): it is
   heap_java_lang_String when ENABLE_HANDLES is defined.
   NOTE(review): the second typedef is presumably the #else branch; the
   #else and #endif lines are not visible here -- confirm. */
71 #if defined(ENABLE_HANDLES)
72 typedef heap_java_lang_String heapstring_t;
74 typedef java_lang_String heapstring_t;
78 /* string_init *****************************************************************

   Set up the global javastring hashtable and, when threads are
   enabled, the lock object that guards it.

   The table is created with an initial size of HASHTABLE_STRING_SIZE.
   With ENABLE_THREADS defined, lock_hashtable_string is allocated and
   its object lock is initialized.

*******************************************************************************/
84 bool string_init(void)
86 TRACESUBSYSTEMINITIALIZATION("string_init");
88 /* create string (javastring) hashtable */
90 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
92 #if defined(ENABLE_THREADS)
93 /* create string hashtable lock object */
95 lock_hashtable_string = NEW(java_object_t);
/* the lock must be initialized before the first monitor enter on it */
97 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
100 /* everything's ok */
106 /* stringtable_update **********************************************************

   Traverses the javastring hashtable and sets the vftbl-entries of
   javastrings which were temporarily set to NULL, because
   java.lang.Object was not yet loaded.

   Both the String object and its character array are checked: a NULL
   vftbl is replaced by the vftbl of class_java_lang_String or of the
   char array class respectively. An entry without a string object or
   without a value array is reported through vm_abort.

*******************************************************************************/
114 void stringtable_update(void)
118 literalstring *s; /* hashtable entry */
/* visit every slot of the string hashtable */
121 for (i = 0; i < hashtable_string.size; i++) {
122 s = hashtable_string.ptr[i];
/* view the object stored in the entry as a heap string */
125 js = (heapstring_t *) s->string;
127 if ((js == NULL) || (js->value == NULL)) {
128 /* error in hashtable found */
130 vm_abort("stringtable_update: invalid literalstring in hashtable");
/* NOTE(review): a is presumably js->value; the assignment is not visible
   in this view -- confirm */
135 if (!js->header.vftbl)
136 /* vftbl of javastring is NULL */
137 js->header.vftbl = class_java_lang_String->vftbl;
139 if (!a->header.objheader.vftbl)
140 /* vftbl of character-array is NULL */
141 a->header.objheader.vftbl =
142 primitive_arrayclass_get_by_type(ARRAYTYPE_CHAR)->vftbl;
144 /* follow link in external hash chain */
152 /* javastring_new_from_utf_buffer **********************************************

   Create a new object of type java/lang/String with the text from
   the specified utf8 buffer.

   IN:
      buffer.......points to first char in the buffer
      blength......number of bytes to read from the buffer

   RETURN VALUE:
      the java.lang.String object, or
      NULL if an exception has been thrown

   The new string gets offset 0 and a count equal to the number of u2
   characters computed for the buffer.

*******************************************************************************/
167 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer,
170 const char *utf_ptr; /* current utf character in utf string */
171 u4 utflength; /* length of utf-string if uncompressed */
173 java_lang_String *s; /* result-string */
174 java_handle_chararray_t *a;
/* number of u2 characters the buffer decodes to */
179 utflength = utf_get_number_of_u2s_for_buffer(buffer,blength);
/* allocate the String object and a char array of that length */
181 o = builtin_new(class_java_lang_String);
182 a = builtin_newarray_char(utflength);
184 /* javastring or character-array could not be created */
186 if ((o == NULL) || (a == NULL))
189 /* decompress utf-string */
/* utf_nextu2 is called with a char ** elsewhere in this file, hence the
   cast that drops the const qualifier of utf_ptr */
193 for (i = 0; i < utflength; i++)
194 LLNI_array_direct(a, i) = utf_nextu2((char **) &utf_ptr);
196 /* set fields of the javastring-object */
198 s = (java_lang_String *) o;
200 LLNI_field_set_ref(s, value , a);
201 LLNI_field_set_val(s, offset, 0);
202 LLNI_field_set_val(s, count , utflength);
208 /* javastring_safe_new_from_utf8 ***********************************************

   Create a new object of type java/lang/String with the text from
   the specified UTF-8 string. This function is safe for invalid UTF-8.
   (Invalid characters will be replaced by U+fffd.)

   IN:
      text.........the UTF-8 string, zero-terminated.

   RETURN VALUE:
      the java.lang.String object, or
      NULL if an exception has been thrown

   The new string gets offset 0 and a count equal to the number of
   Java characters computed for the input.

*******************************************************************************/
223 java_handle_t *javastring_safe_new_from_utf8(const char *text)
226 java_handle_chararray_t *a;
233 /* Get number of bytes. We need this to completely emulate the messy */
234 /* behaviour of the RI. :( */
236 nbytes = strlen(text);
238 /* calculate number of Java characters */
240 len = utf8_safe_number_of_u2s(text, nbytes);
242 /* allocate the String object and the char array */
244 o = builtin_new(class_java_lang_String);
245 a = builtin_newarray_char(len);
247 /* javastring or character-array could not be created? */
249 if ((o == NULL) || (a == NULL))
252 /* decompress UTF-8 string */
/* the converter writes directly into the data area of the new char array */
254 utf8_safe_convert_to_u2s(text, nbytes, LLNI_array_data(a));
256 /* set fields of the String object */
258 s = (java_lang_String *) o;
260 LLNI_field_set_ref(s, value , a);
261 LLNI_field_set_val(s, offset, 0);
262 LLNI_field_set_val(s, count , len);
268 /* javastring_new_from_utf_string **********************************************

   Create a new object of type java/lang/String with the text from
   the specified zero-terminated utf8 string.

   IN:
      utfstr.......the zero-terminated utf8 string; its length in bytes
                   is determined with strlen

   RETURN VALUE:
      the java.lang.String object, or
      NULL if an exception has been thrown

*******************************************************************************/
283 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
/* delegate to the buffer variant, passing the byte length of the string */
287 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
291 /* javastring_new **************************************************************

   Creates a new object of type java/lang/String with the text of
   the specified utf8-string.

   IN:
      u............the utf symbol to convert

   RETURN VALUE:
      pointer to the string or NULL if memory is exhausted.

   The new string gets offset 0 and a count equal to the number of u2
   characters of the utf symbol.

*******************************************************************************/
300 java_handle_t *javastring_new(utf *u)
302 char *utf_ptr; /* current utf character in utf string */
303 u4 utflength; /* length of utf-string if uncompressed */
305 java_handle_chararray_t *a;
/* NOTE(review): presumably raised when u is NULL; the guarding condition
   is not visible in this view -- confirm */
310 exceptions_throw_nullpointerexception();
/* number of u2 characters the symbol decodes to */
315 utflength = utf_get_number_of_u2s(u);
/* allocate the String object and a char array of that length */
317 o = builtin_new(class_java_lang_String);
318 a = builtin_newarray_char(utflength);
320 /* javastring or character-array could not be created */
322 if ((o == NULL) || (a == NULL))
325 /* decompress utf-string */
327 for (i = 0; i < utflength; i++)
328 LLNI_array_direct(a, i) = utf_nextu2(&utf_ptr);
330 /* set fields of the javastring-object */
332 s = (java_lang_String *) o;
334 LLNI_field_set_ref(s, value , a);
335 LLNI_field_set_val(s, offset, 0);
336 LLNI_field_set_val(s, count , utflength);
342 /* javastring_new_slash_to_dot *************************************************

   Creates a new object of type java/lang/String with the text of
   the specified utf8-string with slashes changed to dots.

   IN:
      u............the utf symbol to convert

   RETURN VALUE:
      pointer to the string or NULL if memory is exhausted.

*******************************************************************************/
351 java_handle_t *javastring_new_slash_to_dot(utf *u)
353 char *utf_ptr; /* current utf character in utf string */
354 u4 utflength; /* length of utf-string if uncompressed */
356 java_handle_chararray_t *a;
/* NOTE(review): presumably raised when u is NULL; the guarding condition
   is not visible in this view -- confirm */
362 exceptions_throw_nullpointerexception();
/* number of u2 characters the symbol decodes to */
367 utflength = utf_get_number_of_u2s(u);
/* allocate the String object and a char array of that length */
369 o = builtin_new(class_java_lang_String);
370 a = builtin_newarray_char(utflength);
372 /* javastring or character-array could not be created */
373 if ((o == NULL) || (a == NULL))
376 /* decompress utf-string */
/* each character is fetched into ch before it is stored, which is where
   the replacement can take place.
   NOTE(review): the slash-to-dot substitution itself is not visible
   between the two statements below -- confirm it is present */
378 for (i = 0; i < utflength; i++) {
379 ch = utf_nextu2(&utf_ptr);
382 LLNI_array_direct(a, i) = ch;
385 /* set fields of the javastring-object */
387 s = (java_lang_String *) o;
389 LLNI_field_set_ref(s, value , a);
390 LLNI_field_set_val(s, offset, 0);
391 LLNI_field_set_val(s, count , utflength);
397 /* javastring_new_from_ascii ***************************************************

   Creates a new java/lang/String object which contains the given ASCII
   C-string converted to UTF-16.

   IN:
      text.........string of ASCII characters

   RETURN VALUE:
      the java.lang.String object, or
      NULL if an exception has been thrown.

   The new string gets offset 0 and a count of len characters.

*******************************************************************************/
411 java_handle_t *javastring_new_from_ascii(const char *text)
414 s4 len; /* length of the string */
417 java_handle_chararray_t *a;
/* NOTE(review): presumably raised when text is NULL; the guarding
   condition and the computation of len are not visible here -- confirm */
420 exceptions_throw_nullpointerexception();
/* allocate the String object and a char array of len elements */
426 o = builtin_new(class_java_lang_String);
427 a = builtin_newarray_char(len);
429 /* javastring or character-array could not be created */
431 if ((o == NULL) || (a == NULL))
/* copy the text: every C char is stored unchanged into one array element,
   no decoding is performed */
436 for (i = 0; i < len; i++)
437 LLNI_array_direct(a, i) = text[i];
439 /* set fields of the javastring-object */
441 s = (java_lang_String *) o;
443 LLNI_field_set_ref(s, value , a);
444 LLNI_field_set_val(s, offset, 0);
445 LLNI_field_set_val(s, count , len);
451 /* javastring_tochar ***********************************************************

   Converts a Java string into a C string.

   IN:
      so...........the Java string

   RETURN VALUE:
      pointer to C string

   Caution: calling method MUST release the allocated memory!

   The characters are copied one by one from the value array, starting
   at the string's offset, into a buffer obtained with MNEW.

*******************************************************************************/
461 char *javastring_tochar(java_handle_t *so)
463 java_lang_String *s = (java_lang_String *) so;
464 java_handle_chararray_t *a;
/* fetch the backing char array of the string */
473 LLNI_field_get_ref(s, value, a);
478 LLNI_field_get_val(s, count, count);
479 LLNI_field_get_val(s, offset, offset);
/* one element more than the string has characters.
   NOTE(review): presumably for the terminating NUL; the store of the
   terminator is not visible in this view -- confirm */
481 buf = MNEW(char, count + 1);
/* copy count elements beginning at offset.
   NOTE(review): buf is allocated as char, so elements wider than a char
   would be narrowed by this assignment -- confirm intended */
483 for (i = 0; i < count; i++)
484 buf[i] = LLNI_array_direct(a, offset + i);
492 /* javastring_toutf ************************************************************

   Make utf symbol from javastring.

   IN:
      string.......the Java string
      isclassname..passed through unchanged to utf_new_u2

   RETURN VALUE:
      the result of utf_new_u2 for the count characters of the string
      that start at its offset within the value array

*******************************************************************************/
498 utf *javastring_toutf(java_handle_t *string, bool isclassname)
501 java_handle_chararray_t *value;
505 s = (java_lang_String *) string;
/* fetch the backing char array and the window (offset, count) into it */
510 LLNI_field_get_ref(s, value, value);
515 LLNI_field_get_val(s, count, count);
516 LLNI_field_get_val(s, offset, offset);
/* build the symbol from the first character of the window onwards */
518 return utf_new_u2(LLNI_array_data(value) + offset, count, isclassname);
522 /* literalstring_u2 ************************************************************

   Searches for the literalstring with the specified u2-array in the
   string hashtable, if there is no such string a new one is created.

   If copymode is true a copy of the u2-array is made.

   IN:
      a............character array holding the text
      length.......number of characters of the text
      offset.......index of the first character of the text within a
      copymode.....see above

   RETURN VALUE:
      the java.lang.String object stored in the hashtable, either the
      one that was found or the newly created one

   The whole operation runs between LOCK_MONITOR_ENTER and
   LOCK_MONITOR_EXIT on lock_hashtable_string. After an insertion the
   table is doubled in size once it holds more than two entries per
   slot on average.

*******************************************************************************/
531 static java_object_t *literalstring_u2(java_chararray_t *a, u4 length,
532 u4 offset, bool copymode)
534 literalstring *s; /* hashtable element */
535 heapstring_t *js; /* u2-array wrapped in javastring */
536 java_chararray_t *ca; /* copy of u2-array */
541 LOCK_MONITOR_ENTER(lock_hashtable_string);
543 /* find location in hashtable */
/* the mask only selects a valid slot while the table size is a power of 2 */
545 key = unicode_hashkey(a->data + offset, length);
546 slot = key & (hashtable_string.size - 1);
547 s = hashtable_string.ptr[slot];
/* compare the text with the string of the current chain element */
550 js = (heapstring_t *) s->string;
552 if (length == js->count) {
/* the stored characters are read from index 0 of js->value.
   NOTE(review): this presumes strings kept in the table have offset 0 --
   confirm */
555 for (i = 0; i < length; i++)
556 if (a->data[offset + i] != js->value->data[i])
559 /* string already in hashtable, free memory */
/* NOTE(review): presumably only executed when copymode is false (the
   caller's array is then owned by this function); the guarding condition
   is not visible in this view -- confirm */
562 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
564 LOCK_MONITOR_EXIT(lock_hashtable_string);
566 return (java_object_t *) js;
570 /* follow link in external hash chain */
575 /* create copy of u2-array for new javastring */
/* NOTE(review): length has type u4; if that type is unsigned,
   (length - 1) wraps for length == 0 and the size becomes huge where
   size_t is wider than u4 -- confirm that empty strings are handled */
576 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
577 ca = mem_alloc(arraysize);
578 /* memcpy(ca, a, arraysize); */
579 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
/* NOTE(review): &(a->data) + offset advances in units of sizeof(a->data),
   not sizeof(u2); both agree only if data is declared as a one-element
   u2 array. The copy length also includes the 10 extra bytes, so up to
   10 bytes behind the source characters are read -- confirm */
580 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
586 /* location in hashtable found, complete arrayheader */
588 ca->header.objheader.vftbl =
589 primitive_arrayclass_get_by_type(ARRAYTYPE_CHAR)->vftbl;
590 ca->header.size = length;
/* the String class is dereferenced for its vftbl below */
592 assert(class_java_lang_String);
593 assert(class_java_lang_String->state & CLASS_LOADED);
595 /* create new javastring */
/* allocated with NEW, i.e. not through builtin_new as the javastring_new
   functions do */
597 js = NEW(heapstring_t);
599 #if defined(ENABLE_STATISTICS)
601 size_string += sizeof(heapstring_t);
604 #if defined(ENABLE_THREADS)
605 lock_init_object_lock(&js->header);
608 js->header.vftbl = class_java_lang_String->vftbl;
613 /* create new literalstring */
615 s = NEW(literalstring);
617 #if defined(ENABLE_STATISTICS)
619 size_string += sizeof(literalstring);
/* insert the new entry at the head of the chain of its slot */
622 s->hashlink = hashtable_string.ptr[slot];
623 s->string = (java_object_t *) js;
624 hashtable_string.ptr[slot] = s;
626 /* update number of hashtable entries */
628 hashtable_string.entries++;
630 /* reorganization of hashtable */
632 if (hashtable_string.entries > (hashtable_string.size * 2)) {
633 /* reorganization of hashtable, average length of the external
634 chains is approx. 2 */
638 literalstring *nexts;
640 hashtable newhash; /* the new hashtable */
642 /* create new hashtable, double the size */
644 hashtable_create(&newhash, hashtable_string.size * 2);
645 newhash.entries = hashtable_string.entries;
647 /* transfer elements to new hashtable */
649 for (i = 0; i < hashtable_string.size; i++) {
650 s = hashtable_string.ptr[i];
/* recompute the slot of the entry for the larger table and push the
   entry onto the head of that chain */
654 tmpjs = (heapstring_t *) s->string;
655 slot = unicode_hashkey(tmpjs->value->data, tmpjs->count) & (newhash.size - 1);
657 s->hashlink = newhash.ptr[slot];
658 newhash.ptr[slot] = s;
660 /* follow link in external hash chain */
665 /* dispose old table */
/* only the old slot array is released, the entries were moved over */
667 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
668 hashtable_string = newhash;
671 LOCK_MONITOR_EXIT(lock_hashtable_string);
673 return (java_object_t *) js;
677 /* literalstring_new ***********************************************************

   Creates a new literalstring with the text of the utf-symbol and inserts
   it into the string hashtable.

   IN:
      u............the utf symbol

   RETURN VALUE:
      the object returned by literalstring_u2 for the converted text

   The text is converted into a character array obtained with
   mem_alloc, which is handed to literalstring_u2 with offset 0 and
   copymode false.

*******************************************************************************/
684 java_object_t *literalstring_new(utf *u)
686 char *utf_ptr; /* pointer to current unicode character */
688 u4 utflength; /* length of utf-string if uncompressed */
689 java_chararray_t *a; /* u2-array constructed from utf string */
693 utflength = utf_get_number_of_u2s(u);
695 /* allocate memory */
/* the size expression has to stay identical to the one used by
   literalstring_u2 when it releases this array.
   NOTE(review): if u4 is unsigned, (utflength - 1) wraps for an empty
   symbol -- confirm that utflength == 0 is handled */
696 a = mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
698 /* convert utf-string to u2-array */
/* only the character data is written here, the array header is left
   untouched */
699 for (i = 0; i < utflength; i++)
700 a->data[i] = utf_nextu2(&utf_ptr);
702 return literalstring_u2(a, utflength, 0, false);
706 /* literalstring_free **********************************************************
708 Removes a literalstring from memory.
710 *******************************************************************************/
713 /* TWISTI This one is currently not used. */
715 static void literalstring_free(java_object_t* string)
720 s = (heapstring_t *) string;
723 /* dispose memory of java.lang.String object */
724 FREE(s, heapstring_t);
726 /* dispose memory of java-characterarray */
727 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
732 /* javastring_intern ***********************************************************

   Intern the given Java string.

   XXX NOTE: Literal Strings are direct references since they are not placed
   onto the GC-Heap. That's why this function looks so "different".

   IN:
      s............the Java string to intern

   RETURN VALUE:
      the wrapped result of literalstring_u2 for the characters of s

*******************************************************************************/
741 java_handle_t *javastring_intern(java_handle_t *s)
743 java_lang_String *so;
744 java_chararray_t *value;
747 /* java_lang_String *o; */
748 java_object_t *o; /* XXX see note above */
750 so = (java_lang_String *) s;
/* the value array is read as a direct reference, count and offset are
   read through the field accessors */
752 value = LLNI_field_direct(so, value); /* XXX see note above */
753 LLNI_field_get_val(so, count, count);
754 LLNI_field_get_val(so, offset, offset);
/* copymode is true: the character array of s is not handed over to the
   string hashtable */
756 o = literalstring_u2(value, count, offset, true);
758 return LLNI_WRAP(o); /* XXX see note above */
762 /* javastring_print ************************************************************

   Print the given Java string.

   IN:
      s............the Java string

   The characters are visited one by one, from the string's offset up
   to offset + count within the value array.

*******************************************************************************/
768 void javastring_print(java_handle_t *s)
770 java_lang_String *so;
771 java_handle_chararray_t *value;
777 so = (java_lang_String *) s;
/* fetch the backing char array and the window (offset, count) into it */
779 LLNI_field_get_ref(so, value, value);
780 LLNI_field_get_val(so, count, count);
781 LLNI_field_get_val(so, offset, offset);
/* NOTE(review): the statement that outputs c is not visible in this
   view -- confirm how each character is printed */
783 for (i = offset; i < offset + count; i++) {
784 c = LLNI_array_direct(value, i);
/*
 * These are local overrides for various environment variables in Emacs.
 * Please do not remove this and leave it at the end of the file, where
 * Emacs will automagically detect them.
 * ---------------------------------------------------------------------
 * Local variables:
 * indent-tabs-mode: t
 * End:
 * vim:noexpandtab:sw=4:ts=4:
 */