1 /* src/vm/string.c - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
30 #include "vmcore/system.h"
34 #include "vm/global.h"
36 #include "mm/memory.h"
38 #include "native/jni.h"
39 #include "native/llni.h"
41 #include "native/include/java_lang_String.h"
43 #include "threads/lock-common.h"
46 #include "vm/builtin.h"
47 #include "vm/exceptions.h"
48 #include "vm/primitive.hpp"
49 #include "vm/stringlocal.h"
52 #include "vmcore/options.h"
53 #include "vmcore/statistics.h"
54 #include "vmcore/utf8.h"
57 /* global variables ***********************************************************/
59 /* hashsize must be a power of 2 (slots are selected with key & (size - 1)) */
61 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
63 hashtable hashtable_string; /* hashtable for javastrings */
65 #if defined(ENABLE_THREADS)
66 static java_object_t *lock_hashtable_string;
70 /* XXX preliminary typedef, will be removed once string.c and utf8.c are
73 #if defined(ENABLE_HANDLES)
74 typedef heap_java_lang_String heapstring_t;
76 typedef java_lang_String heapstring_t;
80 /* string_init *****************************************************************
82 Create the string hashtable and, if threads are enabled, its lock object.
84 *******************************************************************************/
86 bool string_init(void)
88 TRACESUBSYSTEMINITIALIZATION("string_init");
90 /* create string (javastring) hashtable */
92 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
94 #if defined(ENABLE_THREADS)
95 /* create string hashtable lock object */
97 lock_hashtable_string = NEW(java_object_t);
99 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
102 /* everything's ok */
108 /* stringtable_update **********************************************************
110 Traverses the javastring hashtable and sets the vftbl-entries of
111 javastrings which were temporarily set to NULL, because
112 java.lang.Object was not yet loaded.
114 *******************************************************************************/
116 void stringtable_update(void)
120 literalstring *s; /* hashtable entry */
123 for (i = 0; i < hashtable_string.size; i++) {
124 s = hashtable_string.ptr[i];
127 js = (heapstring_t *) s->string;
129 if ((js == NULL) || (js->value == NULL)) {
130 /* error in hashtable found */
132 vm_abort("stringtable_update: invalid literalstring in hashtable");
137 if (!js->header.vftbl)
138 /* vftbl of javastring is NULL */
139 js->header.vftbl = class_java_lang_String->vftbl;
141 if (!a->header.objheader.vftbl)
142 /* vftbl of character-array is NULL */
143 a->header.objheader.vftbl =
144 Primitive_get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
146 /* follow link in external hash chain */
154 /* javastring_new_from_utf_buffer **********************************************
156 Create a new object of type java/lang/String with the text from
157 the specified utf8 buffer.
160 buffer.......points to first char in the buffer
161 blength......number of bytes to read from the buffer
164 the java.lang.String object, or
165 NULL if an exception has been thrown
167 *******************************************************************************/
169 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer,
172 const char *utf_ptr; /* current utf character in utf string */
173 u4 utflength; /* length of utf-string if uncompressed */
175 java_lang_String *s; /* result-string */
176 java_handle_chararray_t *a;
181 utflength = utf_get_number_of_u2s_for_buffer(buffer,blength);
183 o = builtin_new(class_java_lang_String);
184 a = builtin_newarray_char(utflength);
186 /* javastring or character-array could not be created */
188 if ((o == NULL) || (a == NULL))
191 /* decompress utf-string */
195 for (i = 0; i < utflength; i++)
196 LLNI_array_direct(a, i) = utf_nextu2((char **) &utf_ptr);
198 /* set fields of the javastring-object */
200 s = (java_lang_String *) o;
202 LLNI_field_set_ref(s, value , a);
203 LLNI_field_set_val(s, offset, 0);
204 LLNI_field_set_val(s, count , utflength);
210 /* javastring_safe_new_from_utf8 ***********************************************
212 Create a new object of type java/lang/String with the text from
213 the specified UTF-8 string. This function is safe for invalid UTF-8.
214 (Invalid characters will be replaced by U+fffd.)
217 text.........the UTF-8 string, zero-terminated.
220 the java.lang.String object, or
221 NULL if an exception has been thrown
223 *******************************************************************************/
225 java_handle_t *javastring_safe_new_from_utf8(const char *text)
228 java_handle_chararray_t *a;
236 /* Get number of bytes. We need this to completely emulate the messy */
237 /* behaviour of the RI. :( */
239 nbytes = strlen(text);
241 /* calculate number of Java characters */
243 len = utf8_safe_number_of_u2s(text, nbytes);
245 /* allocate the String object and the char array */
247 o = builtin_new(class_java_lang_String);
248 a = builtin_newarray_char(len);
250 /* javastring or character-array could not be created? */
252 if ((o == NULL) || (a == NULL))
255 /* decompress UTF-8 string */
257 utf8_safe_convert_to_u2s(text, nbytes, LLNI_array_data(a));
259 /* set fields of the String object */
261 s = (java_lang_String *) o;
263 LLNI_field_set_ref(s, value , a);
264 LLNI_field_set_val(s, offset, 0);
265 LLNI_field_set_val(s, count , len);
271 /* javastring_new_from_utf_string **********************************************
273 Create a new object of type java/lang/String with the text from
274 the specified zero-terminated utf8 string.
277 utfstr.......points to the zero-terminated utf8 string; the number
278 of bytes to read is determined with strlen()
281 the java.lang.String object, or
282 NULL if an exception has been thrown
284 *******************************************************************************/
286 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
290 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
294 /* javastring_new **************************************************************
296 creates a new object of type java/lang/String with the text of
297 the specified utf8-string
299 return: pointer to the string, or NULL if memory is exhausted; the function can also throw a NullPointerException.
301 *******************************************************************************/
303 java_handle_t *javastring_new(utf *u)
305 char *utf_ptr; /* current utf character in utf string */
306 u4 utflength; /* length of utf-string if uncompressed */
308 java_handle_chararray_t *a;
313 exceptions_throw_nullpointerexception();
318 utflength = utf_get_number_of_u2s(u);
320 o = builtin_new(class_java_lang_String);
321 a = builtin_newarray_char(utflength);
323 /* javastring or character-array could not be created */
325 if ((o == NULL) || (a == NULL))
328 /* decompress utf-string */
330 for (i = 0; i < utflength; i++)
331 LLNI_array_direct(a, i) = utf_nextu2(&utf_ptr);
333 /* set fields of the javastring-object */
335 s = (java_lang_String *) o;
337 LLNI_field_set_ref(s, value , a);
338 LLNI_field_set_val(s, offset, 0);
339 LLNI_field_set_val(s, count , utflength);
345 /* javastring_new_slash_to_dot *************************************************
347 creates a new object of type java/lang/String with the text of
348 the specified utf8-string with slashes changed to dots
350 return: pointer to the string, or NULL if memory is exhausted; the function can also throw a NullPointerException.
352 *******************************************************************************/
354 java_handle_t *javastring_new_slash_to_dot(utf *u)
356 char *utf_ptr; /* current utf character in utf string */
357 u4 utflength; /* length of utf-string if uncompressed */
359 java_handle_chararray_t *a;
365 exceptions_throw_nullpointerexception();
370 utflength = utf_get_number_of_u2s(u);
372 o = builtin_new(class_java_lang_String);
373 a = builtin_newarray_char(utflength);
375 /* javastring or character-array could not be created */
376 if ((o == NULL) || (a == NULL))
379 /* decompress utf-string */
381 for (i = 0; i < utflength; i++) {
382 ch = utf_nextu2(&utf_ptr);
385 LLNI_array_direct(a, i) = ch;
388 /* set fields of the javastring-object */
390 s = (java_lang_String *) o;
392 LLNI_field_set_ref(s, value , a);
393 LLNI_field_set_val(s, offset, 0);
394 LLNI_field_set_val(s, count , utflength);
400 /* javastring_new_from_ascii ***************************************************
402 creates a new java/lang/String object which contains the given ASCII
403 C-string converted to UTF-16.
406 text.........string of ASCII characters
409 the java.lang.String object, or
410 NULL if an exception has been thrown.
412 *******************************************************************************/
414 java_handle_t *javastring_new_from_ascii(const char *text)
417 s4 len; /* length of the string */
420 java_handle_chararray_t *a;
423 exceptions_throw_nullpointerexception();
429 o = builtin_new(class_java_lang_String);
430 a = builtin_newarray_char(len);
432 /* javastring or character-array could not be created */
434 if ((o == NULL) || (a == NULL))
439 for (i = 0; i < len; i++)
440 LLNI_array_direct(a, i) = text[i];
442 /* set fields of the javastring-object */
444 s = (java_lang_String *) o;
446 LLNI_field_set_ref(s, value , a);
447 LLNI_field_set_val(s, offset, 0);
448 LLNI_field_set_val(s, count , len);
454 /* javastring_tochar ***********************************************************
456 Converts a Java string into a newly allocated C string, storing each element of the char array into one char.
458 return: pointer to C string
460 Caution: calling method MUST release the allocated memory!
462 *******************************************************************************/
464 char *javastring_tochar(java_handle_t *so)
466 java_lang_String *s = (java_lang_String *) so;
467 java_handle_chararray_t *a;
476 LLNI_field_get_ref(s, value, a);
481 LLNI_field_get_val(s, count, count);
482 LLNI_field_get_val(s, offset, offset);
484 buf = MNEW(char, count + 1);
486 for (i = 0; i < count; i++)
487 buf[i] = LLNI_array_direct(a, offset + i);
495 /* javastring_toutf ************************************************************
497 Make utf symbol from javastring.
499 *******************************************************************************/
501 utf *javastring_toutf(java_handle_t *string, bool isclassname)
504 java_handle_chararray_t *value;
508 s = (java_lang_String *) string;
513 LLNI_field_get_ref(s, value, value);
518 LLNI_field_get_val(s, count, count);
519 LLNI_field_get_val(s, offset, offset);
521 return utf_new_u2(LLNI_array_data(value) + offset, count, isclassname);
525 /* literalstring_u2 ************************************************************
527 Searches for the literalstring with the specified u2-array in the
528 string hashtable; if there is no such string, a new one is created.
530 If copymode is true a copy of the u2-array is made.
532 *******************************************************************************/
534 static java_object_t *literalstring_u2(java_chararray_t *a, u4 length,
535 u4 offset, bool copymode)
537 literalstring *s; /* hashtable element */
538 heapstring_t *js; /* u2-array wrapped in javastring */
539 java_chararray_t *ca; /* copy of u2-array */
544 LOCK_MONITOR_ENTER(lock_hashtable_string);
546 /* find location in hashtable */
548 key = unicode_hashkey(a->data + offset, length);
549 slot = key & (hashtable_string.size - 1);
550 s = hashtable_string.ptr[slot];
553 js = (heapstring_t *) s->string;
555 if (length == js->count) {
558 for (i = 0; i < length; i++)
559 if (a->data[offset + i] != js->value->data[i])
562 /* string already in hashtable, free memory */
565 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
567 LOCK_MONITOR_EXIT(lock_hashtable_string);
569 return (java_object_t *) js;
573 /* follow link in external hash chain */
578 /* create copy of u2-array for new javastring */
579 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
580 ca = mem_alloc(arraysize);
581 /* memcpy(ca, a, arraysize); */
582 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
583 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
589 /* location in hashtable found, complete arrayheader */
591 ca->header.objheader.vftbl =
592 Primitive_get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
593 ca->header.size = length;
595 assert(class_java_lang_String);
596 assert(class_java_lang_String->state & CLASS_LOADED);
598 /* create new javastring */
600 js = NEW(heapstring_t);
602 #if defined(ENABLE_STATISTICS)
604 size_string += sizeof(heapstring_t);
607 #if defined(ENABLE_THREADS)
608 lock_init_object_lock(&js->header);
611 js->header.vftbl = class_java_lang_String->vftbl;
616 /* create new literalstring */
618 s = NEW(literalstring);
620 #if defined(ENABLE_STATISTICS)
622 size_string += sizeof(literalstring);
625 s->hashlink = hashtable_string.ptr[slot];
626 s->string = (java_object_t *) js;
627 hashtable_string.ptr[slot] = s;
629 /* update number of hashtable entries */
631 hashtable_string.entries++;
633 /* reorganization of hashtable */
635 if (hashtable_string.entries > (hashtable_string.size * 2)) {
636 /* reorganization of hashtable, average length of the external
637 chains is approx. 2 */
641 literalstring *nexts;
643 hashtable newhash; /* the new hashtable */
645 /* create new hashtable, double the size */
647 hashtable_create(&newhash, hashtable_string.size * 2);
648 newhash.entries = hashtable_string.entries;
650 /* transfer elements to new hashtable */
652 for (i = 0; i < hashtable_string.size; i++) {
653 s = hashtable_string.ptr[i];
657 tmpjs = (heapstring_t *) s->string;
658 slot = unicode_hashkey(tmpjs->value->data, tmpjs->count) & (newhash.size - 1);
660 s->hashlink = newhash.ptr[slot];
661 newhash.ptr[slot] = s;
663 /* follow link in external hash chain */
668 /* dispose old table */
670 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
671 hashtable_string = newhash;
674 LOCK_MONITOR_EXIT(lock_hashtable_string);
676 return (java_object_t *) js;
680 /* literalstring_new ***********************************************************
682 Creates a new literalstring with the text of the utf-symbol and inserts
683 it into the string hashtable.
685 *******************************************************************************/
687 java_object_t *literalstring_new(utf *u)
689 char *utf_ptr; /* pointer to current unicode character */
691 u4 utflength; /* length of utf-string if uncompressed */
692 java_chararray_t *a; /* u2-array constructed from utf string */
696 utflength = utf_get_number_of_u2s(u);
698 /* allocate memory */
699 a = mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
701 /* convert utf-string to u2-array */
702 for (i = 0; i < utflength; i++)
703 a->data[i] = utf_nextu2(&utf_ptr);
705 return literalstring_u2(a, utflength, 0, false);
709 /* literalstring_free **********************************************************
711 Removes a literalstring from memory.
713 *******************************************************************************/
716 /* TWISTI This one is currently not used. */
718 static void literalstring_free(java_object_t* string)
723 s = (heapstring_t *) string;
726 /* dispose memory of java.lang.String object */
727 FREE(s, heapstring_t);
729 /* dispose memory of java-characterarray */
730 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
735 /* javastring_intern ***********************************************************
737 Intern the given Java string.
739 XXX NOTE: Literal Strings are direct references since they are not placed
740 onto the GC-Heap. That's why this function looks so "different".
742 *******************************************************************************/
744 java_handle_t *javastring_intern(java_handle_t *s)
746 java_lang_String *so;
747 java_chararray_t *value;
750 /* java_lang_String *o; */
751 java_object_t *o; /* XXX see note above */
753 so = (java_lang_String *) s;
755 value = LLNI_field_direct(so, value); /* XXX see note above */
756 LLNI_field_get_val(so, count, count);
757 LLNI_field_get_val(so, offset, offset);
759 o = literalstring_u2(value, count, offset, true);
761 return LLNI_WRAP(o); /* XXX see note above */
765 /* javastring_fprint ***********************************************************
767 Print the given Java string to the given stream.
769 *******************************************************************************/
771 void javastring_fprint(java_handle_t *s, FILE *stream)
773 java_lang_String *so;
774 java_handle_chararray_t *value;
780 so = (java_lang_String *) s;
782 LLNI_field_get_ref(so, value, value);
783 LLNI_field_get_val(so, count, count);
784 LLNI_field_get_val(so, offset, offset);
786 for (i = offset; i < offset + count; i++) {
787 c = LLNI_array_direct(value, i);
794 * These are local overrides for various environment variables in Emacs.
795 * Please do not remove this and leave it at the end of the file, where
796 * Emacs will automagically detect them.
797 * ---------------------------------------------------------------------
800 * indent-tabs-mode: t
804 * vim:noexpandtab:sw=4:ts=4: