/* src/vm/string.c - java.lang.String related functions

   Copyright (C) 1996-2005, 2006, 2007, 2008
   CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO

   This file is part of CACAO.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

*/
30 #include "vmcore/system.h"
34 #include "vm/global.h"
36 #include "mm/memory.h"
38 #include "native/jni.h"
39 #include "native/llni.h"
41 #include "native/include/java_lang_String.h"
43 #include "threads/lock-common.h"
46 #include "vm/builtin.h"
47 #include "vm/exceptions.h"
48 #include "vm/primitive.hpp"
49 #include "vm/stringlocal.h"
52 #include "vmcore/globals.hpp"
53 #include "vmcore/options.h"
54 #include "vmcore/statistics.h"
55 #include "vmcore/utf8.h"
58 /* global variables ***********************************************************/
60 /* hashsize must be power of 2 */
/* (slots are selected with `key & (size - 1)` in literalstring_u2, which only
   acts as a modulo when the size is a power of two) */
62 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
64 hashtable hashtable_string; /* hashtable for javastrings */
/* Lock object guarding hashtable_string: created in string_init and
   entered/exited around lookup and insertion in literalstring_u2.
   NOTE(review): the #endif closing this conditional is not present in this
   copy of the file. */
66 #if defined(ENABLE_THREADS)
67 static java_object_t *lock_hashtable_string;
/* XXX preliminary typedef, will be removed once string.c and utf8.c are
   unified. */
/* heapstring_t is the String layout that the literal-string code in this file
   accesses through plain member access (js->header, js->value, js->count).
   NOTE(review): both alternatives are listed below, but the #else and #endif
   that separate them are not present in this copy of the file. */
74 #if defined(ENABLE_HANDLES)
75 typedef heap_java_lang_String heapstring_t;
77 typedef java_lang_String heapstring_t;
81 /* string_init *****************************************************************

   Create the global string hashtable (hashtable_string) with
   HASHTABLE_STRING_SIZE slots and, when ENABLE_THREADS is defined, allocate
   and initialize the lock object that guards it.

   RETURN VALUE:
      declared as bool; the only outcome described in the body is success
      ("everything's ok").

   NOTE(review): the braces, the closing #endif and the return statement are
   not present in this copy of the file (the embedded listing numbers skip
   them); restore them from the upstream source before building.

*******************************************************************************/
87 bool string_init(void)
89 TRACESUBSYSTEMINITIALIZATION("string_init");
91 /* create string (javastring) hashtable */
93 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
95 #if defined(ENABLE_THREADS)
96 /* create string hashtable lock object */
98 lock_hashtable_string = NEW(java_object_t);
100 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
103 /* everything's ok */
109 /* stringtable_update **********************************************************

   Traverses the javastring hashtable and sets the vftbl-entries of
   javastrings which were temporarily set to NULL, because
   java.lang.Object was not yet loaded.

   For each entry reached, the String object (js) and the char array (a)
   each get their vftbl filled in if it is still NULL.  An entry whose string
   pointer or value array is NULL is reported through vm_abort.

   NOTE(review): this copy of the file lacks several short lines of the body
   (e.g. braces, some declarations, the assignment of `a`, and the statement
   announced by the "follow link" comment); restore them from the upstream
   source before building.

*******************************************************************************/
117 void stringtable_update(void)
121 literalstring *s; /* hashtable entry */
/* visit every slot of the table */
124 for (i = 0; i < hashtable_string.size; i++) {
125 s = hashtable_string.ptr[i];
128 js = (heapstring_t *) s->string;
130 if ((js == NULL) || (js->value == NULL)) {
131 /* error in hashtable found */
133 vm_abort("stringtable_update: invalid literalstring in hashtable");
/* patch the String object first ... */
138 if (!js->header.vftbl)
139 /* vftbl of javastring is NULL */
140 js->header.vftbl = class_java_lang_String->vftbl;
/* ... then the char array; `a` is presumably js->value, but its assignment
   is not visible in this copy -- TODO confirm */
142 if (!a->header.objheader.vftbl)
143 /* vftbl of character-array is NULL */
144 a->header.objheader.vftbl =
145 Primitive_get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
147 /* follow link in external hash chain */
155 /* javastring_new_from_utf_buffer **********************************************

   Create a new object of type java/lang/String with the text from
   the specified utf8 buffer.

   The number of UTF-16 units is computed first
   (utf_get_number_of_u2s_for_buffer), then a String object and a char array
   of that length are allocated, the buffer is decoded unit by unit with
   utf_nextu2, and the String's value/offset/count fields are set to
   (array, 0, length).

   IN:
      buffer.......points to first char in the buffer
      blength......number of bytes to read from the buffer

   RETURN VALUE:
      the java.lang.String object, or
      NULL if an exception has been thrown

   NOTE(review): several short lines of this function are not present in this
   copy of the file (second half of the parameter list, braces, some
   declarations, the initialization of utf_ptr, and the return statements);
   restore them from the upstream source before building.

*******************************************************************************/
170 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer,
173 const char *utf_ptr; /* current utf character in utf string */
174 u4 utflength; /* length of utf-string if uncompressed */
176 java_lang_String *s; /* result-string */
177 java_handle_chararray_t *a;
182 utflength = utf_get_number_of_u2s_for_buffer(buffer,blength);
/* both allocations are attempted before either result is checked */
184 o = builtin_new(class_java_lang_String);
185 a = builtin_newarray_char(utflength);
187 /* javastring or character-array could not be created */
189 if ((o == NULL) || (a == NULL))
192 /* decompress utf-string */
/* the cast drops the const qualifier of utf_ptr's target type for the call
   to utf_nextu2 */
196 for (i = 0; i < utflength; i++)
197 LLNI_array_direct(a, i) = utf_nextu2((char **) &utf_ptr);
199 /* set fields of the javastring-object */
201 s = (java_lang_String *) o;
203 LLNI_field_set_ref(s, value , a);
204 LLNI_field_set_val(s, offset, 0);
205 LLNI_field_set_val(s, count , utflength);
211 /* javastring_safe_new_from_utf8 ***********************************************

   Create a new object of type java/lang/String with the text from
   the specified UTF-8 string. This function is safe for invalid UTF-8.
   (Invalid characters will be replaced by U+fffd.)

   The byte length is taken with strlen, the number of Java chars with
   utf8_safe_number_of_u2s, and the conversion into the freshly allocated
   char array is done by utf8_safe_convert_to_u2s.  The String's
   value/offset/count fields are then set to (array, 0, len).

   IN:
      text.........the UTF-8 string, zero-terminated.

   RETURN VALUE:
      the java.lang.String object, or
      NULL if an exception has been thrown

   NOTE(review): several short lines of this function are not present in this
   copy of the file (braces, most declarations, whatever precedes the strlen
   call, and the return statements); restore them from the upstream source
   before building.

*******************************************************************************/
226 java_handle_t *javastring_safe_new_from_utf8(const char *text)
229 java_handle_chararray_t *a;
237 /* Get number of bytes. We need this to completely emulate the messy */
238 /* behaviour of the RI. :( */
/* ("RI" presumably means the Java reference implementation -- TODO confirm) */
240 nbytes = strlen(text);
242 /* calculate number of Java characters */
244 len = utf8_safe_number_of_u2s(text, nbytes);
246 /* allocate the String object and the char array */
/* both allocations are attempted before either result is checked */
248 o = builtin_new(class_java_lang_String);
249 a = builtin_newarray_char(len);
251 /* javastring or character-array could not be created? */
253 if ((o == NULL) || (a == NULL))
256 /* decompress UTF-8 string */
258 utf8_safe_convert_to_u2s(text, nbytes, LLNI_array_data(a));
260 /* set fields of the String object */
262 s = (java_lang_String *) o;
264 LLNI_field_set_ref(s, value , a);
265 LLNI_field_set_val(s, offset, 0);
266 LLNI_field_set_val(s, count , len);
272 /* javastring_new_from_utf_string **********************************************

   Create a new object of type java/lang/String with the text from
   the specified zero-terminated utf8 string.

   Thin wrapper: measures the string with strlen and forwards pointer and
   byte length to javastring_new_from_utf_buffer.

   IN:
      utfstr.......zero-terminated utf8 string

   RETURN VALUE:
      whatever javastring_new_from_utf_buffer returns, i.e.
      the java.lang.String object, or
      NULL if an exception has been thrown

   NOTE(review): the braces and anything else between the signature and the
   return statement are not present in this copy of the file.

*******************************************************************************/
287 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
291 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
295 /* javastring_new **************************************************************

   creates a new object of type java/lang/String with the text of
   the specified utf8-string

   The utf symbol is measured with utf_get_number_of_u2s, decoded unit by
   unit with utf_nextu2 into a freshly allocated char array, and the
   String's value/offset/count fields are set to (array, 0, length).

   return: pointer to the string or NULL if memory is exhausted.

   NOTE(review): several short lines of this function are not present in this
   copy of the file (braces, some declarations, the condition guarding the
   NullPointerException, the initialization of utf_ptr, and the return
   statements); restore them from the upstream source before building.

*******************************************************************************/
304 java_handle_t *javastring_new(utf *u)
306 char *utf_ptr; /* current utf character in utf string */
307 u4 utflength; /* length of utf-string if uncompressed */
309 java_handle_chararray_t *a;
/* presumably raised when u is NULL; the guarding condition is not visible in
   this copy -- TODO confirm */
314 exceptions_throw_nullpointerexception();
319 utflength = utf_get_number_of_u2s(u);
/* both allocations are attempted before either result is checked */
321 o = builtin_new(class_java_lang_String);
322 a = builtin_newarray_char(utflength);
324 /* javastring or character-array could not be created */
326 if ((o == NULL) || (a == NULL))
329 /* decompress utf-string */
331 for (i = 0; i < utflength; i++)
332 LLNI_array_direct(a, i) = utf_nextu2(&utf_ptr);
334 /* set fields of the javastring-object */
336 s = (java_lang_String *) o;
338 LLNI_field_set_ref(s, value , a);
339 LLNI_field_set_val(s, offset, 0);
340 LLNI_field_set_val(s, count , utflength);
346 /* javastring_new_slash_to_dot *************************************************

   creates a new object of type java/lang/String with the text of
   the specified utf8-string with slashes changed to dots

   Same construction as javastring_new, except that every decoded unit goes
   through the local `ch` before it is stored in the char array.

   return: pointer to the string or NULL if memory is exhausted.

   NOTE(review): several short lines of this function are not present in this
   copy of the file -- in particular the statement(s) between reading `ch`
   and storing it, which is where the slash-to-dot replacement described
   above would have to happen, plus braces, some declarations, the condition
   guarding the NullPointerException, the initialization of utf_ptr, and the
   return statements.  Restore them from the upstream source before building.

*******************************************************************************/
355 java_handle_t *javastring_new_slash_to_dot(utf *u)
357 char *utf_ptr; /* current utf character in utf string */
358 u4 utflength; /* length of utf-string if uncompressed */
360 java_handle_chararray_t *a;
/* presumably raised when u is NULL; the guarding condition is not visible in
   this copy -- TODO confirm */
366 exceptions_throw_nullpointerexception();
371 utflength = utf_get_number_of_u2s(u);
/* both allocations are attempted before either result is checked */
373 o = builtin_new(class_java_lang_String);
374 a = builtin_newarray_char(utflength);
376 /* javastring or character-array could not be created */
377 if ((o == NULL) || (a == NULL))
380 /* decompress utf-string */
382 for (i = 0; i < utflength; i++) {
383 ch = utf_nextu2(&utf_ptr);
386 LLNI_array_direct(a, i) = ch;
389 /* set fields of the javastring-object */
391 s = (java_lang_String *) o;
393 LLNI_field_set_ref(s, value , a);
394 LLNI_field_set_val(s, offset, 0);
395 LLNI_field_set_val(s, count , utflength);
401 /* javastring_new_from_ascii ***************************************************

   creates a new java/lang/String object which contains the given ASCII
   C-string converted to UTF-16.

   The conversion is a plain element-wise assignment of text[i] into the
   char array; no decoding is performed, so input is expected to be ASCII.

   IN:
      text.........string of ASCII characters

   RETURN VALUE:
      the java.lang.String object, or
      NULL if an exception has been thrown.

   NOTE(review): several short lines of this function are not present in this
   copy of the file (braces, some declarations, the condition guarding the
   NullPointerException, the computation of `len`, and the return
   statements); restore them from the upstream source before building.

*******************************************************************************/
415 java_handle_t *javastring_new_from_ascii(const char *text)
418 s4 len; /* length of the string */
421 java_handle_chararray_t *a;
/* presumably raised when text is NULL; the guarding condition is not visible
   in this copy -- TODO confirm */
424 exceptions_throw_nullpointerexception();
/* both allocations are attempted before either result is checked */
430 o = builtin_new(class_java_lang_String);
431 a = builtin_newarray_char(len);
433 /* javastring or character-array could not be created */
435 if ((o == NULL) || (a == NULL))
/* copy the characters one by one, widening each char by assignment */
440 for (i = 0; i < len; i++)
441 LLNI_array_direct(a, i) = text[i];
443 /* set fields of the javastring-object */
445 s = (java_lang_String *) o;
447 LLNI_field_set_ref(s, value , a);
448 LLNI_field_set_val(s, offset, 0);
449 LLNI_field_set_val(s, count , len);
455 /* javastring_tochar ***********************************************************

   converts a Java string into a C string.

   Reads the String's value array, count and offset, allocates count + 1
   chars with MNEW and copies the `count` array elements starting at
   `offset` into the buffer.  Each element is narrowed to char by plain
   assignment, so this is not a UTF-8 encoding of the string.

   return: pointer to C string

   Caution: calling method MUST release the allocated memory!

   NOTE(review): several short lines of this function are not present in this
   copy of the file (braces, some declarations, whatever checks sit between
   the field reads, the store of the terminating zero for which the extra
   byte is allocated, and the return statement); restore them from the
   upstream source before building.

*******************************************************************************/
465 char *javastring_tochar(java_handle_t *so)
467 java_lang_String *s = (java_lang_String *) so;
468 java_handle_chararray_t *a;
477 LLNI_field_get_ref(s, value, a);
482 LLNI_field_get_val(s, count, count);
483 LLNI_field_get_val(s, offset, offset);
/* one extra char beyond the `count` characters copied below */
485 buf = MNEW(char, count + 1);
487 for (i = 0; i < count; i++)
488 buf[i] = LLNI_array_direct(a, offset + i);
496 /* javastring_toutf ************************************************************

   Make utf symbol from javastring.

   Reads the String's value array, count and offset and hands the `count`
   chars starting at `offset` to utf_new_u2, passing `isclassname` through
   unchanged.

   IN:
      string.......the java.lang.String object
      isclassname..forwarded to utf_new_u2

   RETURN VALUE:
      the result of utf_new_u2

   NOTE(review): several short lines of this function are not present in this
   copy of the file (braces, some declarations, and whatever checks sit
   between the field reads); restore them from the upstream source before
   building.

*******************************************************************************/
502 utf *javastring_toutf(java_handle_t *string, bool isclassname)
505 java_handle_chararray_t *value;
509 s = (java_lang_String *) string;
514 LLNI_field_get_ref(s, value, value);
519 LLNI_field_get_val(s, count, count);
520 LLNI_field_get_val(s, offset, offset);
522 return utf_new_u2(LLNI_array_data(value) + offset, count, isclassname);
526 /* literalstring_u2 ************************************************************

   Searches for the literalstring with the specified u2-array in the
   string hashtable, if there is no such string a new one is created.

   If copymode is true a copy of the u2-array is made.

   The whole operation runs between LOCK_MONITOR_ENTER and LOCK_MONITOR_EXIT
   on lock_hashtable_string.  The slot is `hash & (size - 1)`.  Stored
   strings are compared from index 0 of their value array, the candidate
   from index `offset` of `a`.  After an insertion that leaves more than
   2 * size entries in the table, a table of twice the size is created and
   all entries are relinked into it.

   IN:
      a............char array holding the text
      length.......number of chars
      offset.......index of the first char inside a->data
      copymode.....see above

   RETURN VALUE:
      the String object stored in the hashtable (existing or newly created)

   NOTE(review): several short lines of this function are not present in this
   copy of the file (braces, some declarations, the chain loops, the
   conditions on copymode, the branch target for a failed comparison, the
   assignments of js->value/offset/count, and others); restore them from the
   upstream source before building.

*******************************************************************************/
535 static java_object_t *literalstring_u2(java_chararray_t *a, u4 length,
536 u4 offset, bool copymode)
538 literalstring *s; /* hashtable element */
539 heapstring_t *js; /* u2-array wrapped in javastring */
540 java_chararray_t *ca; /* copy of u2-array */
545 LOCK_MONITOR_ENTER(lock_hashtable_string);
547 /* find location in hashtable */
549 key = unicode_hashkey(a->data + offset, length);
550 slot = key & (hashtable_string.size - 1);
551 s = hashtable_string.ptr[slot];
554 js = (heapstring_t *) s->string;
/* cheap length check before the char-by-char comparison */
556 if (length == js->count) {
559 for (i = 0; i < length; i++)
560 if (a->data[offset + i] != js->value->data[i])
563 /* string already in hashtable, free memory */
/* the size matches the allocation formula used in literalstring_new;
   presumably only done when the array was handed over (copymode false), but
   the guarding condition is not visible in this copy -- TODO confirm */
566 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
568 LOCK_MONITOR_EXIT(lock_hashtable_string);
570 return (java_object_t *) js;
574 /* follow link in external hash chain */
579 /* create copy of u2-array for new javastring */
/* NOTE(review): if u4 is a 32-bit unsigned type, length == 0 makes
   (length - 1) wrap around; arraysize is truncated back to u4, but the
   memcpy size below is computed separately -- verify the empty-string
   case. */
580 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
581 ca = mem_alloc(arraysize);
582 /* memcpy(ca, a, arraysize); */
583 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
/* NOTE(review): `&(a->data) + offset` does pointer arithmetic on the address
   of the data member, not on its elements; it equals a->data + offset only
   if data is declared as a one-element array -- confirm.  The byte count
   also covers more than `length` chars from the source (the "+ 10" slack),
   so it can read past the chars that belong to this string. */
584 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
590 /* location in hashtable found, complete arrayheader */
592 ca->header.objheader.vftbl =
593 Primitive_get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
594 ca->header.size = length;
596 assert(class_java_lang_String);
597 assert(class_java_lang_String->state & CLASS_LOADED);
599 /* create new javastring */
/* allocated with NEW rather than through builtin_new */
601 js = NEW(heapstring_t);
603 #if defined(ENABLE_STATISTICS)
605 size_string += sizeof(heapstring_t);
608 #if defined(ENABLE_THREADS)
609 lock_init_object_lock(&js->header);
612 js->header.vftbl = class_java_lang_String->vftbl;
617 /* create new literalstring */
619 s = NEW(literalstring);
621 #if defined(ENABLE_STATISTICS)
623 size_string += sizeof(literalstring);
/* link the new entry at the head of the slot's chain */
626 s->hashlink = hashtable_string.ptr[slot];
627 s->string = (java_object_t *) js;
628 hashtable_string.ptr[slot] = s;
630 /* update number of hashtable entries */
632 hashtable_string.entries++;
634 /* reorganization of hashtable */
636 if (hashtable_string.entries > (hashtable_string.size * 2)) {
637 /* reorganization of hashtable, average length of the external
638 chains is approx. 2 */
642 literalstring *nexts;
644 hashtable newhash; /* the new hashtable */
646 /* create new hashtable, double the size */
648 hashtable_create(&newhash, hashtable_string.size * 2);
649 newhash.entries = hashtable_string.entries;
651 /* transfer elements to new hashtable */
653 for (i = 0; i < hashtable_string.size; i++) {
654 s = hashtable_string.ptr[i];
658 tmpjs = (heapstring_t *) s->string;
/* rehash from index 0 of the stored value array, masked with the new size */
659 slot = unicode_hashkey(tmpjs->value->data, tmpjs->count) & (newhash.size - 1);
661 s->hashlink = newhash.ptr[slot];
662 newhash.ptr[slot] = s;
664 /* follow link in external hash chain */
669 /* dispose old table */
/* only the old slot array is released; the entries were relinked above */
671 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
672 hashtable_string = newhash;
675 LOCK_MONITOR_EXIT(lock_hashtable_string);
677 return (java_object_t *) js;
681 /* literalstring_new ***********************************************************

   Creates a new literalstring with the text of the utf-symbol and inserts
   it into the string hashtable.

   The utf symbol is decoded into a char array obtained from mem_alloc, and
   that array is passed to literalstring_u2 with offset 0 and copymode
   false, i.e. no further copy is requested.

   RETURN VALUE:
      the String object returned by literalstring_u2

   NOTE(review): several short lines of this function are not present in this
   copy of the file (braces, some declarations, and the initialization of
   utf_ptr); restore them from the upstream source before building.

*******************************************************************************/
688 java_object_t *literalstring_new(utf *u)
690 char *utf_ptr; /* pointer to current unicode character */
692 u4 utflength; /* length of utf-string if uncompressed */
693 java_chararray_t *a; /* u2-array constructed from utf string */
697 utflength = utf_get_number_of_u2s(u);
699 /* allocate memory */
/* NOTE(review): if u4 is a 32-bit unsigned type, utflength == 0 makes
   (utflength - 1) wrap around -- verify the empty-string case.  The same
   size formula, including the "+ 10", is used by the mem_free in
   literalstring_u2. */
700 a = mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
702 /* convert utf-string to u2-array */
703 for (i = 0; i < utflength; i++)
704 a->data[i] = utf_nextu2(&utf_ptr);
706 return literalstring_u2(a, utflength, 0, false);
710 /* literalstring_free **********************************************************

   Removes a literalstring from memory.

   Releases the String object and then the char array `a`.

   NOTE(review): the two FREE calls below use different argument forms (a
   type name for the String, a size expression for the array); check which
   form the FREE macro expects.  The size expression also lacks the "+ 10"
   that the allocation sites in literalstring_new and literalstring_u2 add,
   which is what the trailing "+10 ??" remark refers to.

   NOTE(review): several short lines of this function are not present in this
   copy of the file (braces, the declarations, and the assignment of `a`);
   restore them from the upstream source before building.

*******************************************************************************/
717 /* TWISTI This one is currently not used. */
719 static void literalstring_free(java_object_t* string)
724 s = (heapstring_t *) string;
727 /* dispose memory of java.lang.String object */
728 FREE(s, heapstring_t);
730 /* dispose memory of java-characterarray */
731 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
736 /* javastring_intern ***********************************************************

   Intern the given Java string.

   Reads the String's value array (as a direct reference), count and offset
   and looks the text up through literalstring_u2 with copymode true, so the
   hashtable is asked to copy the chars rather than use the caller's array.
   The resulting object is wrapped with LLNI_WRAP before it is returned.

   XXX NOTE: Literal Strings are direct references since they are not placed
   onto the GC-Heap. That's why this function looks so "different".

   NOTE(review): the braces and the declarations of count and offset are not
   present in this copy of the file; restore them from the upstream source
   before building.

*******************************************************************************/
745 java_handle_t *javastring_intern(java_handle_t *s)
747 java_lang_String *so;
748 java_chararray_t *value;
751 /* java_lang_String *o; */
752 java_object_t *o; /* XXX see note above */
754 so = (java_lang_String *) s;
756 value = LLNI_field_direct(so, value); /* XXX see note above */
757 LLNI_field_get_val(so, count, count);
758 LLNI_field_get_val(so, offset, offset);
760 o = literalstring_u2(value, count, offset, true);
762 return LLNI_WRAP(o); /* XXX see note above */
766 /* javastring_fprint ***********************************************************

   Print the given Java string to the given stream.

   Reads the String's value array, count and offset and walks the chars at
   indices offset .. offset + count - 1, loading each one into `c`.

   IN:
      s............the java.lang.String object
      stream.......the output stream

   NOTE(review): several short lines of this function are not present in this
   copy of the file (braces, some declarations, and the statement that
   writes `c` to the stream); restore them from the upstream source before
   building.

*******************************************************************************/
772 void javastring_fprint(java_handle_t *s, FILE *stream)
774 java_lang_String *so;
775 java_handle_chararray_t *value;
781 so = (java_lang_String *) s;
783 LLNI_field_get_ref(so, value, value);
784 LLNI_field_get_val(so, count, count);
785 LLNI_field_get_val(so, offset, offset);
787 for (i = offset; i < offset + count; i++) {
788 c = LLNI_array_direct(value, i);
/*
 * These are local overrides for various environment variables in Emacs.
 * Please do not remove this and leave it at the end of the file, where
 * Emacs will automagically detect them.
 * ---------------------------------------------------------------------
 * indent-tabs-mode: t
 * vim:noexpandtab:sw=4:ts=4:
 */