1 /* src/vm/string.cpp - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
30 #include "vmcore/system.h"
34 #include "vm/global.h"
36 #include "mm/memory.h"
38 #include "native/jni.h"
39 #include "native/llni.h"
41 #include "native/include/java_lang_String.h"
43 #include "threads/lock-common.h"
46 #include "vm/builtin.h"
47 #include "vm/exceptions.hpp"
48 #include "vm/primitive.hpp"
49 #include "vm/string.hpp"
52 #include "vmcore/globals.hpp"
53 #include "vmcore/options.h"
54 #include "vmcore/statistics.h"
55 #include "vmcore/utf8.h"
58 /* global variables ***********************************************************/
60 /* hashsize must be power of 2: literalstring_u2 selects a slot with
   key & (size - 1), which only covers all slots for such sizes */
62 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
64 hashtable hashtable_string; /* hashtable for javastrings */
66 #if defined(ENABLE_THREADS)
/* object used as the monitor for hashtable_string: allocated in string_init,
   entered and exited around the lookup/insert in literalstring_u2 */
67 static java_object_t *lock_hashtable_string;
71 /* XXX preliminary typedef, will be removed once string.c and utf8.c are unified. */
74 #if defined(ENABLE_HANDLES)
75 typedef heap_java_lang_String heapstring_t;
77 typedef java_lang_String heapstring_t;
81 /* string_init *****************************************************************
83 Initialize the string hashtable lock.
   Also creates the javastring hashtable itself with
   HASHTABLE_STRING_SIZE slots. The lock object is only allocated and
   initialized when ENABLE_THREADS is defined.
   RETURN VALUE: declared as bool; the return statement is not visible
   in this excerpt (the trailing comment suggests success is reported).
85 *******************************************************************************/
87 bool string_init(void)
89 TRACESUBSYSTEMINITIALIZATION("string_init");
91 /* create string (javastring) hashtable */
93 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
95 #if defined(ENABLE_THREADS)
96 /* create string hashtable lock object */
98 lock_hashtable_string = NEW(java_object_t);
/* prepare the new object for use with LOCK_MONITOR_ENTER / LOCK_MONITOR_EXIT
   (LOCK_INIT_OBJECT_LOCK is defined elsewhere) */
100 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
103 /* everything's ok */
109 /* stringtable_update **********************************************************
111 Traverses the javastring hashtable and sets the vftbl-entries of
112 javastrings which were temporarily set to NULL, because
113 java.lang.Object was not yet loaded.
   Two vftbl pointers are patched per entry when they are NULL: the one
   of the String object (from class_java_lang_String) and the one of its
   character array (from the char array class). An entry without a
   String object or without a value array aborts the VM.
115 *******************************************************************************/
117 void stringtable_update(void)
121 literalstring *s; /* hashtable entry */
/* visit every slot of the table */
123 for (unsigned int i = 0; i < hashtable_string.size; i++) {
/* first entry stored in slot i */
124 s = (literalstring*) hashtable_string.ptr[i];
128 js = (heapstring_t *) s->string;
/* a literal must always carry both a String object and character data */
130 if ((js == NULL) || (js->value == NULL)) {
131 /* error in hashtable found */
133 vm_abort("stringtable_update: invalid literalstring in hashtable");
138 if (!js->header.vftbl)
139 /* vftbl of javastring is NULL */
140 js->header.vftbl = class_java_lang_String->vftbl;
/* NOTE(review): the assignment of `a` is not visible in this excerpt;
   presumably it is the value array of js - confirm */
142 if (!a->header.objheader.vftbl)
143 /* vftbl of character-array is NULL */
144 a->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
146 /* follow link in external hash chain */
154 /* javastring_new_from_utf_buffer **********************************************
156 Create a new object of type java/lang/String with the text from
157 the specified utf8 buffer.
   IN:
160 buffer.......points to first char in the buffer
161 blength......number of bytes to read from the buffer
   RETURN VALUE:
164 the java.lang.String object, or
165 NULL if an exception has been thrown
167 *******************************************************************************/
169 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer,
172 const char *utf_ptr; /* current utf character in utf string */
173 u4 utflength; /* length of utf-string if uncompressed */
175 java_lang_String *s; /* result-string */
176 java_handle_chararray_t *a;
/* the char array is sized by the number of u2 values the buffer decodes to */
181 utflength = utf_get_number_of_u2s_for_buffer(buffer,blength);
/* both allocations are attempted before either result is checked */
183 o = builtin_new(class_java_lang_String);
184 a = builtin_newarray_char(utflength);
186 /* javastring or character-array could not be created */
188 if ((o == NULL) || (a == NULL))
191 /* decompress utf-string */
/* NOTE(review): the initialization of utf_ptr is not visible in this
   excerpt; presumably it starts at buffer - confirm */
195 for (i = 0; i < utflength; i++)
/* utf_nextu2 takes a non-const char **, hence the cast on &utf_ptr */
196 LLNI_array_direct(a, i) = utf_nextu2((char **) &utf_ptr);
198 /* set fields of the javastring-object */
200 s = (java_lang_String *) o;
/* the string covers the whole array: offset 0, count = array length */
202 LLNI_field_set_ref(s, value , a);
203 LLNI_field_set_val(s, offset, 0);
204 LLNI_field_set_val(s, count , utflength);
210 /* javastring_safe_new_from_utf8 ***********************************************
212 Create a new object of type java/lang/String with the text from
213 the specified UTF-8 string. This function is safe for invalid UTF-8.
214 (Invalid characters will be replaced by U+fffd.)
   IN:
217 text.........the UTF-8 string, zero-terminated.
   RETURN VALUE:
220 the java.lang.String object, or
221 NULL if an exception has been thrown
   NOTE(review): text is passed to strlen, so it must not be NULL when
   that call is reached; any NULL guard is not visible in this excerpt.
223 *******************************************************************************/
225 java_handle_t *javastring_safe_new_from_utf8(const char *text)
228 java_handle_chararray_t *a;
236 /* Get number of bytes. We need this to completely emulate the messy */
237 /* behaviour of the RI. :( */
239 nbytes = strlen(text);
241 /* calculate number of Java characters */
243 len = utf8_safe_number_of_u2s(text, nbytes);
245 /* allocate the String object and the char array */
247 o = builtin_new(class_java_lang_String);
248 a = builtin_newarray_char(len);
250 /* javastring or character-array could not be created? */
252 if ((o == NULL) || (a == NULL))
255 /* decompress UTF-8 string */
/* converts the same nbytes that were measured above, writing directly
   into the data area of the new char array */
257 utf8_safe_convert_to_u2s(text, nbytes, LLNI_array_data(a));
259 /* set fields of the String object */
261 s = (java_lang_String *) o;
/* the string covers the whole array: offset 0, count = len */
263 LLNI_field_set_ref(s, value , a);
264 LLNI_field_set_val(s, offset, 0);
265 LLNI_field_set_val(s, count , len);
271 /* javastring_new_from_utf_string **********************************************
273 Create a new object of type java/lang/String with the text from
274 the specified zero-terminated utf8 string.
   IN:
277 utfstr.......points to the first char of the zero-terminated string
278 (its byte length is determined with strlen)
   RETURN VALUE:
281 the java.lang.String object, or
282 NULL if an exception has been thrown
   NOTE(review): utfstr must not be NULL when strlen is reached; any
   guard is not visible in this excerpt.
284 *******************************************************************************/
286 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
/* thin wrapper: delegate to the buffer variant with the measured length */
290 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
294 /* javastring_new **************************************************************
296 creates a new object of type java/lang/String with the text of
297 the specified utf8-string
   IN:
   u............the utf symbol whose text is decoded into the new string
299 return: pointer to the string or NULL if memory is exhausted.
   A NullPointerException is raised through
   exceptions_throw_nullpointerexception(); the guarding condition is
   not visible in this excerpt (presumably u == NULL).
301 *******************************************************************************/
303 java_handle_t *javastring_new(utf *u)
305 char *utf_ptr; /* current utf character in utf string */
306 int32_t utflength; /* length of utf-string if uncompressed */
308 java_handle_chararray_t *a;
312 exceptions_throw_nullpointerexception();
/* the char array is sized by the number of u2 values u decodes to */
317 utflength = utf_get_number_of_u2s(u);
/* both allocations are attempted before either result is checked */
319 o = builtin_new(class_java_lang_String);
320 a = builtin_newarray_char(utflength);
322 /* javastring or character-array could not be created */
324 if ((o == NULL) || (a == NULL))
327 /* decompress utf-string */
/* NOTE(review): the initialization of utf_ptr is not visible in this
   excerpt; presumably it points at the text of u - confirm */
329 for (int32_t i = 0; i < utflength; i++)
330 LLNI_array_direct(a, i) = utf_nextu2(&utf_ptr);
332 /* set fields of the javastring-object */
334 s = (java_lang_String *) o;
/* the string covers the whole array: offset 0, count = utflength */
336 LLNI_field_set_ref(s, value , a);
337 LLNI_field_set_val(s, offset, 0);
338 LLNI_field_set_val(s, count , utflength);
344 /* javastring_new_slash_to_dot *************************************************
346 creates a new object of type java/lang/String with the text of
347 the specified utf8-string with slashes changed to dots
   IN:
   u............the utf symbol whose text is decoded into the new string
349 return: pointer to the string or NULL if memory is exhausted.
   A NullPointerException is raised through
   exceptions_throw_nullpointerexception(); the guarding condition is
   not visible in this excerpt (presumably u == NULL).
351 *******************************************************************************/
353 java_handle_t *javastring_new_slash_to_dot(utf *u)
355 char *utf_ptr; /* current utf character in utf string */
356 int32_t utflength; /* length of utf-string if uncompressed */
358 java_handle_chararray_t *a;
363 exceptions_throw_nullpointerexception();
/* the char array is sized by the number of u2 values u decodes to */
368 utflength = utf_get_number_of_u2s(u);
/* both allocations are attempted before either result is checked */
370 o = builtin_new(class_java_lang_String);
371 a = builtin_newarray_char(utflength);
373 /* javastring or character-array could not be created */
374 if ((o == NULL) || (a == NULL))
377 /* decompress utf-string */
379 for (int32_t i = 0; i < utflength; i++) {
/* each decoded character goes through the temporary ch before it is
   stored; NOTE(review): the slash-to-dot replacement itself is not
   visible in this excerpt, presumably it sits between these two lines */
380 ch = utf_nextu2(&utf_ptr);
383 LLNI_array_direct(a, i) = ch;
386 /* set fields of the javastring-object */
388 s = (java_lang_String *) o;
/* the string covers the whole array: offset 0, count = utflength */
390 LLNI_field_set_ref(s, value , a);
391 LLNI_field_set_val(s, offset, 0);
392 LLNI_field_set_val(s, count , utflength);
398 /* javastring_new_from_ascii ***************************************************
400 creates a new java/lang/String object which contains the given ASCII
401 C-string converted to UTF-16.
   IN:
404 text.........string of ASCII characters
   RETURN VALUE:
407 the java.lang.String object, or
408 NULL if an exception has been thrown.
   A NullPointerException is raised through
   exceptions_throw_nullpointerexception(); the guarding condition is
   not visible in this excerpt (presumably text == NULL).
410 *******************************************************************************/
412 java_handle_t *javastring_new_from_ascii(const char *text)
415 s4 len; /* length of the string */
418 java_handle_chararray_t *a;
421 exceptions_throw_nullpointerexception();
/* NOTE(review): the assignment of len is not visible in this excerpt;
   presumably it is the length of text - confirm */
/* both allocations are attempted before either result is checked */
427 o = builtin_new(class_java_lang_String);
428 a = builtin_newarray_char(len);
430 /* javastring or character-array could not be created */
432 if ((o == NULL) || (a == NULL))
/* every input char is stored into one array element without decoding */
437 for (i = 0; i < len; i++)
438 LLNI_array_direct(a, i) = text[i];
440 /* set fields of the javastring-object */
442 s = (java_lang_String *) o;
/* the string covers the whole array: offset 0, count = len */
444 LLNI_field_set_ref(s, value , a);
445 LLNI_field_set_val(s, offset, 0);
446 LLNI_field_set_val(s, count , len);
452 /* javastring_tochar ***********************************************************
454 converts a Java string into a C string.
   IN:
   so...........the Java string to convert
456 return: pointer to C string
458 Caution: calling method MUST release the allocated memory!
   The buffer is obtained with MNEW(char, count + 1).
460 *******************************************************************************/
462 char *javastring_tochar(java_handle_t *so)
464 java_lang_String *s = (java_lang_String *) so;
465 java_handle_chararray_t *a;
/* read the three fields that describe the text: backing array, number
   of characters and start index inside the array */
474 LLNI_field_get_ref(s, value, a);
479 LLNI_field_get_val(s, count, count);
480 LLNI_field_get_val(s, offset, offset);
/* one char more than the string has characters; presumably for the
   terminating zero, whose store is not visible in this excerpt */
482 buf = MNEW(char, count + 1);
/* copy count elements starting at offset; each element is assigned to
   a plain char, so values that do not fit into a char are not preserved */
484 for (i = 0; i < count; i++)
485 buf[i] = LLNI_array_direct(a, offset + i);
493 /* javastring_toutf ************************************************************
495 Make utf symbol from javastring.
   IN:
   string.......the Java string to convert
   isclassname..passed through unchanged to utf_new_u2
   RETURN VALUE:
   the result of utf_new_u2 for the count characters that start at
   offset in the string's value array
497 *******************************************************************************/
499 utf *javastring_toutf(java_handle_t *string, bool isclassname)
502 java_handle_chararray_t *value;
506 s = (java_lang_String *) string;
/* read the backing array, the number of characters and the start index */
511 LLNI_field_get_ref(s, value, value);
516 LLNI_field_get_val(s, count, count);
517 LLNI_field_get_val(s, offset, offset);
/* hand only the used part of the array to utf_new_u2 */
519 return utf_new_u2(LLNI_array_data(value) + offset, count, isclassname);
523 /* literalstring_u2 ************************************************************
525 Searches for the literalstring with the specified u2-array in the
526 string hashtable, if there is no such string a new one is created.
528 If copymode is true a copy of the u2-array is made.
   IN:
   a............character array holding the text
   length.......number of characters of the text
   offset.......index in a->data at which the text starts
   copymode.....see above
   RETURN VALUE:
   the String object of the matching or newly inserted literal, cast to
   java_object_t *
   The lookup and the insertion run between LOCK_MONITOR_ENTER and
   LOCK_MONITOR_EXIT on lock_hashtable_string; both return paths visible
   here exit the monitor first.
530 *******************************************************************************/
532 static java_object_t *literalstring_u2(java_chararray_t *a, int32_t length,
533 u4 offset, bool copymode)
535 literalstring *s; /* hashtable element */
536 heapstring_t *js; /* u2-array wrapped in javastring */
537 java_chararray_t *ca; /* copy of u2-array */
542 LOCK_MONITOR_ENTER(lock_hashtable_string);
544 /* find location in hashtable */
/* hash only the used part of the array */
546 key = unicode_hashkey(a->data + offset, length);
/* masking with size - 1 picks the slot; relies on the table size being
   a power of 2 */
547 slot = key & (hashtable_string.size - 1);
548 s = (literalstring*) hashtable_string.ptr[slot];
551 js = (heapstring_t *) s->string;
/* only candidates of the same length are compared character-wise */
553 if (length == js->count) {
/* the stored literal is read from index 0 of its array, only the
   searched text is shifted by offset */
556 for (i = 0; i < length; i++)
557 if (a->data[offset + i] != js->value->data[i])
560 /* string already in hashtable, free memory */
/* the size expression equals the one literalstring_new uses to allocate
   its array; NOTE(review): whether this free depends on copymode is not
   visible in this excerpt */
563 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
565 LOCK_MONITOR_EXIT(lock_hashtable_string);
567 return (java_object_t *) js;
571 /* follow link in external hash chain */
576 /* create copy of u2-array for new javastring */
/* array struct plus (length - 1) further u2 plus 10 bytes; the "- 1"
   presumably accounts for an element already contained in
   java_chararray_t, the reason for the 10 extra bytes is not visible
   here - TODO confirm both */
577 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
578 ca = (java_chararray_t*) mem_alloc(arraysize);
579 /* memcpy(ca, a, arraysize); */
/* the header is copied separately from the character data */
580 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
/* NOTE(review): offset is added to &(a->data), not to a->data, so the
   step width is that of the declared type of data - confirm it equals
   one u2. The byte count also includes the 10 spare bytes, so the read
   may extend past the length characters that start at offset - confirm
   the source array is always large enough */
581 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
587 /* location in hashtable found, complete arrayheader */
589 ca->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
590 ca->header.size = length;
/* the String class is needed below for the vftbl of the new object */
592 assert(class_java_lang_String);
593 assert(class_java_lang_String->state & CLASS_LOADED);
595 /* create new javastring */
597 js = NEW(heapstring_t);
599 #if defined(ENABLE_STATISTICS)
601 size_string += sizeof(heapstring_t);
604 #if defined(ENABLE_THREADS)
605 lock_init_object_lock(&js->header);
608 js->header.vftbl = class_java_lang_String->vftbl;
613 /* create new literalstring */
615 s = NEW(literalstring);
617 #if defined(ENABLE_STATISTICS)
619 size_string += sizeof(literalstring);
/* link the new entry in front of the entries already in this slot */
622 s->hashlink = (literalstring*) hashtable_string.ptr[slot];
623 s->string = (java_object_t *) js;
624 hashtable_string.ptr[slot] = s;
626 /* update number of hashtable entries */
628 hashtable_string.entries++;
630 /* reorganization of hashtable */
/* grow once there are more than two entries per slot on average */
632 if (hashtable_string.entries > (hashtable_string.size * 2)) {
633 /* reorganization of hashtable, average length of the external
634 chains is approx. 2 */
638 literalstring *nexts;
640 hashtable newhash; /* the new hashtable */
642 /* create new hashtable, double the size */
644 hashtable_create(&newhash, hashtable_string.size * 2);
/* every entry is moved over, so the entry count carries over unchanged */
645 newhash.entries = hashtable_string.entries;
647 /* transfer elements to new hashtable */
649 for (i = 0; i < hashtable_string.size; i++) {
650 s = (literalstring*) hashtable_string.ptr[i];
654 tmpjs = (heapstring_t *) s->string;
/* recompute the slot with the mask of the larger table; stored
   literals are hashed from index 0 of their array */
655 slot = unicode_hashkey(tmpjs->value->data, tmpjs->count) & (newhash.size - 1);
/* relink the existing entry at the head of its new slot; entries are
   reused, not copied */
657 s->hashlink = (literalstring*) newhash.ptr[slot];
658 newhash.ptr[slot] = s;
660 /* follow link in external hash chain */
665 /* dispose old table */
/* only the old slot array is released; the entries now hang off newhash */
667 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
668 hashtable_string = newhash;
671 LOCK_MONITOR_EXIT(lock_hashtable_string);
673 return (java_object_t *) js;
677 /* literalstring_new ***********************************************************
679 Creates a new literalstring with the text of the utf-symbol and inserts
680 it into the string hashtable.
   IN:
   u............the utf symbol providing the text
   RETURN VALUE:
   whatever literalstring_u2 returns for the decoded text
   The decoded character array is allocated here with mem_alloc and
   handed to literalstring_u2 with copymode == false.
682 *******************************************************************************/
684 java_object_t *literalstring_new(utf *u)
686 char *utf_ptr; /* pointer to current unicode character */
688 u4 utflength; /* length of utf-string if uncompressed */
689 java_chararray_t *a; /* u2-array constructed from utf string */
693 utflength = utf_get_number_of_u2s(u);
695 /* allocate memory */
/* same size expression that literalstring_u2 passes to mem_free when the
   text is already in the table */
696 a = (java_chararray_t*) mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
698 /* convert utf-string to u2-array */
/* NOTE(review): the initialization of utf_ptr is not visible in this
   excerpt; presumably it points at the text of u - confirm */
699 for (i = 0; i < utflength; i++)
700 a->data[i] = utf_nextu2(&utf_ptr);
/* the text starts at index 0 of the new array; no copy is requested */
702 return literalstring_u2(a, utflength, 0, false);
706 /* literalstring_free **********************************************************
708 Removes a literalstring from memory.
   IN:
   string.......the String object of the literal
   Releases the String object and a character array; nothing in the
   visible lines touches the string hashtable.
710 *******************************************************************************/
713 /* TWISTI This one is currently not used. */
715 static void literalstring_free(java_object_t* string)
720 s = (heapstring_t *) string;
/* NOTE(review): the assignment of `a` is not visible in this excerpt;
   presumably it is the value array of s - confirm */
723 /* dispose memory of java.lang.String object */
724 FREE(s, heapstring_t);
725 /* dispose memory of java-characterarray */
/* NOTE(review): the allocation sites in this file (literalstring_new,
   literalstring_u2) add 10 bytes to this size expression, this one
   does not */
727 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
732 /* javastring_intern ***********************************************************
734 Intern the given Java string.
736 XXX NOTE: Literal Strings are direct references since they are not placed
737 onto the GC-Heap. That's why this function looks so "different".
   IN:
   s............the Java string to intern
   RETURN VALUE:
   the object returned by literalstring_u2 for the text of s, wrapped
   with LLNI_WRAP
739 *******************************************************************************/
741 java_handle_t *javastring_intern(java_handle_t *s)
743 java_lang_String *so;
744 java_chararray_t *value;
747 /* java_lang_String *o; */
748 java_object_t *o; /* XXX see note above */
750 so = (java_lang_String *) s;
/* the array is read with LLNI_field_direct, unlike count and offset
   which go through LLNI_field_get_val */
752 value = LLNI_field_direct(so, value); /* XXX see note above */
753 LLNI_field_get_val(so, count, count);
754 LLNI_field_get_val(so, offset, offset);
/* copymode is true, so per the contract of literalstring_u2 the table
   works on a copy of the character data */
756 o = literalstring_u2(value, count, offset, true);
758 return LLNI_WRAP(o); /* XXX see note above */
762 /* javastring_fprint ***********************************************************
764 Print the given Java string to the given stream.
   IN:
   s............the Java string to print
   stream.......the stream to print to
766 *******************************************************************************/
768 void javastring_fprint(java_handle_t *s, FILE *stream)
770 java_lang_String *so;
771 java_handle_chararray_t *value;
777 so = (java_lang_String *) s;
/* read the backing array, the number of characters and the start index */
779 LLNI_field_get_ref(so, value, value);
780 LLNI_field_get_val(so, count, count);
781 LLNI_field_get_val(so, offset, offset);
/* i is an index into the backing array, running over
   [offset, offset + count) */
783 for (i = offset; i < offset + count; i++) {
/* NOTE(review): the statement that writes c to stream is not visible in
   this excerpt */
784 c = LLNI_array_direct(value, i);
791 * These are local overrides for various environment variables in Emacs.
792 * Please do not remove this and leave it at the end of the file, where
793 * Emacs will automagically detect them.
794 * ---------------------------------------------------------------------
797 * indent-tabs-mode: t
801 * vim:noexpandtab:sw=4:ts=4: