/* src/vm/string.cpp - java.lang.String related functions

   Copyright (C) 1996-2005, 2006, 2007, 2008
   CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO

   This file is part of CACAO.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "vm/global.h"

#include "mm/memory.h"

#include "native/jni.h"
#include "native/llni.h"

#include "threads/lock-common.h"

#include "vm/builtin.h"
#include "vm/exceptions.hpp"
#include "vm/globals.hpp"
#include "vm/javaobjects.hpp"
#include "vm/options.h"
#include "vm/primitive.hpp"
#include "vm/statistics.h"
#include "vm/string.hpp"
56 /* global variables ***********************************************************/
58 /* hashsize must be power of 2 */
60 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
62 hashtable hashtable_string; /* hashtable for javastrings */
64 #if defined(ENABLE_THREADS)
65 static java_object_t *lock_hashtable_string;
69 /* string_init *****************************************************************
71 Initialize the string hashtable lock.
73 *******************************************************************************/
75 bool string_init(void)
77 TRACESUBSYSTEMINITIALIZATION("string_init");
79 /* create string (javastring) hashtable */
81 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
83 #if defined(ENABLE_THREADS)
84 /* create string hashtable lock object */
86 lock_hashtable_string = NEW(java_object_t);
88 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
97 /* stringtable_update **********************************************************
99 Traverses the javastring hashtable and sets the vftbl-entries of
100 javastrings which were temporarily set to NULL, because
101 java.lang.Object was not yet loaded.
103 *******************************************************************************/
105 void stringtable_update(void)
108 literalstring *s; /* hashtable entry */
110 for (unsigned int i = 0; i < hashtable_string.size; i++) {
111 s = (literalstring*) hashtable_string.ptr[i];
116 java_lang_String js(LLNI_WRAP(s->string));
118 if (js.is_null() || (js.get_value() == NULL)) {
119 /* error in hashtable found */
121 vm_abort("stringtable_update: invalid literalstring in hashtable");
124 a = LLNI_UNWRAP(js.get_value());
126 if (js.get_vftbl() == NULL)
128 LLNI_UNWRAP(js.get_handle())->vftbl = class_java_lang_String->vftbl;
130 if (a->header.objheader.vftbl == NULL)
131 a->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
133 /* follow link in external hash chain */
141 /* javastring_new_from_utf_buffer **********************************************
143 Create a new object of type java/lang/String with the text from
144 the specified utf8 buffer.
147 buffer.......points to first char in the buffer
148 blength......number of bytes to read from the buffer
151 the java.lang.String object, or
152 NULL if an exception has been thrown
154 *******************************************************************************/
156 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer, u4 blength)
158 const char *utf_ptr; /* current utf character in utf string */
162 int32_t utflength = utf_get_number_of_u2s_for_buffer(buffer, blength);
164 java_handle_t* h = builtin_new(class_java_lang_String);
165 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
167 /* javastring or character-array could not be created */
169 if ((h == NULL) || (ca == NULL))
172 /* decompress utf-string */
176 for (int32_t i = 0; i < utflength; i++)
177 LLNI_array_direct(ca, i) = utf_nextu2((char **) &utf_ptr);
179 /* set fields of the javastring-object */
181 java_lang_String jls(h, ca, utflength);
183 return jls.get_handle();
187 /* javastring_safe_new_from_utf8 ***********************************************
189 Create a new object of type java/lang/String with the text from
190 the specified UTF-8 string. This function is safe for invalid UTF-8.
191 (Invalid characters will be replaced by U+fffd.)
194 text.........the UTF-8 string, zero-terminated.
197 the java.lang.String object, or
198 NULL if an exception has been thrown
200 *******************************************************************************/
202 java_handle_t *javastring_safe_new_from_utf8(const char *text)
207 /* Get number of bytes. We need this to completely emulate the messy */
208 /* behaviour of the RI. :( */
210 int32_t nbytes = strlen(text);
212 /* calculate number of Java characters */
214 int32_t len = utf8_safe_number_of_u2s(text, nbytes);
216 /* allocate the String object and the char array */
218 java_handle_t* h = builtin_new(class_java_lang_String);
219 java_handle_chararray_t* ca = builtin_newarray_char(len);
221 /* javastring or character-array could not be created? */
223 if ((h == NULL) || (ca == NULL))
226 /* decompress UTF-8 string */
228 utf8_safe_convert_to_u2s(text, nbytes, LLNI_array_data(ca));
230 /* set fields of the String object */
232 java_lang_String jls(h, ca, len);
234 return jls.get_handle();
238 /* javastring_new_from_utf_string **********************************************
240 Create a new object of type java/lang/String with the text from
241 the specified zero-terminated utf8 string.
244 buffer.......points to first char in the buffer
245 blength......number of bytes to read from the buffer
248 the java.lang.String object, or
249 NULL if an exception has been thrown
251 *******************************************************************************/
253 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
257 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
261 /* javastring_new **************************************************************
263 creates a new object of type java/lang/String with the text of
264 the specified utf8-string
266 return: pointer to the string or NULL if memory is exhausted.
268 *******************************************************************************/
270 java_handle_t *javastring_new(utf *u)
273 exceptions_throw_nullpointerexception();
277 char* utf_ptr = u->text;
278 int32_t utflength = utf_get_number_of_u2s(u);
280 java_handle_t* h = builtin_new(class_java_lang_String);
281 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
283 /* javastring or character-array could not be created */
285 if ((h == NULL) || (ca == NULL))
288 /* decompress utf-string */
290 for (int32_t i = 0; i < utflength; i++)
291 LLNI_array_direct(ca, i) = utf_nextu2(&utf_ptr);
293 /* set fields of the javastring-object */
295 java_lang_String jls(h, ca, utflength);
297 return jls.get_handle();
301 /* javastring_new_slash_to_dot *************************************************
303 creates a new object of type java/lang/String with the text of
304 the specified utf8-string with slashes changed to dots
306 return: pointer to the string or NULL if memory is exhausted.
308 *******************************************************************************/
310 java_handle_t *javastring_new_slash_to_dot(utf *u)
313 exceptions_throw_nullpointerexception();
317 char* utf_ptr = u->text;
318 int32_t utflength = utf_get_number_of_u2s(u);
320 java_handle_t* h = builtin_new(class_java_lang_String);
321 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
323 /* javastring or character-array could not be created */
324 if ((h == NULL) || (ca == NULL))
327 /* decompress utf-string */
329 for (int32_t i = 0; i < utflength; i++) {
330 uint16_t ch = utf_nextu2(&utf_ptr);
335 LLNI_array_direct(ca, i) = ch;
338 /* set fields of the javastring-object */
340 java_lang_String jls(h, ca, utflength);
342 return jls.get_handle();
346 /* javastring_new_from_ascii ***************************************************
348 creates a new java/lang/String object which contains the given ASCII
349 C-string converted to UTF-16.
352 text.........string of ASCII characters
355 the java.lang.String object, or
356 NULL if an exception has been thrown.
358 *******************************************************************************/
360 java_handle_t *javastring_new_from_ascii(const char *text)
363 exceptions_throw_nullpointerexception();
367 int32_t len = strlen(text);
369 java_handle_t* h = builtin_new(class_java_lang_String);
370 java_handle_chararray_t* ca = builtin_newarray_char(len);
372 /* javastring or character-array could not be created */
374 if ((h == NULL) || (ca == NULL))
379 for (int32_t i = 0; i < len; i++)
380 LLNI_array_direct(ca, i) = text[i];
382 /* set fields of the javastring-object */
384 java_lang_String jls(h, ca, len);
386 return jls.get_handle();
390 /* javastring_tochar ***********************************************************
392 converts a Java string into a C string.
394 return: pointer to C string
396 Caution: calling method MUST release the allocated memory!
398 *******************************************************************************/
400 char* javastring_tochar(java_handle_t* h)
402 java_lang_String jls(h);
407 java_handle_chararray_t* ca = jls.get_value();
412 int32_t count = jls.get_count();
413 int32_t offset = jls.get_offset();
415 char* buf = MNEW(char, count + 1);
418 for (i = 0; i < count; i++)
419 buf[i] = LLNI_array_direct(ca, offset + i);
427 /* javastring_toutf ************************************************************
429 Make utf symbol from javastring.
431 *******************************************************************************/
433 utf *javastring_toutf(java_handle_t *string, bool isclassname)
435 java_lang_String jls(string);
440 java_handle_chararray_t* value = jls.get_value();
442 if (jls.get_value() == NULL)
445 int32_t count = jls.get_count();
446 int32_t offset = jls.get_offset();
448 return utf_new_u2(LLNI_array_data(value) + offset, count, isclassname);
452 /* literalstring_u2 ************************************************************
454 Searches for the literalstring with the specified u2-array in the
455 string hashtable, if there is no such string a new one is created.
457 If copymode is true a copy of the u2-array is made.
459 *******************************************************************************/
461 static java_object_t *literalstring_u2(java_chararray_t *a, int32_t length,
462 u4 offset, bool copymode)
464 literalstring *s; /* hashtable element */
465 java_chararray_t *ca; /* copy of u2-array */
470 LOCK_MONITOR_ENTER(lock_hashtable_string);
472 /* find location in hashtable */
474 key = unicode_hashkey(a->data + offset, length);
475 slot = key & (hashtable_string.size - 1);
476 s = (literalstring*) hashtable_string.ptr[slot];
480 java_lang_String js(LLNI_WRAP(s->string));
482 if (length == js.get_count()) {
485 for (i = 0; i < length; i++)
486 // FIXME This is not handle capable!
488 if (a->data[offset + i] != ((java_chararray_t*) LLNI_UNWRAP(js.get_value()))->data[i])
491 /* string already in hashtable, free memory */
494 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
496 LOCK_MONITOR_EXIT(lock_hashtable_string);
498 return (java_object_t*) LLNI_UNWRAP(js.get_handle());
502 /* follow link in external hash chain */
507 /* create copy of u2-array for new javastring */
508 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
509 ca = (java_chararray_t*) mem_alloc(arraysize);
510 /* memcpy(ca, a, arraysize); */
511 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
512 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
518 /* location in hashtable found, complete arrayheader */
520 ca->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
521 ca->header.size = length;
523 assert(class_java_lang_String);
524 assert(class_java_lang_String->state & CLASS_LOADED);
526 // Create a new java.lang.String object on the system heap.
527 java_object_t* o = (java_object_t*) MNEW(uint8_t, class_java_lang_String->instancesize);
529 java_handle_t* h = LLNI_WRAP(o);
531 #if defined(ENABLE_STATISTICS)
533 size_string += sizeof(class_java_lang_String->instancesize);
536 #if defined(ENABLE_THREADS)
537 lock_init_object_lock(o);
540 o->vftbl = class_java_lang_String->vftbl;
543 java_lang_String jls(h, LLNI_WRAP(ca), length);
545 /* create new literalstring */
547 s = NEW(literalstring);
549 #if defined(ENABLE_STATISTICS)
551 size_string += sizeof(literalstring);
554 s->hashlink = (literalstring*) hashtable_string.ptr[slot];
555 s->string = (java_object_t*) LLNI_UNWRAP(jls.get_handle());
556 hashtable_string.ptr[slot] = s;
558 /* update number of hashtable entries */
560 hashtable_string.entries++;
562 /* reorganization of hashtable */
564 if (hashtable_string.entries > (hashtable_string.size * 2)) {
565 /* reorganization of hashtable, average length of the external
566 chains is approx. 2 */
570 literalstring *nexts;
571 hashtable newhash; /* the new hashtable */
573 /* create new hashtable, double the size */
575 hashtable_create(&newhash, hashtable_string.size * 2);
576 newhash.entries = hashtable_string.entries;
578 /* transfer elements to new hashtable */
580 for (i = 0; i < hashtable_string.size; i++) {
581 s = (literalstring*) hashtable_string.ptr[i];
585 java_lang_String tmpjls(LLNI_WRAP(s->string));
586 // FIXME This is not handle capable!
587 slot = unicode_hashkey(((java_chararray_t*) LLNI_UNWRAP(tmpjls.get_value()))->data, tmpjls.get_count()) & (newhash.size - 1);
589 s->hashlink = (literalstring*) newhash.ptr[slot];
590 newhash.ptr[slot] = s;
592 /* follow link in external hash chain */
597 /* dispose old table */
599 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
600 hashtable_string = newhash;
603 LOCK_MONITOR_EXIT(lock_hashtable_string);
605 return (java_object_t*) LLNI_UNWRAP(jls.get_handle());
609 /* literalstring_new ***********************************************************
611 Creates a new literalstring with the text of the utf-symbol and inserts
612 it into the string hashtable.
614 *******************************************************************************/
616 java_object_t *literalstring_new(utf *u)
618 char *utf_ptr; /* pointer to current unicode character */
620 u4 utflength; /* length of utf-string if uncompressed */
621 java_chararray_t *a; /* u2-array constructed from utf string */
625 utflength = utf_get_number_of_u2s(u);
627 /* allocate memory */
628 a = (java_chararray_t*) mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
630 /* convert utf-string to u2-array */
631 for (i = 0; i < utflength; i++)
632 a->data[i] = utf_nextu2(&utf_ptr);
634 return literalstring_u2(a, utflength, 0, false);
/* literalstring_free **********************************************************

   Removes a literalstring from memory: releases the java.lang.String
   object and its character array.

   The function is not called anywhere and is therefore compiled out.

*******************************************************************************/

#if 0
/* TWISTI This one is currently not used. */

static void literalstring_free(java_object_t* string)
{
	heapstring_t     *s;
	java_chararray_t *a;

	s = (heapstring_t *) string;

	/* NOTE(review): the field name of the character array inside
	   heapstring_t is assumed to be `value` -- confirm before reviving
	   this function.  It has to be fetched before s is released. */
	a = s->value;

	/* dispose memory of java.lang.String object */
	FREE(s, heapstring_t);

	/* dispose memory of java-characterarray */
	/* NOTE(review): literal arrays are allocated with an extra "+ 10"
	   bytes (see literalstring_new), which this size omits. */
	FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
}
#endif
664 /* javastring_intern ***********************************************************
666 Intern the given Java string.
668 XXX NOTE: Literal Strings are direct references since they are not placed
669 onto the GC-Heap. That's why this function looks so "different".
671 *******************************************************************************/
673 java_handle_t *javastring_intern(java_handle_t *string)
675 java_lang_String jls(string);
677 java_handle_chararray_t* value = jls.get_value();
679 java_chararray_t* ca = LLNI_UNWRAP(value); /* XXX see note above */
681 int32_t count = jls.get_count();
682 int32_t offset = jls.get_offset();
684 java_object_t* o = literalstring_u2(ca, count, offset, true); /* XXX see note above */
686 return LLNI_WRAP(o); /* XXX see note above */
690 /* javastring_fprint ***********************************************************
692 Print the given Java string to the given stream.
694 *******************************************************************************/
696 void javastring_fprint(java_handle_t *s, FILE *stream)
698 java_lang_String jls(s);
700 java_handle_chararray_t* value = jls.get_value();
702 int32_t count = jls.get_count();
703 int32_t offset = jls.get_offset();
705 for (int32_t i = offset; i < offset + count; i++) {
706 uint16_t c = LLNI_array_direct(value, i);
/*
 * These are local overrides for various environment variables in Emacs.
 * Please do not remove this and leave it at the end of the file, where
 * Emacs will automagically detect them.
 * ---------------------------------------------------------------------
 * Local variables:
 * mode: c++
 * indent-tabs-mode: t
 * c-basic-offset: 4
 * tab-width: 4
 * End:
 * vim:noexpandtab:sw=4:ts=4:
 */