1 /* src/vm/string.cpp - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
30 #include "vmcore/os.hpp"
34 #include "vm/global.h"
36 #include "mm/memory.h"
38 #include "native/jni.h"
39 #include "native/llni.h"
41 #include "threads/lock-common.h"
44 #include "vm/builtin.h"
45 #include "vm/exceptions.hpp"
46 #include "vm/primitive.hpp"
47 #include "vm/string.hpp"
50 #include "vmcore/globals.hpp"
51 #include "vmcore/javaobjects.hpp"
52 #include "vmcore/options.h"
53 #include "vmcore/statistics.h"
54 #include "vmcore/utf8.h"
57 /* global variables ***********************************************************/
59 /* hashsize must be power of 2: literalstring_u2 computes the slot as
   key & (hashtable_string.size - 1) */
61 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
63 hashtable hashtable_string; /* hashtable for javastrings */
65 #if defined(ENABLE_THREADS)
/* object passed to LOCK_MONITOR_ENTER/LOCK_MONITOR_EXIT around the
   hashtable accesses in literalstring_u2; allocated in string_init */
66 static java_object_t *lock_hashtable_string;
70 /* string_init *****************************************************************
72 Initialize the string subsystem: create the javastring hashtable
   (hashtable_string, created with size HASHTABLE_STRING_SIZE) and, when
   ENABLE_THREADS is defined, allocate and initialize the lock object
   used to guard it.

   NOTE(review): the statements following the lock initialization
   (including the return) are not visible in this listing, so the
   returned value is not documented here.
74 *******************************************************************************/
76 bool string_init(void)
78 TRACESUBSYSTEMINITIALIZATION("string_init");
80 /* create string (javastring) hashtable */
82 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
84 #if defined(ENABLE_THREADS)
85 /* create string hashtable lock object */
87 lock_hashtable_string = NEW(java_object_t);
89 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
98 /* stringtable_update **********************************************************
100 Traverses the javastring hashtable and sets the vftbl-entries of
101 javastrings which were temporarily set to NULL, because
102 java.lang.Object was not yet loaded.

   Two vftbl pointers are patched per entry when they are still NULL:
   the one of the String object itself (set to
   class_java_lang_String->vftbl) and the one of its value array (set
   to the vftbl of the ARRAYTYPE_CHAR array class).

   An entry whose string is null or whose value array is NULL is
   reported through vm_abort().
104 *******************************************************************************/
106 void stringtable_update(void)
109 literalstring *s; /* hashtable entry */
/* visit every slot of the table */
111 for (unsigned int i = 0; i < hashtable_string.size; i++) {
112 s = (literalstring*) hashtable_string.ptr[i];
117 java_lang_String js(LLNI_WRAP(s->string));
119 if (js.is_null() || (js.get_value() == NULL)) {
120 /* error in hashtable found */
122 vm_abort("stringtable_update: invalid literalstring in hashtable");
/* raw pointer to the value array of this string */
125 a = LLNI_UNWRAP(js.get_value());
/* patch the String object's vftbl if it is still unset */
127 if (js.get_vftbl() == NULL)
129 LLNI_UNWRAP(js.get_handle())->vftbl = class_java_lang_String->vftbl;
/* patch the value array's vftbl if it is still unset */
131 if (a->header.objheader.vftbl == NULL)
132 a->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
134 /* follow link in external hash chain */
142 /* javastring_new_from_utf_buffer **********************************************
144 Create a new object of type java/lang/String with the text from
145 the specified utf8 buffer.

   IN:
148 buffer.......points to first char in the buffer
149 blength......number of bytes to read from the buffer

   RETURN VALUE:
152 the java.lang.String object, or
153 NULL if an exception has been thrown

   The String object and its char array are both obtained from the
   builtin allocators (builtin_new / builtin_newarray_char).

   NOTE(review): the initialization of utf_ptr and the statement
   executed when an allocation fails are not visible in this listing.
155 *******************************************************************************/
157 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer, u4 blength)
159 const char *utf_ptr; /* current utf character in utf string */
/* number of u2 units the buffer decodes to */
163 int32_t utflength = utf_get_number_of_u2s_for_buffer(buffer, blength);
165 java_handle_t* h = builtin_new(class_java_lang_String);
166 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
168 /* javastring or character-array could not be created */
170 if ((h == NULL) || (ca == NULL))
173 /* decompress utf-string */
/* utf_ptr is const-qualified here, hence the cast to the char** type
   that utf_nextu2 is called with */
177 for (int32_t i = 0; i < utflength; i++)
178 LLNI_array_direct(ca, i) = utf_nextu2((char **) &utf_ptr);
180 /* set fields of the javastring-object */
182 java_lang_String jls(h, ca, utflength);
184 return jls.get_handle();
188 /* javastring_safe_new_from_utf8 ***********************************************
190 Create a new object of type java/lang/String with the text from
191 the specified UTF-8 string. This function is safe for invalid UTF-8.
192 (Invalid characters will be replaced by U+fffd.)

   IN:
195 text.........the UTF-8 string, zero-terminated.

   RETURN VALUE:
198 the java.lang.String object, or
199 NULL if an exception has been thrown

   The byte length of text is measured once with strlen() and then
   handed to both utf8_safe_number_of_u2s and utf8_safe_convert_to_u2s.

   NOTE(review): any check performed on text before strlen() and the
   statement executed when an allocation fails are not visible in this
   listing.
201 *******************************************************************************/
203 java_handle_t *javastring_safe_new_from_utf8(const char *text)
208 /* Get number of bytes. We need this to completely emulate the messy */
209 /* behaviour of the RI (presumably the Java reference implementation). :( */
211 int32_t nbytes = strlen(text);
213 /* calculate number of Java characters */
215 int32_t len = utf8_safe_number_of_u2s(text, nbytes);
217 /* allocate the String object and the char array */
219 java_handle_t* h = builtin_new(class_java_lang_String);
220 java_handle_chararray_t* ca = builtin_newarray_char(len);
222 /* javastring or character-array could not be created? */
224 if ((h == NULL) || (ca == NULL))
227 /* decompress UTF-8 string directly into the array's data area */
229 utf8_safe_convert_to_u2s(text, nbytes, LLNI_array_data(ca));
231 /* set fields of the String object */
233 java_lang_String jls(h, ca, len);
235 return jls.get_handle();
239 /* javastring_new_from_utf_string **********************************************
241 Create a new object of type java/lang/String with the text from
242 the specified zero-terminated utf8 string.

   IN:
245 utfstr.......zero-terminated utf8 string; its byte length is taken
246 with strlen() and both are forwarded to
   javastring_new_from_utf_buffer

   RETURN VALUE:
249 the java.lang.String object, or
250 NULL if an exception has been thrown
252 *******************************************************************************/
254 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
258 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
262 /* javastring_new **************************************************************
264 creates a new object of type java/lang/String with the text of
265 the specified utf8-string

   IN:
   u............utf symbol whose text (u->text) is decoded into the
   new string's char array

267 return: pointer to the string or NULL if memory is exhausted.

   The function also contains a call to
   exceptions_throw_nullpointerexception(). NOTE(review): its guarding
   condition is not visible in this listing -- presumably u == NULL;
   confirm.
269 *******************************************************************************/
271 java_handle_t *javastring_new(utf *u)
274 exceptions_throw_nullpointerexception();
278 char* utf_ptr = u->text;
/* number of u2 units the utf symbol decodes to */
279 int32_t utflength = utf_get_number_of_u2s(u);
281 java_handle_t* h = builtin_new(class_java_lang_String);
282 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
284 /* javastring or character-array could not be created */
286 if ((h == NULL) || (ca == NULL))
289 /* decompress utf-string */
291 for (int32_t i = 0; i < utflength; i++)
292 LLNI_array_direct(ca, i) = utf_nextu2(&utf_ptr);
294 /* set fields of the javastring-object */
296 java_lang_String jls(h, ca, utflength);
298 return jls.get_handle();
302 /* javastring_new_slash_to_dot *************************************************
304 creates a new object of type java/lang/String with the text of
305 the specified utf8-string with slashes changed to dots

   IN:
   u............utf symbol whose text (u->text) is decoded into the
   new string's char array

307 return: pointer to the string or NULL if memory is exhausted.

   The function also contains a call to
   exceptions_throw_nullpointerexception(). NOTE(review): its guarding
   condition and the statements that perform the slash-to-dot
   replacement inside the loop are not visible in this listing.
309 *******************************************************************************/
311 java_handle_t *javastring_new_slash_to_dot(utf *u)
314 exceptions_throw_nullpointerexception();
318 char* utf_ptr = u->text;
/* number of u2 units the utf symbol decodes to */
319 int32_t utflength = utf_get_number_of_u2s(u);
321 java_handle_t* h = builtin_new(class_java_lang_String);
322 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
324 /* javastring or character-array could not be created */
325 if ((h == NULL) || (ca == NULL))
328 /* decompress utf-string */
330 for (int32_t i = 0; i < utflength; i++) {
/* decode the next character into a local before storing it */
331 uint16_t ch = utf_nextu2(&utf_ptr);
336 LLNI_array_direct(ca, i) = ch;
339 /* set fields of the javastring-object */
341 java_lang_String jls(h, ca, utflength);
343 return jls.get_handle();
347 /* javastring_new_from_ascii ***************************************************
349 creates a new java/lang/String object which contains the given ASCII
350 C-string converted to UTF-16.

   IN:
353 text.........string of ASCII characters

   RETURN VALUE:
356 the java.lang.String object, or
357 NULL if an exception has been thrown.

   Each byte of text is assigned unchanged to one element of the char
   array; no decoding is performed. The length is taken with strlen().

   The function also contains a call to
   exceptions_throw_nullpointerexception(). NOTE(review): its guarding
   condition is not visible in this listing -- presumably
   text == NULL; confirm.
359 *******************************************************************************/
361 java_handle_t *javastring_new_from_ascii(const char *text)
364 exceptions_throw_nullpointerexception();
368 int32_t len = strlen(text);
370 java_handle_t* h = builtin_new(class_java_lang_String);
371 java_handle_chararray_t* ca = builtin_newarray_char(len);
373 /* javastring or character-array could not be created */
375 if ((h == NULL) || (ca == NULL))
/* copy the characters one by one into the array */
380 for (int32_t i = 0; i < len; i++)
381 LLNI_array_direct(ca, i) = text[i];
383 /* set fields of the javastring-object */
385 java_lang_String jls(h, ca, len);
387 return jls.get_handle();
391 /* javastring_tochar ***********************************************************
393 converts a Java string into a C string.

   IN:
   h............handle of the java.lang.String to convert

395 return: pointer to C string

   The result buffer is allocated with MNEW(char, count + 1), where
   count is the string's character count. Each character of the
   string (starting at the string's offset in its value array) is
   assigned to one char of the buffer, so characters that do not fit
   into a char are narrowed.

397 Caution: calling method MUST release the allocated memory!

   NOTE(review): checks on the value array, the declaration of i, the
   terminating store and the return statement are not visible in this
   listing.
399 *******************************************************************************/
401 char* javastring_tochar(java_handle_t* h)
403 java_lang_String jls(h);
408 java_handle_chararray_t* ca = jls.get_value();
413 int32_t count = jls.get_count();
414 int32_t offset = jls.get_offset();
/* one element more than the character count */
416 char* buf = MNEW(char, count + 1);
/* copy the characters, honouring the string's offset into the array */
419 for (i = 0; i < count; i++)
420 buf[i] = LLNI_array_direct(ca, offset + i);
428 /* javastring_toutf ************************************************************
430 Make utf symbol from javastring.

   IN:
   string.......handle of the java.lang.String to convert
   isclassname..passed through unchanged as the third argument of
   utf_new_u2

   RETURN VALUE:
   the result of utf_new_u2() applied to the string's characters
   (count characters starting at the string's offset in its value
   array).

   NOTE(review): the statements executed when the value array is NULL
   are not visible in this listing.
432 *******************************************************************************/
434 utf *javastring_toutf(java_handle_t *string, bool isclassname)
436 java_lang_String jls(string);
441 java_handle_chararray_t* value = jls.get_value();
/* a string without a value array is handled separately */
443 if (jls.get_value() == NULL)
446 int32_t count = jls.get_count();
447 int32_t offset = jls.get_offset();
449 return utf_new_u2(LLNI_array_data(value) + offset, count, isclassname);
453 /* literalstring_u2 ************************************************************
455 Searches for the literalstring with the specified u2-array in the
456 string hashtable, if there is no such string a new one is created.
458 If copymode is true a copy of the u2-array is made.

   IN:
   a............char array holding the characters to look up
   length.......number of characters to use
   offset.......index of the first character inside a->data
   copymode.....see above

   RETURN VALUE:
   the java.lang.String object stored in the hashtable, either the
   already existing one or the newly created one.

   The whole lookup/insert runs between LOCK_MONITOR_ENTER and
   LOCK_MONITOR_EXIT on lock_hashtable_string. A new String object is
   allocated with MNEW rather than with builtin_new. After an insert,
   the table is rebuilt with twice the size once the number of entries
   exceeds twice the current size.

   NOTE(review): several control-flow lines (chain loops, the
   conditions around mem_free and around the array copy, local
   declarations) are not visible in this listing; presumably the
   passed array is only freed when copymode is false -- confirm.
460 *******************************************************************************/
462 static java_object_t *literalstring_u2(java_chararray_t *a, int32_t length,
463 u4 offset, bool copymode)
465 literalstring *s; /* hashtable element */
466 java_chararray_t *ca; /* copy of u2-array */
471 LOCK_MONITOR_ENTER(lock_hashtable_string);
473 /* find location in hashtable */
/* masking with size - 1 relies on the table size being a power of 2 */
475 key = unicode_hashkey(a->data + offset, length);
476 slot = key & (hashtable_string.size - 1);
477 s = (literalstring*) hashtable_string.ptr[slot];
481 java_lang_String js(LLNI_WRAP(s->string));
/* only entries of the same length are compared character-wise */
483 if (length == js.get_count()) {
486 for (i = 0; i < length; i++)
487 // FIXME This is not handle capable!
489 if (a->data[offset + i] != ((java_chararray_t*) LLNI_UNWRAP(js.get_value()))->data[i])
492 /* string already in hashtable, free memory */
495 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
497 LOCK_MONITOR_EXIT(lock_hashtable_string);
499 return (java_object_t*) LLNI_UNWRAP(js.get_handle());
503 /* follow link in external hash chain */
508 /* create copy of u2-array for new javastring */
509 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
510 ca = (java_chararray_t*) mem_alloc(arraysize);
511 /* memcpy(ca, a, arraysize); */
/* header and character data are copied separately so that the
   characters of the copy start at index 0 */
512 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
/* NOTE(review): arithmetic on &(a->data) advances in units of
   sizeof(a->data); this equals a->data + offset only if data is a
   one-element u2 array -- confirm against java_chararray_t */
513 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
519 /* location in hashtable found, complete arrayheader */
521 ca->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
522 ca->header.size = length;
524 assert(class_java_lang_String);
525 assert(class_java_lang_String->state & CLASS_LOADED);
527 // Create a new java.lang.String object on the system heap.
528 java_object_t* o = (java_object_t*) MNEW(uint8_t, class_java_lang_String->instancesize);
530 java_handle_t* h = LLNI_WRAP(o);
532 #if defined(ENABLE_STATISTICS)
/* NOTE(review): sizeof() applied to the instancesize field yields the
   size of that field's type, not the number of bytes allocated by the
   MNEW above -- confirm which one the statistic is meant to add */
534 size_string += sizeof(class_java_lang_String->instancesize);
537 #if defined(ENABLE_THREADS)
538 lock_init_object_lock(o);
541 o->vftbl = class_java_lang_String->vftbl;
/* bind object, char array and length together */
544 java_lang_String jls(h, LLNI_WRAP(ca), length);
546 /* create new literalstring */
548 s = NEW(literalstring);
550 #if defined(ENABLE_STATISTICS)
552 size_string += sizeof(literalstring);
/* insert the new entry at the head of the slot's chain */
555 s->hashlink = (literalstring*) hashtable_string.ptr[slot];
556 s->string = (java_object_t*) LLNI_UNWRAP(jls.get_handle());
557 hashtable_string.ptr[slot] = s;
559 /* update number of hashtable entries */
561 hashtable_string.entries++;
563 /* reorganization of hashtable */
565 if (hashtable_string.entries > (hashtable_string.size * 2)) {
566 /* reorganization of hashtable, average length of the external
567 chains is approx. 2 */
571 literalstring *nexts;
572 hashtable newhash; /* the new hashtable */
574 /* create new hashtable, double the size */
576 hashtable_create(&newhash, hashtable_string.size * 2);
577 newhash.entries = hashtable_string.entries;
579 /* transfer elements to new hashtable */
581 for (i = 0; i < hashtable_string.size; i++) {
582 s = (literalstring*) hashtable_string.ptr[i];
586 java_lang_String tmpjls(LLNI_WRAP(s->string));
587 // FIXME This is not handle capable!
/* recompute the slot for the new table size; the hash is taken from
   the start of the stored array (no offset is applied here) */
588 slot = unicode_hashkey(((java_chararray_t*) LLNI_UNWRAP(tmpjls.get_value()))->data, tmpjls.get_count()) & (newhash.size - 1);
590 s->hashlink = (literalstring*) newhash.ptr[slot];
591 newhash.ptr[slot] = s;
593 /* follow link in external hash chain */
598 /* dispose old table */
600 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
601 hashtable_string = newhash;
604 LOCK_MONITOR_EXIT(lock_hashtable_string);
606 return (java_object_t*) LLNI_UNWRAP(jls.get_handle());
610 /* literalstring_new ***********************************************************
612 Creates a new literalstring with the text of the utf-symbol and inserts
613 it into the string hashtable.

   IN:
   u............utf symbol providing the text

   RETURN VALUE:
   the value returned by literalstring_u2() for the decoded array.

   The characters are decoded into a char array obtained from
   mem_alloc; that array is handed to literalstring_u2 with offset 0
   and copymode false.

   NOTE(review): the initialization of utf_ptr and the declaration of
   i are not visible in this listing.
615 *******************************************************************************/
617 java_object_t *literalstring_new(utf *u)
619 char *utf_ptr; /* pointer to current unicode character */
621 u4 utflength; /* length of utf-string if uncompressed */
622 java_chararray_t *a; /* u2-array constructed from utf string */
626 utflength = utf_get_number_of_u2s(u);
628 /* allocate memory */
/* same size formula (including the extra 10 bytes) as used in
   literalstring_u2 */
629 a = (java_chararray_t*) mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
631 /* convert utf-string to u2-array */
632 for (i = 0; i < utflength; i++)
633 a->data[i] = utf_nextu2(&utf_ptr);
635 return literalstring_u2(a, utflength, 0, false);
639 /* literalstring_free **********************************************************
641 Removes a literalstring from memory.

   IN:
   string.......the java.lang.String object to release

   Releases the String object and then its character array with FREE.

   NOTE(review): the declarations of s and a and the assignment of a
   are not visible in this listing.
643 *******************************************************************************/
646 /* TWISTI This one is currently not used. */
648 static void literalstring_free(java_object_t* string)
653 s = (heapstring_t *) string;
656 /* dispose memory of java.lang.String object */
657 FREE(s, heapstring_t);
659 /* dispose memory of java-characterarray */
/* NOTE(review): the mem_alloc calls in literalstring_u2 and
   literalstring_new add 10 bytes to this size formula; the size
   passed here does not -- confirm which is intended */
660 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
665 /* javastring_intern ***********************************************************
667 Intern the given Java string.

   IN:
   string.......handle of the java.lang.String to intern

   RETURN VALUE:
   handle wrapping the object returned by literalstring_u2() for the
   string's characters (value array, count and offset); the lookup is
   done with copymode set to true.

669 XXX NOTE: Literal Strings are direct references since they are not placed
670 onto the GC-Heap. That's why this function looks so "different".
672 *******************************************************************************/
674 java_handle_t *javastring_intern(java_handle_t *string)
676 java_lang_String jls(string);
678 java_handle_chararray_t* value = jls.get_value();
680 java_chararray_t* ca = LLNI_UNWRAP(value); /* XXX see note above */
682 int32_t count = jls.get_count();
683 int32_t offset = jls.get_offset();
685 java_object_t* o = literalstring_u2(ca, count, offset, true); /* XXX see note above */
687 return LLNI_WRAP(o); /* XXX see note above */
691 /* javastring_fprint ***********************************************************
693 Print the given Java string to the given stream.

   IN:
   s............handle of the java.lang.String to print
   stream.......destination stream

   The loop visits the characters offset .. offset + count - 1 of the
   string's value array.

   NOTE(review): the statement that writes each character to stream is
   not visible in this listing.
695 *******************************************************************************/
697 void javastring_fprint(java_handle_t *s, FILE *stream)
699 java_lang_String jls(s);
701 java_handle_chararray_t* value = jls.get_value();
703 int32_t count = jls.get_count();
704 int32_t offset = jls.get_offset();
/* i indexes the value array directly, so it starts at offset */
706 for (int32_t i = offset; i < offset + count; i++) {
707 uint16_t c = LLNI_array_direct(value, i);
714 * These are local overrides for various environment variables in Emacs.
715 * Please do not remove this and leave it at the end of the file, where
716 * Emacs will automagically detect them.
717 * ---------------------------------------------------------------------
720 * indent-tabs-mode: t
724 * vim:noexpandtab:sw=4:ts=4: