/* src/vm/string.cpp - java.lang.String related functions

   Copyright (C) 1996-2005, 2006, 2007, 2008
   CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO

   This file is part of CACAO.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

*/

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "vm/global.h"

#include "mm/memory.h"

#include "native/llni.h"

#include "threads/lock-common.h"

#include "vm/builtin.h"
#include "vm/exceptions.hpp"
#include "vm/globals.hpp"
#include "vm/javaobjects.hpp"
#include "vm/options.h"
#include "vm/primitive.hpp"
#include "vm/statistics.h"
#include "vm/string.hpp"

55 /* global variables ***********************************************************/
57 /* hashsize must be power of 2 */
59 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
61 static hashtable hashtable_string; /* hashtable for javastrings */
65 /* string_init *****************************************************************
67 Initialize the string hashtable lock.
69 *******************************************************************************/
71 bool string_init(void)
73 TRACESUBSYSTEMINITIALIZATION("string_init");
75 /* create string (javastring) hashtable */
77 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
87 /* stringtable_update **********************************************************
89 Traverses the javastring hashtable and sets the vftbl-entries of
90 javastrings which were temporarily set to NULL, because
91 java.lang.Object was not yet loaded.
93 *******************************************************************************/
95 void stringtable_update(void)
98 literalstring *s; /* hashtable entry */
100 for (unsigned int i = 0; i < hashtable_string.size; i++) {
101 s = (literalstring*) hashtable_string.ptr[i];
106 java_lang_String js(LLNI_WRAP(s->string));
108 if (js.is_null() || (js.get_value() == NULL)) {
109 /* error in hashtable found */
111 vm_abort("stringtable_update: invalid literalstring in hashtable");
114 a = LLNI_UNWRAP(js.get_value());
116 if (js.get_vftbl() == NULL)
118 LLNI_UNWRAP(js.get_handle())->vftbl = class_java_lang_String->vftbl;
120 if (a->header.objheader.vftbl == NULL)
121 a->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
123 /* follow link in external hash chain */
131 /* javastring_new_from_utf_buffer **********************************************
133 Create a new object of type java/lang/String with the text from
134 the specified utf8 buffer.
137 buffer.......points to first char in the buffer
138 blength......number of bytes to read from the buffer
141 the java.lang.String object, or
142 NULL if an exception has been thrown
144 *******************************************************************************/
146 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer, u4 blength)
148 const char *utf_ptr; /* current utf character in utf string */
152 int32_t utflength = utf_get_number_of_u2s_for_buffer(buffer, blength);
154 java_handle_t* h = builtin_new(class_java_lang_String);
155 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
157 /* javastring or character-array could not be created */
159 if ((h == NULL) || (ca == NULL))
162 /* decompress utf-string */
166 for (int32_t i = 0; i < utflength; i++)
167 LLNI_array_direct(ca, i) = utf_nextu2((char **) &utf_ptr);
169 /* set fields of the javastring-object */
171 java_lang_String jls(h, ca, utflength);
173 return jls.get_handle();
177 /* javastring_safe_new_from_utf8 ***********************************************
179 Create a new object of type java/lang/String with the text from
180 the specified UTF-8 string. This function is safe for invalid UTF-8.
181 (Invalid characters will be replaced by U+fffd.)
184 text.........the UTF-8 string, zero-terminated.
187 the java.lang.String object, or
188 NULL if an exception has been thrown
190 *******************************************************************************/
192 java_handle_t *javastring_safe_new_from_utf8(const char *text)
197 /* Get number of bytes. We need this to completely emulate the messy */
198 /* behaviour of the RI. :( */
200 int32_t nbytes = strlen(text);
202 /* calculate number of Java characters */
204 int32_t len = utf8_safe_number_of_u2s(text, nbytes);
206 /* allocate the String object and the char array */
208 java_handle_t* h = builtin_new(class_java_lang_String);
209 java_handle_chararray_t* ca = builtin_newarray_char(len);
211 /* javastring or character-array could not be created? */
213 if ((h == NULL) || (ca == NULL))
216 /* decompress UTF-8 string */
218 utf8_safe_convert_to_u2s(text, nbytes, LLNI_array_data(ca));
220 /* set fields of the String object */
222 java_lang_String jls(h, ca, len);
224 return jls.get_handle();
228 /* javastring_new_from_utf_string **********************************************
230 Create a new object of type java/lang/String with the text from
231 the specified zero-terminated utf8 string.
234 buffer.......points to first char in the buffer
235 blength......number of bytes to read from the buffer
238 the java.lang.String object, or
239 NULL if an exception has been thrown
241 *******************************************************************************/
243 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
247 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
251 /* javastring_new **************************************************************
253 creates a new object of type java/lang/String with the text of
254 the specified utf8-string
256 return: pointer to the string or NULL if memory is exhausted.
258 *******************************************************************************/
260 java_handle_t *javastring_new(utf *u)
263 exceptions_throw_nullpointerexception();
267 char* utf_ptr = u->text;
268 int32_t utflength = utf_get_number_of_u2s(u);
270 java_handle_t* h = builtin_new(class_java_lang_String);
271 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
273 /* javastring or character-array could not be created */
275 if ((h == NULL) || (ca == NULL))
278 /* decompress utf-string */
280 for (int32_t i = 0; i < utflength; i++)
281 LLNI_array_direct(ca, i) = utf_nextu2(&utf_ptr);
283 /* set fields of the javastring-object */
285 java_lang_String jls(h, ca, utflength);
287 return jls.get_handle();
291 /* javastring_new_slash_to_dot *************************************************
293 creates a new object of type java/lang/String with the text of
294 the specified utf8-string with slashes changed to dots
296 return: pointer to the string or NULL if memory is exhausted.
298 *******************************************************************************/
300 java_handle_t *javastring_new_slash_to_dot(utf *u)
303 exceptions_throw_nullpointerexception();
307 char* utf_ptr = u->text;
308 int32_t utflength = utf_get_number_of_u2s(u);
310 java_handle_t* h = builtin_new(class_java_lang_String);
311 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
313 /* javastring or character-array could not be created */
314 if ((h == NULL) || (ca == NULL))
317 /* decompress utf-string */
319 for (int32_t i = 0; i < utflength; i++) {
320 uint16_t ch = utf_nextu2(&utf_ptr);
325 LLNI_array_direct(ca, i) = ch;
328 /* set fields of the javastring-object */
330 java_lang_String jls(h, ca, utflength);
332 return jls.get_handle();
336 /* javastring_new_from_ascii ***************************************************
338 creates a new java/lang/String object which contains the given ASCII
339 C-string converted to UTF-16.
342 text.........string of ASCII characters
345 the java.lang.String object, or
346 NULL if an exception has been thrown.
348 *******************************************************************************/
350 java_handle_t *javastring_new_from_ascii(const char *text)
353 exceptions_throw_nullpointerexception();
357 int32_t len = strlen(text);
359 java_handle_t* h = builtin_new(class_java_lang_String);
360 java_handle_chararray_t* ca = builtin_newarray_char(len);
362 /* javastring or character-array could not be created */
364 if ((h == NULL) || (ca == NULL))
369 for (int32_t i = 0; i < len; i++)
370 LLNI_array_direct(ca, i) = text[i];
372 /* set fields of the javastring-object */
374 java_lang_String jls(h, ca, len);
376 return jls.get_handle();
380 /* javastring_tochar ***********************************************************
382 converts a Java string into a C string.
384 return: pointer to C string
386 Caution: calling method MUST release the allocated memory!
388 *******************************************************************************/
390 char* javastring_tochar(java_handle_t* h)
392 java_lang_String jls(h);
397 java_handle_chararray_t* ca = jls.get_value();
402 int32_t count = jls.get_count();
403 int32_t offset = jls.get_offset();
405 char* buf = MNEW(char, count + 1);
408 for (i = 0; i < count; i++)
409 buf[i] = LLNI_array_direct(ca, offset + i);
417 /* javastring_toutf ************************************************************
419 Make utf symbol from javastring.
421 *******************************************************************************/
423 utf *javastring_toutf(java_handle_t *string, bool isclassname)
425 java_lang_String jls(string);
430 java_handle_chararray_t* value = jls.get_value();
432 if (jls.get_value() == NULL)
435 int32_t count = jls.get_count();
436 int32_t offset = jls.get_offset();
438 return utf_new_u2(LLNI_array_data(value) + offset, count, isclassname);
442 /* literalstring_u2 ************************************************************
444 Searches for the literalstring with the specified u2-array in the
445 string hashtable, if there is no such string a new one is created.
447 If copymode is true a copy of the u2-array is made.
449 *******************************************************************************/
451 static java_object_t *literalstring_u2(java_chararray_t *a, int32_t length,
452 u4 offset, bool copymode)
454 literalstring *s; /* hashtable element */
455 java_chararray_t *ca; /* copy of u2-array */
462 /* find location in hashtable */
464 key = unicode_hashkey(a->data + offset, length);
465 slot = key & (hashtable_string.size - 1);
466 s = (literalstring*) hashtable_string.ptr[slot];
470 java_lang_String js(LLNI_WRAP(s->string));
472 if (length == js.get_count()) {
475 for (i = 0; i < length; i++)
476 // FIXME This is not handle capable!
478 if (a->data[offset + i] != ((java_chararray_t*) LLNI_UNWRAP(js.get_value()))->data[i])
481 /* string already in hashtable, free memory */
484 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
488 return (java_object_t*) LLNI_UNWRAP(js.get_handle());
492 /* follow link in external hash chain */
497 /* create copy of u2-array for new javastring */
498 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
499 ca = (java_chararray_t*) mem_alloc(arraysize);
500 /* memcpy(ca, a, arraysize); */
501 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
502 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
508 /* location in hashtable found, complete arrayheader */
510 ca->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
511 ca->header.size = length;
513 assert(class_java_lang_String);
514 assert(class_java_lang_String->state & CLASS_LOADED);
516 // Create a new java.lang.String object on the system heap.
517 java_object_t* o = (java_object_t*) MNEW(uint8_t, class_java_lang_String->instancesize);
519 java_handle_t* h = LLNI_WRAP(o);
521 #if defined(ENABLE_STATISTICS)
523 size_string += sizeof(class_java_lang_String->instancesize);
526 #if defined(ENABLE_THREADS)
527 lock_init_object_lock(o);
530 o->vftbl = class_java_lang_String->vftbl;
533 java_lang_String jls(h, LLNI_WRAP(ca), length);
535 /* create new literalstring */
537 s = NEW(literalstring);
539 #if defined(ENABLE_STATISTICS)
541 size_string += sizeof(literalstring);
544 s->hashlink = (literalstring*) hashtable_string.ptr[slot];
545 s->string = (java_object_t*) LLNI_UNWRAP(jls.get_handle());
546 hashtable_string.ptr[slot] = s;
548 /* update number of hashtable entries */
550 hashtable_string.entries++;
552 /* reorganization of hashtable */
554 if (hashtable_string.entries > (hashtable_string.size * 2)) {
555 /* reorganization of hashtable, average length of the external
556 chains is approx. 2 */
560 literalstring *nexts;
561 hashtable newhash; /* the new hashtable */
563 /* create new hashtable, double the size */
565 hashtable_create(&newhash, hashtable_string.size * 2);
566 newhash.entries = hashtable_string.entries;
568 /* transfer elements to new hashtable */
570 for (i = 0; i < hashtable_string.size; i++) {
571 s = (literalstring*) hashtable_string.ptr[i];
575 java_lang_String tmpjls(LLNI_WRAP(s->string));
576 // FIXME This is not handle capable!
577 slot = unicode_hashkey(((java_chararray_t*) LLNI_UNWRAP(tmpjls.get_value()))->data, tmpjls.get_count()) & (newhash.size - 1);
579 s->hashlink = (literalstring*) newhash.ptr[slot];
580 newhash.ptr[slot] = s;
582 /* follow link in external hash chain */
587 /* dispose old table */
589 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
590 hashtable_string = newhash;
595 return (java_object_t*) LLNI_UNWRAP(jls.get_handle());
599 /* literalstring_new ***********************************************************
601 Creates a new literalstring with the text of the utf-symbol and inserts
602 it into the string hashtable.
604 *******************************************************************************/
606 java_object_t *literalstring_new(utf *u)
608 char *utf_ptr; /* pointer to current unicode character */
610 u4 utflength; /* length of utf-string if uncompressed */
611 java_chararray_t *a; /* u2-array constructed from utf string */
615 utflength = utf_get_number_of_u2s(u);
617 /* allocate memory */
618 a = (java_chararray_t*) mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
620 /* convert utf-string to u2-array */
621 for (i = 0; i < utflength; i++)
622 a->data[i] = utf_nextu2(&utf_ptr);
624 return literalstring_u2(a, utflength, 0, false);

/* literalstring_free **********************************************************

   Removes a literalstring from memory.

   NOTE(review): this function is kept disabled.  Its body was
   reconstructed; in particular the field name used to reach the char
   array (value) is an assumption and has to be confirmed before the
   code is enabled.

*******************************************************************************/

#if 0
/* TWISTI This one is currently not used. */

static void literalstring_free(java_object_t* string)
{
	heapstring_t     *s;
	java_chararray_t *a;

	s = (heapstring_t *) string;
	a = s->value;

	/* dispose memory of java.lang.String object */
	FREE(s, heapstring_t);

	/* dispose memory of java-characterarray: the size matches the
	   allocation, including the 10 bytes of slack the allocator adds */
	FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1) + 10);
}
#endif

654 /* javastring_intern ***********************************************************
656 Intern the given Java string.
658 XXX NOTE: Literal Strings are direct references since they are not placed
659 onto the GC-Heap. That's why this function looks so "different".
661 *******************************************************************************/
663 java_handle_t *javastring_intern(java_handle_t *string)
665 java_lang_String jls(string);
667 java_handle_chararray_t* value = jls.get_value();
669 java_chararray_t* ca = LLNI_UNWRAP(value); /* XXX see note above */
671 int32_t count = jls.get_count();
672 int32_t offset = jls.get_offset();
674 java_object_t* o = literalstring_u2(ca, count, offset, true); /* XXX see note above */
676 return LLNI_WRAP(o); /* XXX see note above */
680 /* javastring_fprint ***********************************************************
682 Print the given Java string to the given stream.
684 *******************************************************************************/
686 void javastring_fprint(java_handle_t *s, FILE *stream)
688 java_lang_String jls(s);
690 java_handle_chararray_t* value = jls.get_value();
692 int32_t count = jls.get_count();
693 int32_t offset = jls.get_offset();
695 for (int32_t i = offset; i < offset + count; i++) {
696 uint16_t c = LLNI_array_direct(value, i);

/*
 * These are local overrides for various environment variables in Emacs.
 * Please do not remove this and leave it at the end of the file, where
 * Emacs will automagically detect them.
 * ---------------------------------------------------------------------
 * Local variables:
 * mode: c++
 * indent-tabs-mode: t
 * c-basic-offset: 4
 * tab-width: 4
 * End:
 * vim:noexpandtab:sw=4:ts=4:
 */