1 /* src/vm/string.cpp - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
34 #include "vm/global.h"
36 #include "mm/memory.h"
38 #include "native/jni.h"
39 #include "native/llni.h"
41 #include "threads/lock-common.h"
44 #include "vm/builtin.h"
45 #include "vm/exceptions.hpp"
46 #include "vm/globals.hpp"
47 #include "vm/javaobjects.hpp"
48 #include "vm/options.h"
49 #include "vm/primitive.hpp"
50 #include "vm/statistics.h"
51 #include "vm/string.hpp"
56 /* global variables ***********************************************************/
58 /* hashsize must be power of 2: a slot is selected with
   "key & (size - 1)" (see literalstring_u2), which only reaches every
   slot when size is a power of two. The table is doubled, never grown
   by another factor, so the property is kept on resize. */
60 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
62 static hashtable hashtable_string; /* hashtable for javastrings */
66 /* string_init *****************************************************************
68 Initialize the string hashtable lock.
   Creates the global javastring hashtable (hashtable_string) with
   HASHTABLE_STRING_SIZE slots. Literal and interned strings are later
   entered into this table by literalstring_u2.
70 *******************************************************************************/
72 bool string_init(void)
74 TRACESUBSYSTEMINITIALIZATION("string_init");
76 /* create string (javastring) hashtable */
78 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
88 /* stringtable_update **********************************************************
90 Traverses the javastring hashtable and sets the vftbl-entries of
91 javastrings which were temporarily set to NULL, because
92 java.lang.Object was not yet loaded.
   Two vftbl pointers are patched per entry when they are still NULL:
   the one of the String object itself and the one of its char array.
   The VM is aborted if an entry has no String object or no value array.
94 *******************************************************************************/
96 void stringtable_update(void)
99 literalstring *s; /* hashtable entry */
/* visit every slot of the table */
101 for (unsigned int i = 0; i < hashtable_string.size; i++) {
102 s = (literalstring*) hashtable_string.ptr[i];
/* view the stored raw String object through the java_lang_String wrapper */
107 java_lang_String js(LLNI_WRAP(s->string));
109 if (js.is_null() || (js.get_value() == NULL)) {
110 /* error in hashtable found */
112 vm_abort("stringtable_update: invalid literalstring in hashtable");
/* raw pointer to the string's char array */
115 a = LLNI_UNWRAP(js.get_value());
/* String object without vftbl: install the one of java.lang.String */
117 if (js.get_vftbl() == NULL)
119 LLNI_UNWRAP(js.get_handle())->vftbl = class_java_lang_String->vftbl;
/* char array without vftbl: install the one of the char array class */
121 if (a->header.objheader.vftbl == NULL)
122 a->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
124 /* follow link in external hash chain */
132 /* javastring_new_from_utf_buffer **********************************************
134 Create a new object of type java/lang/String with the text from
135 the specified utf8 buffer.
   IN:
138 buffer.......points to first char in the buffer
139 blength......number of bytes to read from the buffer
   RETURN VALUE:
142 the java.lang.String object, or
143 NULL if an exception has been thrown
   The String object and its char array are both allocated before
   either result is checked; a single test covers both.
145 *******************************************************************************/
147 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer, u4 blength)
149 const char *utf_ptr; /* current utf character in utf string */
/* utflength sizes the char array and bounds the decode loop below */
153 int32_t utflength = utf_get_number_of_u2s_for_buffer(buffer, blength);
155 java_handle_t* h = builtin_new(class_java_lang_String);
156 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
158 /* javastring or character-array could not be created */
160 if ((h == NULL) || (ca == NULL))
163 /* decompress utf-string */
/* utf_nextu2 takes a char**, so the const qualifier of utf_ptr is cast
   away here; presumably each call also advances utf_ptr to the next
   character -- TODO confirm against utf_nextu2 */
167 for (int32_t i = 0; i < utflength; i++)
168 LLNI_array_direct(ca, i) = utf_nextu2((char **) &utf_ptr);
170 /* set fields of the javastring-object */
172 java_lang_String jls(h, ca, utflength);
174 return jls.get_handle();
178 /* javastring_safe_new_from_utf8 ***********************************************
180 Create a new object of type java/lang/String with the text from
181 the specified UTF-8 string. This function is safe for invalid UTF-8.
182 (Invalid characters will be replaced by U+fffd.)
   IN:
185 text.........the UTF-8 string, zero-terminated.
   RETURN VALUE:
188 the java.lang.String object, or
189 NULL if an exception has been thrown
191 *******************************************************************************/
193 java_handle_t *javastring_safe_new_from_utf8(const char *text)
198 /* Get number of bytes. We need this to completely emulate the messy */
199 /* behaviour of the RI. :( */
/* strlen() yields a size_t; it is narrowed to int32_t by this initialization */
201 int32_t nbytes = strlen(text);
203 /* calculate number of Java characters */
205 int32_t len = utf8_safe_number_of_u2s(text, nbytes);
207 /* allocate the String object and the char array */
209 java_handle_t* h = builtin_new(class_java_lang_String);
210 java_handle_chararray_t* ca = builtin_newarray_char(len);
212 /* javastring or character-array could not be created? */
214 if ((h == NULL) || (ca == NULL))
217 /* decompress UTF-8 string */
/* the converter writes directly into the element storage of ca, which
   was allocated with len elements above */
219 utf8_safe_convert_to_u2s(text, nbytes, LLNI_array_data(ca));
221 /* set fields of the String object */
223 java_lang_String jls(h, ca, len);
225 return jls.get_handle();
229 /* javastring_new_from_utf_string **********************************************
231 Create a new object of type java/lang/String with the text from
232 the specified zero-terminated utf8 string.
   IN:
235 utfstr.......the zero-terminated utf8 string to convert
236 (its byte length is determined with strlen)
   RETURN VALUE:
239 the java.lang.String object, or
240 NULL if an exception has been thrown
242 *******************************************************************************/
244 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
/* delegate to the buffer variant, passing the strlen() of utfstr as the
   number of bytes to read */
248 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
252 /* javastring_new **************************************************************
254 creates a new object of type java/lang/String with the text of
255 the specified utf8-string
257 return: pointer to the string or NULL if memory is exhausted.
   A NullPointerException is raised through
   exceptions_throw_nullpointerexception() on the guard path at the top
   of the function (presumably when u is NULL -- TODO confirm).
259 *******************************************************************************/
261 java_handle_t *javastring_new(utf *u)
264 exceptions_throw_nullpointerexception();
/* read cursor into the utf text and the number of characters to decode */
268 char* utf_ptr = u->text;
269 int32_t utflength = utf_get_number_of_u2s(u);
/* both objects are allocated before either result is checked */
271 java_handle_t* h = builtin_new(class_java_lang_String);
272 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
274 /* javastring or character-array could not be created */
276 if ((h == NULL) || (ca == NULL))
279 /* decompress utf-string */
/* one utf_nextu2 call per array element; presumably each call advances
   utf_ptr -- TODO confirm against utf_nextu2 */
281 for (int32_t i = 0; i < utflength; i++)
282 LLNI_array_direct(ca, i) = utf_nextu2(&utf_ptr);
284 /* set fields of the javastring-object */
286 java_lang_String jls(h, ca, utflength);
288 return jls.get_handle();
292 /* javastring_new_slash_to_dot *************************************************
294 creates a new object of type java/lang/String with the text of
295 the specified utf8-string with slashes changed to dots
297 return: pointer to the string or NULL if memory is exhausted.
   A NullPointerException is raised through
   exceptions_throw_nullpointerexception() on the guard path at the top
   of the function (presumably when u is NULL -- TODO confirm).
299 *******************************************************************************/
301 java_handle_t *javastring_new_slash_to_dot(utf *u)
304 exceptions_throw_nullpointerexception();
/* read cursor into the utf text and the number of characters to decode */
308 char* utf_ptr = u->text;
309 int32_t utflength = utf_get_number_of_u2s(u);
/* both objects are allocated before either result is checked */
311 java_handle_t* h = builtin_new(class_java_lang_String);
312 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
314 /* javastring or character-array could not be created */
315 if ((h == NULL) || (ca == NULL))
318 /* decompress utf-string */
/* unlike javastring_new, each character goes through the local ch
   before it is stored, which is where the replacement described in the
   header takes place */
320 for (int32_t i = 0; i < utflength; i++) {
321 uint16_t ch = utf_nextu2(&utf_ptr);
326 LLNI_array_direct(ca, i) = ch;
329 /* set fields of the javastring-object */
331 java_lang_String jls(h, ca, utflength);
333 return jls.get_handle();
337 /* javastring_new_from_ascii ***************************************************
339 creates a new java/lang/String object which contains the given ASCII
340 C-string converted to UTF-16.
   IN:
343 text.........string of ASCII characters
   RETURN VALUE:
346 the java.lang.String object, or
347 NULL if an exception has been thrown.
   A NullPointerException is raised through
   exceptions_throw_nullpointerexception() on the guard path at the top
   of the function (presumably when text is NULL -- TODO confirm).
349 *******************************************************************************/
351 java_handle_t *javastring_new_from_ascii(const char *text)
354 exceptions_throw_nullpointerexception();
/* one Java character per input byte; strlen()'s size_t result is
   narrowed to int32_t by this initialization */
358 int32_t len = strlen(text);
360 java_handle_t* h = builtin_new(class_java_lang_String);
361 java_handle_chararray_t* ca = builtin_newarray_char(len);
363 /* javastring or character-array could not be created */
365 if ((h == NULL) || (ca == NULL))
/* plain element-wise copy: each char is converted by assignment, no
   decoding is performed. NOTE(review): bytes outside the ASCII range
   would be converted according to the signedness of char -- callers
   are expected to pass ASCII only, as the header states. */
370 for (int32_t i = 0; i < len; i++)
371 LLNI_array_direct(ca, i) = text[i];
373 /* set fields of the javastring-object */
375 java_lang_String jls(h, ca, len);
377 return jls.get_handle();
381 /* javastring_tochar ***********************************************************
383 converts a Java string into a C string.
385 return: pointer to C string
387 Caution: calling method MUST release the allocated memory!
   The buffer is obtained with MNEW(char, count + 1), where count is the
   length of the Java string.
389 *******************************************************************************/
391 char* javastring_tochar(java_handle_t* h)
393 java_lang_String jls(h);
398 java_handle_chararray_t* ca = jls.get_value();
/* the string's text is ca[offset .. offset + count - 1] */
403 int32_t count = jls.get_count();
404 int32_t offset = jls.get_offset();
/* one byte more than count is allocated, presumably for the
   terminating NUL */
406 char* buf = MNEW(char, count + 1);
/* every array element is stored into a char, so values that do not fit
   into a char are narrowed by the assignment */
409 for (i = 0; i < count; i++)
410 buf[i] = LLNI_array_direct(ca, offset + i);
418 /* javastring_toutf ************************************************************
420 Make utf symbol from javastring.
   IN:
   string.......the java.lang.String to convert
   isclassname..passed through unchanged to utf_new_u2
   RETURN VALUE:
   the result of utf_new_u2 for the count characters of the string that
   start at its offset within the value array
422 *******************************************************************************/
424 utf *javastring_toutf(java_handle_t *string, bool isclassname)
426 java_lang_String jls(string);
431 java_handle_chararray_t* value = jls.get_value();
/* NOTE(review): this queries get_value() a second time; the local
   "value" fetched just above holds the same handle */
433 if (jls.get_value() == NULL)
436 int32_t count = jls.get_count();
437 int32_t offset = jls.get_offset();
/* hand the sub-range [offset, offset + count) of the array data to utf_new_u2 */
439 return utf_new_u2(LLNI_array_data(value) + offset, count, isclassname);
443 /* literalstring_u2 ************************************************************
445 Searches for the literalstring with the specified u2-array in the
446 string hashtable, if there is no such string a new one is created.
448 If copymode is true a copy of the u2-array is made.
   IN:
   a............char array holding the text
   length.......number of u2 characters of the text
   offset.......index of the first character of the text within a->data
   copymode.....see above
   RETURN VALUE:
   the java.lang.String object that was found in the table, or the one
   that was newly created and inserted
   New String objects, their literalstring entries and the array copies
   are allocated with MNEW / NEW / mem_alloc, not with builtin_new.
   After an insertion the table is doubled in size once the number of
   entries exceeds twice the number of slots.
450 *******************************************************************************/
452 static java_object_t *literalstring_u2(java_chararray_t *a, int32_t length,
453 u4 offset, bool copymode)
455 literalstring *s; /* hashtable element */
456 java_chararray_t *ca; /* copy of u2-array */
463 /* find location in hashtable */
/* the slot is the key masked with size - 1, which is why the table size
   has to be a power of two */
465 key = unicode_hashkey(a->data + offset, length);
466 slot = key & (hashtable_string.size - 1);
467 s = (literalstring*) hashtable_string.ptr[slot];
471 java_lang_String js(LLNI_WRAP(s->string));
/* cheap length test first, then an element-wise comparison */
473 if (length == js.get_count()) {
/* the stored string is read from index 0 of its array, the candidate
   from index offset of a */
476 for (i = 0; i < length; i++)
477 // FIXME This is not handle capable!
479 if (a->data[offset + i] != ((java_chararray_t*) LLNI_UNWRAP(js.get_value()))->data[i])
482 /* string already in hashtable, free memory */
/* the size expression is the same one literalstring_new uses when it
   allocates the array */
485 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
489 return (java_object_t*) LLNI_UNWRAP(js.get_handle());
493 /* follow link in external hash chain */
498 /* create copy of u2-array for new javastring */
499 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
500 ca = (java_chararray_t*) mem_alloc(arraysize);
501 /* memcpy(ca, a, arraysize); */
/* header and character data are copied separately so that the text,
   which starts at offset in a, ends up at index 0 of the copy */
502 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
/* NOTE(review): "&(a->data) + offset" is pointer arithmetic on the
   address of the data member, whereas the hash above uses
   "a->data + offset"; both address the same element only if data is a
   one-element u2 array -- confirm against java_chararray_t.
   NOTE(review): the byte count includes the 10 bytes of padding, so up
   to 10 bytes beyond the first length - 1 characters are read from a
   -- confirm that the source array is always large enough. */
503 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
509 /* location in hashtable found, complete arrayheader */
511 ca->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
512 ca->header.size = length;
514 assert(class_java_lang_String);
515 assert(class_java_lang_String->state & CLASS_LOADED);
517 // Create a new java.lang.String object on the system heap.
518 java_object_t* o = (java_object_t*) MNEW(uint8_t, class_java_lang_String->instancesize);
520 java_handle_t* h = LLNI_WRAP(o);
522 #if defined(ENABLE_STATISTICS)
/* NOTE(review): sizeof(...) yields the size of the instancesize field
   itself, not the instancesize bytes allocated just above; the counter
   probably should be increased by class_java_lang_String->instancesize
   -- confirm and fix. */
524 size_string += sizeof(class_java_lang_String->instancesize);
527 #if defined(ENABLE_THREADS)
528 lock_init_object_lock(o);
531 o->vftbl = class_java_lang_String->vftbl;
/* attach the char array and the length to the new String object */
534 java_lang_String jls(h, LLNI_WRAP(ca), length);
536 /* create new literalstring */
538 s = NEW(literalstring);
540 #if defined(ENABLE_STATISTICS)
542 size_string += sizeof(literalstring);
/* insert the new entry at the head of the slot's chain */
545 s->hashlink = (literalstring*) hashtable_string.ptr[slot];
546 s->string = (java_object_t*) LLNI_UNWRAP(jls.get_handle());
547 hashtable_string.ptr[slot] = s;
549 /* update number of hashtable entries */
551 hashtable_string.entries++;
553 /* reorganization of hashtable */
555 if (hashtable_string.entries > (hashtable_string.size * 2)) {
556 /* reorganization of hashtable, average length of the external
557 chains is approx. 2 */
561 literalstring *nexts;
562 hashtable newhash; /* the new hashtable */
564 /* create new hashtable, double the size */
566 hashtable_create(&newhash, hashtable_string.size * 2);
567 newhash.entries = hashtable_string.entries;
569 /* transfer elements to new hashtable */
/* the existing literalstring nodes are relinked into the new table;
   no node is reallocated */
571 for (i = 0; i < hashtable_string.size; i++) {
572 s = (literalstring*) hashtable_string.ptr[i];
576 java_lang_String tmpjls(LLNI_WRAP(s->string));
577 // FIXME This is not handle capable!
/* the key is recomputed from index 0 of the stored array and masked
   with the new table size */
578 slot = unicode_hashkey(((java_chararray_t*) LLNI_UNWRAP(tmpjls.get_value()))->data, tmpjls.get_count()) & (newhash.size - 1);
580 s->hashlink = (literalstring*) newhash.ptr[slot];
581 newhash.ptr[slot] = s;
583 /* follow link in external hash chain */
588 /* dispose old table */
/* only the old slot array is released; the entries now belong to newhash */
590 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
591 hashtable_string = newhash;
596 return (java_object_t*) LLNI_UNWRAP(jls.get_handle());
600 /* literalstring_new ***********************************************************
602 Creates a new literalstring with the text of the utf-symbol and inserts
603 it into the string hashtable.
   The utf text is decoded into a freshly allocated u2-array, which is
   handed to literalstring_u2 with offset 0 and copymode false.
   RETURN VALUE:
   the java.lang.String object returned by literalstring_u2
605 *******************************************************************************/
607 java_object_t *literalstring_new(utf *u)
609 char *utf_ptr; /* pointer to current unicode character */
611 u4 utflength; /* length of utf-string if uncompressed */
612 java_chararray_t *a; /* u2-array constructed from utf string */
616 utflength = utf_get_number_of_u2s(u);
618 /* allocate memory */
/* same size expression as in literalstring_u2: array struct plus
   utflength - 1 further characters plus 10 bytes of padding.
   NOTE(review): utflength is unsigned, so "utflength - 1" wraps around
   for an empty string; whether the final size is still correct depends
   on the width of mem_alloc's size parameter -- confirm. */
619 a = (java_chararray_t*) mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
621 /* convert utf-string to u2-array */
622 for (i = 0; i < utflength; i++)
623 a->data[i] = utf_nextu2(&utf_ptr);
/* literalstring_u2 releases a again with mem_free when it finds an
   equal string already in the table */
625 return literalstring_u2(a, utflength, 0, false);
629 /* literalstring_free **********************************************************
631 Removes a literalstring from memory.
   Releases the String object and its character array; the hashtable
   itself is not touched by the statements shown here.
633 *******************************************************************************/
636 /* TWISTI This one is currently not used. */
638 static void literalstring_free(java_object_t* string)
643 s = (heapstring_t *) string;
646 /* dispose memory of java.lang.String object */
647 FREE(s, heapstring_t);
649 /* dispose memory of java-characterarray */
/* NOTE(review): the size computed here omits the 10 bytes of padding
   that literalstring_new and literalstring_u2 add when they allocate
   the array (the "+10 ??" marker below raises the same doubt). FREE is
   also given a type name in the call above but a byte count here --
   confirm which form FREE expects before re-enabling this function. */
650 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
655 /* javastring_intern ***********************************************************
657 Intern the given Java string.
659 XXX NOTE: Literal Strings are direct references since they are not placed
660 onto the GC-Heap. That's why this function looks so "different".
   The string's character array, count and offset are passed to
   literalstring_u2 with copymode true, so the table receives its own
   copy of the characters when a new entry has to be created.
   RETURN VALUE:
   the wrapped String object returned by literalstring_u2
662 *******************************************************************************/
664 java_handle_t *javastring_intern(java_handle_t *string)
666 java_lang_String jls(string);
668 java_handle_chararray_t* value = jls.get_value();
/* literalstring_u2 works on the raw array, so the handle is unwrapped */
670 java_chararray_t* ca = LLNI_UNWRAP(value); /* XXX see note above */
672 int32_t count = jls.get_count();
673 int32_t offset = jls.get_offset();
675 java_object_t* o = literalstring_u2(ca, count, offset, true); /* XXX see note above */
677 return LLNI_WRAP(o); /* XXX see note above */
681 /* javastring_fprint ***********************************************************
683 Print the given Java string to the given stream.
   IN:
   s............the java.lang.String to print
   stream.......the output stream
685 *******************************************************************************/
687 void javastring_fprint(java_handle_t *s, FILE *stream)
689 java_lang_String jls(s);
691 java_handle_chararray_t* value = jls.get_value();
693 int32_t count = jls.get_count();
694 int32_t offset = jls.get_offset();
/* walk the string's own range of the array, value[offset] up to but not
   including value[offset + count], one 16-bit element at a time */
696 for (int32_t i = offset; i < offset + count; i++) {
697 uint16_t c = LLNI_array_direct(value, i);
704 * These are local overrides for various environment variables in Emacs.
705 * Please do not remove this and leave it at the end of the file, where
706 * Emacs will automagically detect them.
707 * ---------------------------------------------------------------------
710 * indent-tabs-mode: t
714 * vim:noexpandtab:sw=4:ts=4: