1 /* src/vm/string.cpp - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
34 #include "vm/global.h"
36 #include "mm/memory.h"
38 #include "native/llni.h"
40 #include "threads/lock-common.h"
43 #include "vm/jit/builtin.hpp"
44 #include "vm/exceptions.hpp"
45 #include "vm/globals.hpp"
46 #include "vm/javaobjects.hpp"
47 #include "vm/options.h"
48 #include "vm/primitive.hpp"
49 #include "vm/statistics.h"
50 #include "vm/string.hpp"
55 /* global variables ***********************************************************/
57 /* hashsize must be power of 2 */
/* Slots are selected with (key & (size - 1)) further down in this file, which
   is why the table size has to be a power of two. */
59 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
/* Table of literalstring entries; it is created in string_init(). */
61 static hashtable hashtable_string; /* hashtable for javastrings */
65 /* string_init *****************************************************************
67 Initialize the string hashtable lock.
69 *******************************************************************************/
/* Sets up the global javastring hashtable with HASHTABLE_STRING_SIZE slots.
   NOTE(review): the value returned to the caller is not visible in this
   listing - confirm against the full source. */
71 bool string_init(void)
73 TRACESUBSYSTEMINITIALIZATION("string_init");
75 /* create string (javastring) hashtable */
77 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
87 /* stringtable_update **********************************************************
89 Traverses the javastring hashtable and sets the vftbl-entries of
90 javastrings which were temporarily set to NULL, because
91 java.lang.Object was not yet loaded.
93 *******************************************************************************/
/* Walks all slots of hashtable_string and fills in vftbl pointers that are
   still NULL, both on the String object and on its character array.
   Aborts the VM when an entry has no String object or no value array. */
95 void stringtable_update(void)
98 literalstring *s; /* hashtable entry */
/* visit every slot of the table */
100 for (unsigned int i = 0; i < hashtable_string.size; i++) {
101 s = (literalstring*) hashtable_string.ptr[i];
/* wrap the stored object so that its fields can be read through the
   java_lang_String accessor class */
106 java_lang_String js(LLNI_WRAP(s->string));
108 if (js.is_null() || (js.get_value() == NULL)) {
109 /* error in hashtable found */
110 VM::get_current()->abort("stringtable_update: invalid literalstring in hashtable");
/* raw character array referenced by the string */
113 a = LLNI_UNWRAP(js.get_value());
/* the String object has no vftbl yet: take the one of class_java_lang_String */
115 if (js.get_vftbl() == NULL)
117 LLNI_UNWRAP(js.get_handle())->vftbl = class_java_lang_String->vftbl;
/* the character array has no vftbl yet: take the one of the char array class */
119 if (a->header.objheader.vftbl == NULL)
120 a->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
122 /* follow link in external hash chain */
130 /* javastring_new_from_utf_buffer **********************************************
132 Create a new object of type java/lang/String with the text from
133 the specified utf8 buffer.
136 buffer.......points to first char in the buffer
137 blength......number of bytes to read from the buffer
140 the java.lang.String object, or
141 NULL if an exception has been thrown
143 *******************************************************************************/
/* Builds a java.lang.String from the first blength bytes of buffer.
   Allocates the String object and a char array, decodes the buffer into the
   array one character at a time and returns the handle of the new String.
   NOTE(review): the statement executed when an allocation fails and the
   initialisation of utf_ptr are not visible in this listing. */
145 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer, u4 blength)
147 const char *utf_ptr; /* current utf character in utf string */
/* utflength is used both as the length of the char array and as the number
   of characters decoded below */
151 int32_t utflength = utf_get_number_of_u2s_for_buffer(buffer, blength);
/* allocate the String object and its character array */
153 java_handle_t* h = builtin_new(class_java_lang_String);
154 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
156 /* javastring or character-array could not be created */
158 if ((h == NULL) || (ca == NULL))
161 /* decompress utf-string */
/* utf_nextu2 takes a non-const char**, hence the cast; it is called once per
   output character and presumably advances utf_ptr - confirm */
165 for (int32_t i = 0; i < utflength; i++)
166 LLNI_array_direct(ca, i) = utf_nextu2((char **) &utf_ptr);
168 /* set fields of the javastring-object */
/* construct the accessor from object, array and length */
170 java_lang_String jls(h, ca, utflength);
172 return jls.get_handle();
176 /* javastring_safe_new_from_utf8 ***********************************************
178 Create a new object of type java/lang/String with the text from
179 the specified UTF-8 string. This function is safe for invalid UTF-8.
180 (Invalid characters will be replaced by U+fffd.)
183 text.........the UTF-8 string, zero-terminated.
186 the java.lang.String object, or
187 NULL if an exception has been thrown
189 *******************************************************************************/
/* Builds a java.lang.String from the zero-terminated UTF-8 string text.
   The byte length comes from strlen, the character count from
   utf8_safe_number_of_u2s, and the conversion writes directly into the data
   of the newly allocated char array.
   NOTE(review): any handling of a NULL text and the statement executed when
   an allocation fails are not visible in this listing. */
191 java_handle_t *javastring_safe_new_from_utf8(const char *text)
196 /* Get number of bytes. We need this to completely emulate the messy */
197 /* behaviour of the RI. :( */
/* NOTE(review): strlen returns size_t; the result is narrowed to int32_t */
199 int32_t nbytes = strlen(text);
201 /* calculate number of Java characters */
203 int32_t len = utf8_safe_number_of_u2s(text, nbytes);
205 /* allocate the String object and the char array */
207 java_handle_t* h = builtin_new(class_java_lang_String);
208 java_handle_chararray_t* ca = builtin_newarray_char(len);
210 /* javastring or character-array could not be created? */
212 if ((h == NULL) || (ca == NULL))
215 /* decompress UTF-8 string */
/* the same nbytes value that sized the array is passed to the converter */
217 utf8_safe_convert_to_u2s(text, nbytes, LLNI_array_data(ca));
219 /* set fields of the String object */
221 java_lang_String jls(h, ca, len);
223 return jls.get_handle();
227 /* javastring_new_from_utf_string **********************************************
229 Create a new object of type java/lang/String with the text from
230 the specified zero-terminated utf8 string.
233 utfstr.......points to the first char of the zero-terminated utf8 string
234 (its length in bytes is determined with strlen)
237 the java.lang.String object, or
238 NULL if an exception has been thrown
240 *******************************************************************************/
/* Convenience wrapper: measures the zero-terminated string with strlen and
   forwards pointer and byte length to javastring_new_from_utf_buffer,
   returning whatever that function returns. */
242 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
246 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
250 /* javastring_new **************************************************************
252 creates a new object of type java/lang/String with the text of
253 the specified utf8-string
255 return: pointer to the string or NULL if memory is exhausted.
257 *******************************************************************************/
/* Builds a java.lang.String from the utf symbol u: allocates the String
   object and a char array of utf_get_number_of_u2s(u) elements, decodes
   u->text into the array and returns the handle of the new String. */
259 java_handle_t *javastring_new(utf *u)
/* NOTE(review): the condition guarding this throw is not visible in this
   listing; presumably it is raised when u is NULL - confirm */
262 exceptions_throw_nullpointerexception();
/* decoding starts at the beginning of the symbol text */
266 char* utf_ptr = u->text;
267 int32_t utflength = utf_get_number_of_u2s(u);
/* allocate the String object and its character array */
269 java_handle_t* h = builtin_new(class_java_lang_String);
270 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
272 /* javastring or character-array could not be created */
274 if ((h == NULL) || (ca == NULL))
277 /* decompress utf-string */
/* one utf_nextu2 call per element of the array */
279 for (int32_t i = 0; i < utflength; i++)
280 LLNI_array_direct(ca, i) = utf_nextu2(&utf_ptr);
282 /* set fields of the javastring-object */
284 java_lang_String jls(h, ca, utflength);
286 return jls.get_handle();
290 /* javastring_new_slash_to_dot *************************************************
292 creates a new object of type java/lang/String with the text of
293 the specified utf8-string with slashes changed to dots
295 return: pointer to the string or NULL if memory is exhausted.
297 *******************************************************************************/
/* Same construction steps as javastring_new, except that every decoded
   character is first held in a local variable before it is stored.
   NOTE(review): the slash-to-dot substitution that the function is named for
   is not visible in this listing; it presumably sits between reading ch and
   storing it - confirm against the full source. */
299 java_handle_t *javastring_new_slash_to_dot(utf *u)
/* NOTE(review): the condition guarding this throw is not visible here;
   presumably it is raised when u is NULL - confirm */
302 exceptions_throw_nullpointerexception();
306 char* utf_ptr = u->text;
307 int32_t utflength = utf_get_number_of_u2s(u);
/* allocate the String object and its character array */
309 java_handle_t* h = builtin_new(class_java_lang_String);
310 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
312 /* javastring or character-array could not be created */
313 if ((h == NULL) || (ca == NULL))
316 /* decompress utf-string */
318 for (int32_t i = 0; i < utflength; i++) {
/* decode into a local so the value can be adjusted before the store */
319 uint16_t ch = utf_nextu2(&utf_ptr);
324 LLNI_array_direct(ca, i) = ch;
327 /* set fields of the javastring-object */
329 java_lang_String jls(h, ca, utflength);
331 return jls.get_handle();
335 /* javastring_new_from_ascii ***************************************************
337 creates a new java/lang/String object which contains the given ASCII
338 C-string converted to UTF-16.
341 text.........string of ASCII characters
344 the java.lang.String object, or
345 NULL if an exception has been thrown.
347 *******************************************************************************/
/* Builds a java.lang.String from the C string text by copying it byte for
   byte into a newly allocated char array of strlen(text) elements. */
349 java_handle_t *javastring_new_from_ascii(const char *text)
/* NOTE(review): the condition guarding this throw is not visible here;
   presumably it is raised when text is NULL - confirm */
352 exceptions_throw_nullpointerexception();
/* NOTE(review): strlen returns size_t; the result is narrowed to int32_t */
356 int32_t len = strlen(text);
/* allocate the String object and its character array */
358 java_handle_t* h = builtin_new(class_java_lang_String);
359 java_handle_chararray_t* ca = builtin_newarray_char(len);
361 /* javastring or character-array could not be created */
363 if ((h == NULL) || (ca == NULL))
/* NOTE(review): text[i] is a plain char. Where char is signed, bytes above
   0x7f would be sign-extended when widened to the array element type; the
   function is documented for ASCII input only. */
368 for (int32_t i = 0; i < len; i++)
369 LLNI_array_direct(ca, i) = text[i];
371 /* set fields of the javastring-object */
373 java_lang_String jls(h, ca, len);
375 return jls.get_handle();
379 /* javastring_tochar ***********************************************************
381 converts a Java string into a C string.
383 return: pointer to C string
385 Caution: calling method MUST release the allocated memory!
387 *******************************************************************************/
/* Copies the characters of the Java string h into a buffer obtained with
   MNEW and hands that buffer to the caller.
   NOTE(review): the handling of a null string or null value array, the store
   of the terminating zero and the return statement are not visible in this
   listing. */
389 char* javastring_tochar(java_handle_t* h)
391 java_lang_String jls(h);
396 java_handle_chararray_t* ca = jls.get_value();
/* the string covers count elements of the array, starting at offset */
401 int32_t count = jls.get_count();
402 int32_t offset = jls.get_offset();
/* one byte more than the number of characters, presumably for the
   terminating zero */
404 char* buf = MNEW(char, count + 1);
/* each array element is narrowed to char on assignment; the elements are read
   as uint16_t elsewhere in this file, so the upper bits are presumably lost */
407 for (i = 0; i < count; i++)
408 buf[i] = LLNI_array_direct(ca, offset + i);
416 /* javastring_toutf ************************************************************
418 Make utf symbol from javastring.
420 *******************************************************************************/
/* Creates a utf symbol from the characters of the Java string.
   string ........ handle of the java.lang.String to convert
   isclassname ... forwarded unchanged to utf_new_u2
   Returns the result of utf_new_u2 for the count characters that start at
   offset in the value array.
   NOTE(review): what is returned for a null string or a null value array is
   not visible in this listing. */
422 utf *javastring_toutf(java_handle_t *string, bool isclassname)
424 java_lang_String jls(string);
429 java_handle_chararray_t* value = jls.get_value();
/* this reads the same field again that was just stored in value */
431 if (jls.get_value() == NULL)
434 int32_t count = jls.get_count();
435 int32_t offset = jls.get_offset();
/* pass only the part of the array that belongs to this string */
437 return utf_new_u2(LLNI_array_data(value) + offset, count, isclassname);
441 /* literalstring_u2 ************************************************************
443 Searches for the literalstring with the specified u2-array in the
444 string hashtable, if there is no such string a new one is created.
446 If copymode is true a copy of the u2-array is made.
448 *******************************************************************************/
/* Looks up the length characters starting at a->data[offset] in the string
   hashtable. When an entry with the same length and the same characters
   exists, its String object is returned. Otherwise a new String object and a
   new literalstring entry are created and put at the front of the chain of
   the slot; the table is doubled once it holds more than two entries per
   slot on average.

   a ........... character array holding the text
   length ...... number of characters of the text
   offset ...... index of the first character of the text in a->data
   copymode .... see the note in the function header comment: a copy of the
                 u2-array is made when it is true

   Returns the raw java_object_t pointer of the interned String.

   NOTE(review): several short statements of this function (loop headers,
   branch targets, the declarations of key, slot and i, and whatever runs
   directly before the two return statements) are not visible in this
   listing; the comments below describe the visible statements only. */
450 static java_object_t *literalstring_u2(java_chararray_t *a, int32_t length,
451 u4 offset, bool copymode)
453 literalstring *s; /* hashtable element */
454 java_chararray_t *ca; /* copy of u2-array */
461 /* find location in hashtable */
/* only the characters of the text are hashed; the table size is a power of
   two, so masking with size - 1 yields a valid slot index */
463 key = unicode_hashkey(a->data + offset, length);
464 slot = key & (hashtable_string.size - 1);
465 s = (literalstring*) hashtable_string.ptr[slot];
/* wrap the candidate entry so that its count and value can be read */
469 java_lang_String js(LLNI_WRAP(s->string));
/* compare lengths first, then the characters one by one; the stored string is
   read from index 0 of its array */
471 if (length == js.get_count()) {
474 for (i = 0; i < length; i++)
475 // FIXME This is not handle capable!
477 if (a->data[offset + i] != ((java_chararray_t*) LLNI_UNWRAP(js.get_value()))->data[i])
480 /* string already in hashtable, free memory */
/* the size expression is the one used wherever such arrays are allocated in
   this file */
483 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
487 return (java_object_t*) LLNI_UNWRAP(js.get_handle());
491 /* follow link in external hash chain */
496 /* create copy of u2-array for new javastring */
497 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
498 ca = (java_chararray_t*) mem_alloc(arraysize);
499 /* memcpy(ca, a, arraysize); */
/* header and data are copied separately because the data is taken from
   position offset of the source array */
500 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
/* NOTE(review): &(a->data) + offset is arithmetic on the address of the data
   member, not on its elements; it only reaches element offset if the member
   is declared as a one-element u2 array - confirm against the declaration of
   java_chararray_t. The copied size also includes the 10 padding bytes. */
501 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
507 /* location in hashtable found, complete arrayheader */
509 ca->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
510 ca->header.size = length;
512 assert(class_java_lang_String);
513 assert(class_java_lang_String->state & CLASS_LOADED);
515 // Create a new java.lang.String object on the system heap.
516 java_object_t* o = (java_object_t*) MNEW(uint8_t, class_java_lang_String->instancesize);
518 java_handle_t* h = LLNI_WRAP(o);
520 #if defined(ENABLE_STATISTICS)
/* account for the bytes allocated for the object above, i.e. the value of
   instancesize, not the size of the instancesize field itself */
522 size_string += class_java_lang_String->instancesize;
525 #if defined(ENABLE_THREADS)
526 lock_init_object_lock(o);
529 o->vftbl = class_java_lang_String->vftbl;
/* construct the accessor from object, array and length */
532 java_lang_String jls(h, LLNI_WRAP(ca), length);
534 /* create new literalstring */
536 s = NEW(literalstring);
538 #if defined(ENABLE_STATISTICS)
540 size_string += sizeof(literalstring);
/* push the new entry onto the front of the chain of this slot */
543 s->hashlink = (literalstring*) hashtable_string.ptr[slot];
544 s->string = (java_object_t*) LLNI_UNWRAP(jls.get_handle());
545 hashtable_string.ptr[slot] = s;
547 /* update number of hashtable entries */
549 hashtable_string.entries++;
551 /* reorganization of hashtable */
553 if (hashtable_string.entries > (hashtable_string.size * 2)) {
554 /* reorganization of hashtable, average length of the external
555 chains is approx. 2 */
559 literalstring *nexts;
560 hashtable newhash; /* the new hashtable */
562 /* create new hashtable, double the size */
564 hashtable_create(&newhash, hashtable_string.size * 2);
565 newhash.entries = hashtable_string.entries;
567 /* transfer elements to new hashtable */
569 for (i = 0; i < hashtable_string.size; i++) {
570 s = (literalstring*) hashtable_string.ptr[i];
574 java_lang_String tmpjls(LLNI_WRAP(s->string));
575 // FIXME This is not handle capable!
/* recompute the slot with the mask of the new table and relink the entry at
   the front of the new chain */
576 slot = unicode_hashkey(((java_chararray_t*) LLNI_UNWRAP(tmpjls.get_value()))->data, tmpjls.get_count()) & (newhash.size - 1);
578 s->hashlink = (literalstring*) newhash.ptr[slot];
579 newhash.ptr[slot] = s;
581 /* follow link in external hash chain */
586 /* dispose old table */
/* only the old slot array is released; the entries now live in newhash */
588 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
589 hashtable_string = newhash;
594 return (java_object_t*) LLNI_UNWRAP(jls.get_handle());
598 /* literalstring_new ***********************************************************
600 Creates a new literalstring with the text of the utf-symbol and inserts
601 it into the string hashtable.
603 *******************************************************************************/
/* Decodes the utf symbol u into a freshly allocated u2 array and passes that
   array to literalstring_u2 with offset 0 and copymode false; the result of
   that call is returned.
   NOTE(review): the initialisation of utf_ptr and the declaration of i are
   not visible in this listing. */
605 java_object_t *literalstring_new(utf *u)
607 char *utf_ptr; /* pointer to current unicode character */
609 u4 utflength; /* length of utf-string if uncompressed */
610 java_chararray_t *a; /* u2-array constructed from utf string */
614 utflength = utf_get_number_of_u2s(u);
616 /* allocate memory */
/* same size expression as the one literalstring_u2 uses when it frees or
   copies such an array */
617 a = (java_chararray_t*) mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
619 /* convert utf-string to u2-array */
620 for (i = 0; i < utflength; i++)
621 a->data[i] = utf_nextu2(&utf_ptr);
/* the array is handed over as is: no offset, no copy requested */
623 return literalstring_u2(a, utflength, 0, false);
627 /* literalstring_free **********************************************************
629 Removes a literalstring from memory.
631 *******************************************************************************/
634 /* TWISTI This one is currently not used. */
/* Releases the memory of a String object and of a character array.
   NOTE(review): the declarations of s and a and the statement that sets a
   are not visible in this listing. */
636 static void literalstring_free(java_object_t* string)
641 s = (heapstring_t *) string;
644 /* dispose memory of java.lang.String object */
645 FREE(s, heapstring_t);
647 /* dispose memory of java-characterarray */
/* NOTE(review): the size released here lacks the + 10 that the allocations in
   literalstring_u2 and literalstring_new add; the trailing remark on the next
   line raises the same doubt */
648 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
653 /* javastring_intern ***********************************************************
655 Intern the given Java string.
657 XXX NOTE: Literal Strings are direct references since they are not placed
658 onto the GC-Heap. That's why this function looks so "different".
660 *******************************************************************************/
/* Interns the given Java string: reads its value array, count and offset and
   passes them to literalstring_u2 with copymode true, then wraps the returned
   object in a handle again. */
662 java_handle_t *javastring_intern(java_handle_t *string)
664 java_lang_String jls(string);
666 java_handle_chararray_t* value = jls.get_value();
/* literalstring_u2 works on the raw array, so the handle is unwrapped here */
668 java_chararray_t* ca = LLNI_UNWRAP(value); /* XXX see note above */
670 int32_t count = jls.get_count();
671 int32_t offset = jls.get_offset();
/* copymode true: the array still belongs to the given string, so
   literalstring_u2 is asked to make its own copy */
673 java_object_t* o = literalstring_u2(ca, count, offset, true); /* XXX see note above */
675 return LLNI_WRAP(o); /* XXX see note above */
679 /* javastring_fprint ***********************************************************
681 Print the given Java string to the given stream.
683 *******************************************************************************/
/* Reads the characters of the Java string s one by one, from index offset up
   to but excluding offset + count of its value array.
   NOTE(review): the statement that writes each character to stream is not
   visible in this listing. */
685 void javastring_fprint(java_handle_t *s, FILE *stream)
687 java_lang_String jls(s);
689 java_handle_chararray_t* value = jls.get_value();
691 int32_t count = jls.get_count();
692 int32_t offset = jls.get_offset();
/* i is an index into the value array, not into the string */
694 for (int32_t i = offset; i < offset + count; i++) {
695 uint16_t c = LLNI_array_direct(value, i);
702 * These are local overrides for various environment variables in Emacs.
703 * Please do not remove this and leave it at the end of the file, where
704 * Emacs will automagically detect them.
705 * ---------------------------------------------------------------------
708 * indent-tabs-mode: t
712 * vim:noexpandtab:sw=4:ts=4: