1 /* src/vm/string.cpp - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
34 #include "vm/global.h"
36 #include "mm/memory.hpp"
38 #include "native/llni.h"
40 #include "threads/lock.hpp"
42 #include "vm/array.hpp"
43 #include "vm/jit/builtin.hpp"
44 #include "vm/exceptions.hpp"
45 #include "vm/globals.hpp"
46 #include "vm/javaobjects.hpp"
47 #include "vm/options.h"
48 #include "vm/primitive.hpp"
49 #include "vm/statistics.h"
50 #include "vm/string.hpp"
54 /* global variables ***********************************************************/
56 /* hashsize must be power of 2: literalstring_u2 selects the slot with
   `key & (size - 1)`, which only covers all slots for such sizes */
58 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash (number of slots) */
60 static hashtable hashtable_string; /* hashtable for javastrings; slots hold literalstring entries chained via hashlink */
64 /* string_init *****************************************************************
66 Initialize the string subsystem by creating the global javastring
   hashtable (hashtable_string) with HASHTABLE_STRING_SIZE initial slots.
68 *******************************************************************************/
70 bool string_init(void)
72 TRACESUBSYSTEMINITIALIZATION("string_init");
74 /* create string (javastring) hashtable with its initial number of slots */
76 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
86 /* stringtable_update **********************************************************
88 Traverses the javastring hashtable and sets the vftbl-entries of
89 javastrings which were temporarily set to NULL, because
90 java.lang.Object was not yet loaded.
   Both the java.lang.String object and its backing char array are
   patched. An entry without a string object or without a value array
   is reported through os::abort().
92 *******************************************************************************/
94 void stringtable_update(void)
96 literalstring *s; /* hashtable entry */
98 for (unsigned int i = 0; i < hashtable_string.size; i++) { /* visit every slot of the table */
99 s = (literalstring*) hashtable_string.ptr[i]; /* first entry stored in slot i */
104 java_lang_String js(LLNI_WRAP(s->string)); /* accessor for the entry's String object */
106 if (js.is_null() || (js.get_value() == NULL)) {
107 /* error in hashtable found */
108 os::abort("stringtable_update: invalid literalstring in hashtable");
111 java_chararray_t* a = (java_chararray_t*) js.get_value(); /* backing char array of the string */
113 if (js.get_vftbl() == NULL) /* String object has no vftbl yet */
115 LLNI_UNWRAP(js.get_handle())->vftbl = class_java_lang_String->vftbl;
117 if (a->header.objheader.vftbl == NULL) /* char array has no vftbl yet */
118 a->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
120 /* follow link in external hash chain */
128 /* javastring_new_from_utf_buffer **********************************************
130 Create a new object of type java/lang/String with the text from
131 the specified utf8 buffer.
   IN:
134 buffer.......points to first char in the buffer
135 blength......number of bytes to read from the buffer
   RETURN VALUE:
138 the java.lang.String object, or
139 NULL if an exception has been thrown
141 *******************************************************************************/
143 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer, u4 blength)
145 const char *utf_ptr; /* current utf character in utf string */
149 int32_t utflength = utf_get_number_of_u2s_for_buffer(buffer, blength); /* number of u2 characters the buffer decodes to */
151 java_handle_t* h = builtin_new(class_java_lang_String); /* the String object itself */
152 CharArray ca(utflength); /* char array that receives the decoded characters */
154 /* javastring or character-array could not be created */
156 if ((h == NULL) || ca.is_null())
160 uint16_t* ptr = (uint16_t*) ca.get_raw_data_ptr(); /* raw element storage of the char array */
162 /* decompress utf-string */
166 for (int32_t i = 0; i < utflength; i++)
167 ptr[i] = utf_nextu2((char **) &utf_ptr); /* the cast drops const only to match utf_nextu2's parameter type */
169 /* set fields of the javastring-object */
171 java_lang_String jls(h, ca.get_handle(), utflength); /* ties h to the char array with utflength as count */
173 return jls.get_handle();
177 /* javastring_safe_new_from_utf8 ***********************************************
179 Create a new object of type java/lang/String with the text from
180 the specified UTF-8 string. This function is safe for invalid UTF-8.
181 (Invalid characters will be replaced by U+fffd.)
   IN:
184 text.........the UTF-8 string, zero-terminated.
   RETURN VALUE:
187 the java.lang.String object, or
188 NULL if an exception has been thrown
190 *******************************************************************************/
192 java_handle_t *javastring_safe_new_from_utf8(const char *text)
197 /* Get number of bytes. We need this to completely emulate the messy */
198 /* behaviour of the RI. :( */
200 int32_t nbytes = strlen(text); /* NOTE(review): size_t result is narrowed to int32_t; assumes text is shorter than 2^31 bytes */
202 /* calculate number of Java characters */
204 int32_t len = utf8_safe_number_of_u2s(text, nbytes);
206 /* allocate the String object and the char array */
208 java_handle_t* h = builtin_new(class_java_lang_String);
211 /* javastring or character-array could not be created? */
213 if ((h == NULL) || ca.is_null())
217 uint16_t* ptr = (uint16_t*) ca.get_raw_data_ptr(); /* raw element storage of the char array */
219 /* decompress UTF-8 string */
221 utf8_safe_convert_to_u2s(text, nbytes, ptr); /* writes the converted characters into ptr */
223 /* set fields of the String object */
225 java_lang_String jls(h, ca.get_handle(), len); /* ties h to the char array with len as count */
227 return jls.get_handle();
231 /* javastring_new_from_utf_string **********************************************
233 Create a new object of type java/lang/String with the text from
234 the specified zero-terminated utf8 string.
   IN:
237 utfstr.......points to the first char of the zero-terminated utf8 string;
238 its byte length is determined with strlen()
   RETURN VALUE:
241 the java.lang.String object, or
242 NULL if an exception has been thrown
244 *******************************************************************************/
246 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
250 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr)); /* delegates to the buffer variant with the measured length */
254 /* javastring_new **************************************************************
256 creates a new object of type java/lang/String with the text of
257 the specified utf8-string
   IN:
   u............the utf symbol whose text is decoded
259 return: pointer to the string or NULL if memory is exhausted.
261 *******************************************************************************/
263 java_handle_t *javastring_new(utf *u)
266 exceptions_throw_nullpointerexception(); /* presumably reached only when u is NULL -- TODO confirm the guard */
270 char* utf_ptr = u->text; /* read cursor into the utf text */
271 int32_t utflength = utf_get_number_of_u2s(u); /* number of u2 characters u decodes to */
273 java_handle_t* h = builtin_new(class_java_lang_String); /* the String object itself */
274 CharArray ca(utflength); /* char array that receives the decoded characters */
276 /* javastring or character-array could not be created */
278 if ((h == NULL) || ca.is_null())
282 uint16_t* ptr = (uint16_t*) ca.get_raw_data_ptr(); /* raw element storage of the char array */
284 /* decompress utf-string */
286 for (int32_t i = 0; i < utflength; i++)
287 ptr[i] = utf_nextu2(&utf_ptr); /* decode one character and advance utf_ptr */
289 /* set fields of the javastring-object */
291 java_lang_String jls(h, ca.get_handle(), utflength); /* ties h to the char array with utflength as count */
293 return jls.get_handle();
297 /* javastring_new_slash_to_dot *************************************************
299 creates a new object of type java/lang/String with the text of
300 the specified utf8-string with slashes changed to dots
   IN:
   u............the utf symbol whose text is decoded
302 return: pointer to the string or NULL if memory is exhausted.
304 *******************************************************************************/
306 java_handle_t *javastring_new_slash_to_dot(utf *u)
309 exceptions_throw_nullpointerexception(); /* presumably reached only when u is NULL -- TODO confirm the guard */
313 char* utf_ptr = u->text; /* read cursor into the utf text */
314 int32_t utflength = utf_get_number_of_u2s(u); /* number of u2 characters u decodes to */
316 java_handle_t* h = builtin_new(class_java_lang_String); /* the String object itself */
317 CharArray ca(utflength); /* char array that receives the decoded characters */
319 /* javastring or character-array could not be created */
320 if ((h == NULL) || ca.is_null())
324 uint16_t* ptr = (uint16_t*) ca.get_raw_data_ptr(); /* raw element storage of the char array */
326 /* decompress utf-string */
328 for (int32_t i = 0; i < utflength; i++) {
329 uint16_t ch = utf_nextu2(&utf_ptr); /* decode one character and advance utf_ptr */
337 /* set fields of the javastring-object */
339 java_lang_String jls(h, ca.get_handle(), utflength); /* ties h to the char array with utflength as count */
341 return jls.get_handle();
345 /* javastring_new_from_ascii ***************************************************
347 creates a new java/lang/String object which contains the given ASCII
348 C-string converted to UTF-16.
   IN:
351 text.........string of ASCII characters
   RETURN VALUE:
354 the java.lang.String object, or
355 NULL if an exception has been thrown.
357 *******************************************************************************/
359 java_handle_t *javastring_new_from_ascii(const char *text)
362 exceptions_throw_nullpointerexception(); /* presumably reached only when text is NULL -- TODO confirm the guard */
366 int32_t len = strlen(text); /* one Java character per input byte; size_t result is narrowed to int32_t */
368 java_handle_t* h = builtin_new(class_java_lang_String); /* the String object itself */
371 /* javastring or character-array could not be created */
373 if ((h == NULL) || ca.is_null())
377 uint16_t* ptr = (uint16_t*) ca.get_raw_data_ptr(); /* raw element storage of the char array */
381 for (int32_t i = 0; i < len; i++) /* one iteration per input character */
384 /* set fields of the javastring-object */
386 java_lang_String jls(h, ca.get_handle(), len); /* ties h to the char array with len as count */
388 return jls.get_handle();
392 /* javastring_tochar ***********************************************************
394 converts a Java string into a C string.
   Each UTF-16 code unit is stored into a single char, so values that
   do not fit into a char are truncated by the assignment.
396 return: pointer to C string
398 Caution: calling method MUST release the allocated memory!
   (the buffer is obtained with MNEW(char, count + 1))
400 *******************************************************************************/
402 char* javastring_tochar(java_handle_t* h)
404 java_lang_String jls(h);
409 CharArray ca(jls.get_value()); /* backing char array of the string */
414 int32_t count = jls.get_count(); /* number of characters in the string */
415 int32_t offset = jls.get_offset(); /* index of the first character inside the array */
417 char* buf = MNEW(char, count + 1); /* one extra char, presumably for the terminating zero */
420 uint16_t* ptr = (uint16_t*) ca.get_raw_data_ptr(); /* raw element storage of the char array */
423 for (i = 0; i < count; i++)
424 buf[i] = ptr[offset + i]; /* implicit uint16_t -> char narrowing */
432 /* javastring_toutf ************************************************************
434 Make utf symbol from javastring.
   IN:
   string.......the java.lang.String to convert
   isclassname..passed through unchanged to utf_new_u2
   RETURN VALUE:
   the result of utf_new_u2 for the string's characters
436 *******************************************************************************/
438 utf *javastring_toutf(java_handle_t *string, bool isclassname)
440 java_lang_String jls(string);
445 CharArray ca(jls.get_value()); /* backing char array of the string */
450 int32_t count = jls.get_count(); /* number of characters in the string */
451 int32_t offset = jls.get_offset(); /* index of the first character inside the array */
454 uint16_t* ptr = (uint16_t*) ca.get_raw_data_ptr(); /* raw element storage of the char array */
456 return utf_new_u2(ptr + offset, count, isclassname); /* convert exactly the string's window [offset, offset + count) */
460 /* literalstring_u2 ************************************************************
462 Searches for the literalstring with the specified u2-array in the
463 string hashtable, if there is no such string a new one is created.
465 If copymode is true a copy of the u2-array is made.
   IN:
   a............char array holding the characters
   length.......number of characters of the string
   offset.......index of the string's first character inside a
   copymode.....true: store a private copy of the characters;
   false: the array a itself becomes the string's storage
   The table is doubled in size once the number of entries exceeds
   twice the number of slots.
467 *******************************************************************************/
469 static java_handle_t *literalstring_u2(java_handle_chararray_t *a, int32_t length,
470 u4 offset, bool copymode)
472 literalstring *s; /* hashtable element */
481 uint16_t* ptr = (uint16_t*) ca.get_raw_data_ptr(); /* raw element storage of the input array */
483 /* find location in hashtable */
485 key = unicode_hashkey(ptr + offset, length); /* hash over the string's own characters only */
486 slot = key & (hashtable_string.size - 1); /* mask instead of modulo; relies on a power-of-2 table size */
487 s = (literalstring*) hashtable_string.ptr[slot]; /* head of the chain for this slot */
491 java_lang_String js(LLNI_WRAP(s->string)); /* accessor for the candidate entry */
493 if (length == js.get_count()) { /* only equally long strings can match */
496 for (i = 0; i < length; i++) {
497 // FIXME This is not handle capable!
498 CharArray jsca(js.get_value());
499 uint16_t* sptr = (uint16_t*) jsca.get_raw_data_ptr();
501 if (ptr[offset + i] != sptr[i]) /* input is read at offset, the stored string from index 0 */
505 /* string already in hashtable, free memory */
508 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10); /* same size formula as the allocation in literalstring_new */
512 return js.get_handle(); /* hand out the already interned string */
516 /* follow link in external hash chain */
520 java_chararray_t* acopy; /* array that becomes the new string's storage */
522 /* create copy of u2-array for new javastring */
523 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10; /* header + characters + 10 bytes of slack */
524 acopy = (java_chararray_t*) mem_alloc(arraysize);
525 /* memcpy(ca, a, arraysize); */
526 memcpy(&(acopy->header), &(((java_chararray_t*) a)->header), sizeof(java_array_t)); /* copy the array header */
527 memcpy(&(acopy->data), &(((java_chararray_t*) a)->data) + offset, sizeof(u2) * (length - 1) + 10); /* NOTE(review): the source is the address of data advanced by offset units of data's own type -- confirm this equals offset u2 characters; the byte count includes the slack, so it reads past the last character */
530 acopy = (java_chararray_t*) a; /* no copy: use the caller's array directly */
533 /* location in hashtable found, complete arrayheader */
535 acopy->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
536 acopy->header.size = length;
538 assert(class_java_lang_String);
539 assert(class_java_lang_String->state & CLASS_LOADED);
541 // Create a new java.lang.String object on the system heap.
542 java_object_t* o = (java_object_t*) MNEW(uint8_t, class_java_lang_String->instancesize);
544 #if defined(ENABLE_STATISTICS)
546 size_string += sizeof(class_java_lang_String->instancesize); /* NOTE(review): sizeof yields the size of the instancesize field, not its value (the number of bytes allocated above) -- confirm intent */
549 #if defined(ENABLE_THREADS)
553 o->vftbl = class_java_lang_String->vftbl;
555 CharArray cacopy((java_handle_chararray_t*) acopy);
556 java_lang_String jls(o, cacopy.get_handle(), length); /* ties o to the char array with length as count */
558 /* create new literalstring */
560 s = NEW(literalstring);
562 #if defined(ENABLE_STATISTICS)
564 size_string += sizeof(literalstring);
567 s->hashlink = (literalstring*) hashtable_string.ptr[slot]; /* new entry is linked in front of the chain */
568 s->string = (java_object_t*) LLNI_UNWRAP(jls.get_handle());
569 hashtable_string.ptr[slot] = s;
571 /* update number of hashtable entries */
573 hashtable_string.entries++;
575 /* reorganization of hashtable */
577 if (hashtable_string.entries > (hashtable_string.size * 2)) {
578 /* reorganization of hashtable, average length of the external
579 chains is approx. 2 */
583 literalstring *nexts;
584 hashtable newhash; /* the new hashtable */
586 /* create new hashtable, double the size */
588 hashtable_create(&newhash, hashtable_string.size * 2);
589 newhash.entries = hashtable_string.entries;
591 /* transfer elements to new hashtable */
593 for (i = 0; i < hashtable_string.size; i++) {
594 s = (literalstring*) hashtable_string.ptr[i];
598 java_lang_String tmpjls(LLNI_WRAP(s->string));
599 // FIXME This is not handle capable!
600 slot = unicode_hashkey(((java_chararray_t*) LLNI_UNWRAP(tmpjls.get_value()))->data, tmpjls.get_count()) & (newhash.size - 1); /* rehash from the start of data; assumes stored literal strings have offset 0 -- TODO confirm */
602 s->hashlink = (literalstring*) newhash.ptr[slot]; /* relink in front of the new chain */
603 newhash.ptr[slot] = s;
605 /* follow link in external hash chain */
610 /* dispose old table */
612 MFREE(hashtable_string.ptr, void*, hashtable_string.size); /* only the slot array is released; the entries were moved */
613 hashtable_string = newhash;
618 return (java_object_t*) LLNI_UNWRAP(jls.get_handle()); /* NOTE(review): returns the unwrapped object although the signature says java_handle_t* -- presumably the same type in this configuration; confirm */
622 /* literalstring_new ***********************************************************
624 Creates a new literalstring with the text of the utf-symbol and inserts
625 it into the string hashtable.
   The decoded characters are placed in an array obtained with mem_alloc
   and handed to literalstring_u2 with copymode false, so that array is
   either adopted as the string's storage or released there.
627 *******************************************************************************/
629 java_object_t *literalstring_new(utf *u)
631 char *utf_ptr; /* pointer to current unicode character */
633 u4 utflength; /* length of utf-string if uncompressed */
634 java_chararray_t *a; /* u2-array constructed from utf string */
638 utflength = utf_get_number_of_u2s(u);
640 /* allocate memory */
641 a = (java_chararray_t*) mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10); /* NOTE(review): utflength is unsigned, so (utflength - 1) wraps for an empty string; if size_t is wider than u4 the requested size becomes very large -- confirm */
643 /* convert utf-string to u2-array */
644 for (i = 0; i < utflength; i++)
645 a->data[i] = utf_nextu2(&utf_ptr); /* decode one character and advance utf_ptr */
647 return literalstring_u2((java_handle_chararray_t*) a, utflength, 0, false); /* offset 0, no copy */
651 /* literalstring_free **********************************************************
653 Removes a literalstring from memory.
   Releases the string object and then its character array.
655 *******************************************************************************/
658 /* TWISTI This one is currently not used. */
660 static void literalstring_free(java_object_t* string)
665 s = (heapstring_t *) string; /* treat the object as a heapstring_t */
668 /* dispose memory of java.lang.String object */
669 FREE(s, heapstring_t);
671 /* dispose memory of java-characterarray */
672 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? -- NOTE(review): literalstring_new and literalstring_u2 allocate these arrays with 10 extra bytes; this size omits them */
677 /* javastring_intern ***********************************************************
679 Intern the given Java string.
   The string's characters are looked up in the string hashtable through
   literalstring_u2 with copymode true, so a newly created entry gets its
   own copy of the characters.
681 *******************************************************************************/
683 java_handle_t *javastring_intern(java_handle_t *string)
685 java_lang_String jls(string);
687 CharArray ca(jls.get_value()); /* backing char array of the string */
689 int32_t count = jls.get_count(); /* number of characters in the string */
690 int32_t offset = jls.get_offset(); /* index of the first character inside the array */
692 java_handle_t* o = literalstring_u2(ca.get_handle(), count, offset, true); /* find or create the interned string */
698 /* javastring_fprint ***********************************************************
700 Print the given Java string to the given stream.
   IN:
   s............the java.lang.String to print
   stream.......the output stream
702 *******************************************************************************/
704 void javastring_fprint(java_handle_t *s, FILE *stream)
706 java_lang_String jls(s);
708 CharArray ca(jls.get_value()); /* backing char array of the string */
710 int32_t count = jls.get_count(); /* number of characters in the string */
711 int32_t offset = jls.get_offset(); /* index of the first character inside the array */
714 uint16_t* ptr = (uint16_t*) ca.get_raw_data_ptr(); /* raw element storage of the char array */
716 for (int32_t i = offset; i < offset + count; i++) { /* i is an index into the array, covering the string's window */
724 * These are local overrides for various environment variables in Emacs.
725 * Please do not remove this and leave it at the end of the file, where
726 * Emacs will automagically detect them.
727 * ---------------------------------------------------------------------
730 * indent-tabs-mode: t
734 * vim:noexpandtab:sw=4:ts=4: