1 /* src/vm/string.cpp - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007, 2008
4 CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
6 This file is part of CACAO.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2, or (at
11 your option) any later version.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
34 #include "vm/global.h"
36 #include "mm/memory.h"
38 #include "native/llni.h"
40 #include "threads/lock.hpp"
42 #include "vm/array.hpp"
43 #include "vm/jit/builtin.hpp"
44 #include "vm/exceptions.hpp"
45 #include "vm/globals.hpp"
46 #include "vm/javaobjects.hpp"
47 #include "vm/options.h"
48 #include "vm/primitive.hpp"
49 #include "vm/statistics.h"
50 #include "vm/string.hpp"
54 /* global variables ***********************************************************/
56 /* The table size must be a power of 2: slots are selected with
   "key & (size - 1)", which only works as a modulo for such sizes. */
58 #define HASHTABLE_STRING_SIZE 2048 /* initial number of slots of the javastring hashtable */
60 static hashtable hashtable_string; /* hashtable for javastrings (entries are literalstring chains) */
64 /* string_init *****************************************************************
66 Initialize the string subsystem: creates the javastring hashtable
   (hashtable_string) with HASHTABLE_STRING_SIZE initial slots.

   NOTE(review): the previous summary only mentioned initializing "the
   string hashtable lock"; no lock setup is visible in this listing --
   confirm against the complete file.

68 *******************************************************************************/
70 bool string_init(void)
72 TRACESUBSYSTEMINITIALIZATION("string_init");
74 /* create string (javastring) hashtable with its initial size */
76 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
86 /* stringtable_update **********************************************************
88 Traverses the javastring hashtable and sets the vftbl-entries of
89 javastrings which were temporarily set to NULL, because
90 java.lang.Object was not yet loaded.

   Two vftbl pointers are patched per entry when they are still NULL:
   the one of the java.lang.String object itself and the one of its
   backing char array.  An entry without a String object or without a
   value array aborts the VM.

92 *******************************************************************************/
94 void stringtable_update(void)
97 literalstring *s; /* hashtable entry */
    /* visit every slot of the table */
99 for (unsigned int i = 0; i < hashtable_string.size; i++) {
100 s = (literalstring*) hashtable_string.ptr[i];
    /* wrap the stored raw String object so its fields can be read */
105 java_lang_String js(LLNI_WRAP(s->string));
107 if (js.is_null() || (js.get_value() == NULL)) {
108 /* error in hashtable found: entry has no String or no char array */
109 os::abort("stringtable_update: invalid literalstring in hashtable");
    /* raw pointer to the backing char array */
112 a = LLNI_UNWRAP(js.get_value());
    /* patch the String object's vftbl if it was left NULL */
114 if (js.get_vftbl() == NULL)
116 LLNI_UNWRAP(js.get_handle())->vftbl = class_java_lang_String->vftbl;
    /* patch the char array's vftbl if it was left NULL */
118 if (a->header.objheader.vftbl == NULL)
119 a->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
121 /* follow link in external hash chain */
129 /* javastring_new_from_utf_buffer **********************************************
131 Create a new object of type java/lang/String with the text from
132 the specified utf8 buffer.

   Parameters:
135 buffer.......points to first char in the buffer
136 blength......number of bytes to read from the buffer

   Returns:
139 the java.lang.String object, or
140 NULL if an exception has been thrown

142 *******************************************************************************/
144 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer, u4 blength)
146 const char *utf_ptr; /* current utf character in utf string */
    /* number of u2 characters the buffer decodes to; this becomes the
       length of the char array and of the String */
150 int32_t utflength = utf_get_number_of_u2s_for_buffer(buffer, blength);
    /* allocate the String object and its backing char array */
152 java_handle_t* h = builtin_new(class_java_lang_String);
153 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
155 /* javastring or character-array could not be created */
157 if ((h == NULL) || (ca == NULL))
160 /* decompress utf-string: one utf_nextu2() call per target character */
    /* the cast drops const to match utf_nextu2's char** parameter */
164 for (int32_t i = 0; i < utflength; i++)
165 LLNI_array_direct(ca, i) = utf_nextu2((char **) &utf_ptr);
167 /* set fields of the javastring-object (value array and count) */
169 java_lang_String jls(h, ca, utflength);
171 return jls.get_handle();
175 /* javastring_safe_new_from_utf8 ***********************************************
177 Create a new object of type java/lang/String with the text from
178 the specified UTF-8 string. This function is safe for invalid UTF-8.
179 (Invalid characters will be replaced by U+fffd.)

   Parameters:
182 text.........the UTF-8 string, zero-terminated.

   Returns:
185 the java.lang.String object, or
186 NULL if an exception has been thrown

188 *******************************************************************************/
190 java_handle_t *javastring_safe_new_from_utf8(const char *text)
195 /* Get number of bytes. We need this to completely emulate the messy */
196 /* behaviour of the RI (presumably the Java reference implementation). :( */
198 int32_t nbytes = strlen(text);
200 /* calculate number of Java characters the byte sequence converts to */
202 int32_t len = utf8_safe_number_of_u2s(text, nbytes);
204 /* allocate the String object and the char array */
206 java_handle_t* h = builtin_new(class_java_lang_String);
207 java_handle_chararray_t* ca = builtin_newarray_char(len);
209 /* javastring or character-array could not be created? */
211 if ((h == NULL) || (ca == NULL))
214 /* decompress UTF-8 string directly into the char array's data */
216 utf8_safe_convert_to_u2s(text, nbytes, LLNI_array_data(ca));
218 /* set fields of the String object (value array and count) */
220 java_lang_String jls(h, ca, len);
222 return jls.get_handle();
226 /* javastring_new_from_utf_string **********************************************
228 Create a new object of type java/lang/String with the text from
229 the specified zero-terminated utf8 string.

   Parameters:
232 utfstr.......zero-terminated utf8 string; its byte length is
233              determined with strlen()

   Returns:
236 the java.lang.String object, or
237 NULL if an exception has been thrown

239 *******************************************************************************/
241 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
    /* thin wrapper: delegate to the buffer variant with the string length */
245 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
249 /* javastring_new **************************************************************
251 creates a new object of type java/lang/String with the text of
252 the specified utf8-string
254 return: pointer to the string or NULL if memory is exhausted.

   A NullPointerException is raised on one path; its guard is on a line
   not shown in this listing (presumably u == NULL -- confirm).

256 *******************************************************************************/
258 java_handle_t *javastring_new(utf *u)
261 exceptions_throw_nullpointerexception();
    /* read cursor into the utf text and number of u2 characters it holds */
265 char* utf_ptr = u->text;
266 int32_t utflength = utf_get_number_of_u2s(u);
    /* allocate the String object and its backing char array */
268 java_handle_t* h = builtin_new(class_java_lang_String);
269 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
271 /* javastring or character-array could not be created */
273 if ((h == NULL) || (ca == NULL))
276 /* decompress utf-string: one utf_nextu2() call per target character */
278 for (int32_t i = 0; i < utflength; i++)
279 LLNI_array_direct(ca, i) = utf_nextu2(&utf_ptr);
281 /* set fields of the javastring-object (value array and count) */
283 java_lang_String jls(h, ca, utflength);
285 return jls.get_handle();
289 /* javastring_new_slash_to_dot *************************************************
291 creates a new object of type java/lang/String with the text of
292 the specified utf8-string with slashes changed to dots
294 return: pointer to the string or NULL if memory is exhausted.

   A NullPointerException is raised on one path; its guard is on a line
   not shown in this listing (presumably u == NULL -- confirm).

296 *******************************************************************************/
298 java_handle_t *javastring_new_slash_to_dot(utf *u)
301 exceptions_throw_nullpointerexception();
    /* read cursor into the utf text and number of u2 characters it holds */
305 char* utf_ptr = u->text;
306 int32_t utflength = utf_get_number_of_u2s(u);
    /* allocate the String object and its backing char array */
308 java_handle_t* h = builtin_new(class_java_lang_String);
309 java_handle_chararray_t* ca = builtin_newarray_char(utflength);
311 /* javastring or character-array could not be created */
312 if ((h == NULL) || (ca == NULL))
315 /* decompress utf-string */
317 for (int32_t i = 0; i < utflength; i++) {
318 uint16_t ch = utf_nextu2(&utf_ptr);
    /* NOTE(review): the slash-to-dot replacement on ch is expected
       between these two statements but is not shown in this listing */
323 LLNI_array_direct(ca, i) = ch;
326 /* set fields of the javastring-object (value array and count) */
328 java_lang_String jls(h, ca, utflength);
330 return jls.get_handle();
334 /* javastring_new_from_ascii ***************************************************
336 creates a new java/lang/String object which contains the given ASCII
337 C-string converted to UTF-16.

   Parameters:
340 text.........string of ASCII characters

   Returns:
343 the java.lang.String object, or
344 NULL if an exception has been thrown.

   A NullPointerException is raised on one path; its guard is on a line
   not shown in this listing (presumably text == NULL -- confirm).

346 *******************************************************************************/
348 java_handle_t *javastring_new_from_ascii(const char *text)
351 exceptions_throw_nullpointerexception();
    /* one Java character per input byte */
355 int32_t len = strlen(text);
    /* allocate the String object and its backing char array */
357 java_handle_t* h = builtin_new(class_java_lang_String);
358 java_handle_chararray_t* ca = builtin_newarray_char(len);
360 /* javastring or character-array could not be created */
362 if ((h == NULL) || (ca == NULL))
    /* Copy byte by byte without any decoding.
       NOTE(review): text[i] is a plain char; where char is signed, bytes
       >= 0x80 would be sign-extended on assignment, so the input must
       really be 7-bit ASCII. */
367 for (int32_t i = 0; i < len; i++)
368 LLNI_array_direct(ca, i) = text[i];
370 /* set fields of the javastring-object (value array and count) */
372 java_lang_String jls(h, ca, len);
374 return jls.get_handle();
378 /* javastring_tochar ***********************************************************
380 converts a Java string into a C string.

   Every Java character is assigned to a plain char, so characters that
   do not fit into a char are truncated.

382 return: pointer to C string
384 Caution: calling method MUST release the allocated memory!
   (The buffer is obtained with MNEW(char, count + 1); presumably it has
   to be released with the matching MFREE -- confirm.)

386 *******************************************************************************/
388 char* javastring_tochar(java_handle_t* h)
390 java_lang_String jls(h);
    /* backing char array plus the window [offset, offset + count) the
       String occupies in it */
395 java_handle_chararray_t* ca = jls.get_value();
400 int32_t count = jls.get_count();
401 int32_t offset = jls.get_offset();
    /* one char per Java character plus one extra element (presumably for
       the terminating zero, which is written on a line not shown) */
403 char* buf = MNEW(char, count + 1);
    /* narrowing copy: each Java character is stored into a char */
406 for (i = 0; i < count; i++)
407 buf[i] = LLNI_array_direct(ca, offset + i);
415 /* javastring_toutf ************************************************************
417 Make utf symbol from javastring.

   Parameters:
     string........the Java string to convert
     isclassname...passed through unchanged to utf_new_u2()

   Returns the utf symbol built by utf_new_u2() from the String's
   characters (count characters starting at offset in the value array).

419 *******************************************************************************/
421 utf *javastring_toutf(java_handle_t *string, bool isclassname)
423 java_lang_String jls(string);
428 java_handle_chararray_t* value = jls.get_value();
    /* same object as 'value' above, queried a second time; the result of
       this check is on a line not shown in this listing */
430 if (jls.get_value() == NULL)
433 int32_t count = jls.get_count();
434 int32_t offset = jls.get_offset();
    /* hand the String's character window to the utf constructor */
436 return utf_new_u2(LLNI_array_data(value) + offset, count, isclassname);
440 /* literalstring_u2 ************************************************************
442 Searches for the literalstring with the specified u2-array in the
443 string hashtable, if there is no such string a new one is created.
445 If copymode is true a copy of the u2-array is made.

   Parameters:
     a..........char array holding the characters to look up
     length.....number of characters to hash and compare
     offset.....index of the first character inside a->data
     copymode...true: the new table entry gets its own copy of the
                characters.  The other mode is decided on lines not shown
                in this listing; presumably 'a' is then adopted by the
                new entry, or released with mem_free() when an equal
                entry already exists -- confirm.

   Returns the raw (unwrapped) java.lang.String object of the matching or
   newly inserted table entry.

   Both the String object and the literalstring node are allocated with
   MNEW/NEW rather than through builtin_new().  When the number of entries
   exceeds twice the number of slots, the table is rebuilt with twice as
   many slots.

447 *******************************************************************************/
449 static java_object_t *literalstring_u2(java_chararray_t *a, int32_t length,
450 u4 offset, bool copymode)
452 literalstring *s; /* hashtable element */
453 java_chararray_t *ca; /* copy of u2-array */
460 /* find location in hashtable: hash the requested characters and mask
   the key down to a slot index (table size is a power of 2) */
462 key = unicode_hashkey(a->data + offset, length);
463 slot = key & (hashtable_string.size - 1);
464 s = (literalstring*) hashtable_string.ptr[slot];
    /* wrap the stored raw String object so its fields can be read */
468 java_lang_String js(LLNI_WRAP(s->string));
    /* cheap length check first, then compare character by character */
470 if (length == js.get_count()) {
473 for (i = 0; i < length; i++)
474 // FIXME This is not handle capable!
    /* the stored string is indexed from 0, i.e. its own offset is ignored;
       assumes table entries always start at offset 0 -- TODO confirm */
476 if (a->data[offset + i] != ((java_chararray_t*) LLNI_UNWRAP(js.get_value()))->data[i])
479 /* string already in hashtable, free memory */
    /* size expression matches the one used by literalstring_new() when it
       allocates the array with mem_alloc() */
482 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
    /* hit: return the String object already stored in the table */
486 return (java_object_t*) LLNI_UNWRAP(js.get_handle());
490 /* follow link in external hash chain */
495 /* create copy of u2-array for new javastring */
    /* "(length - 1)" suggests java_chararray_t already contains one u2
       element; the purpose of the extra 10 bytes is not visible here */
496 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
497 ca = (java_chararray_t*) mem_alloc(arraysize);
498 /* memcpy(ca, a, arraysize); */
499 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
    /* NOTE(review): the arithmetic is done on the address of the data
       member, so the step is sizeof(a->data), not sizeof(u2); both only
       coincide if data is declared as a one-element array -- confirm.
       The byte count also exceeds what 'length' characters occupy, so
       the read may run past the requested characters of 'a'. */
500 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
506 /* location in hashtable found, complete arrayheader */
508 ca->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
509 ca->header.size = length;
    /* the String class must be available to size and type the new object */
511 assert(class_java_lang_String);
512 assert(class_java_lang_String->state & CLASS_LOADED);
514 // Create a new java.lang.String object on the system heap.
515 java_object_t* o = (java_object_t*) MNEW(uint8_t, class_java_lang_String->instancesize);
517 java_handle_t* h = LLNI_WRAP(o);
519 #if defined(ENABLE_STATISTICS)
    /* NOTE(review): sizeof applied to the instancesize field yields the
       size of that field's type, not the number of bytes allocated above;
       the statistic presumably should add
       class_java_lang_String->instancesize itself -- confirm. */
521 size_string += sizeof(class_java_lang_String->instancesize);
524 #if defined(ENABLE_THREADS)
528 o->vftbl = class_java_lang_String->vftbl;
    /* initialise the String fields with the char array and its length */
531 java_lang_String jls(h, LLNI_WRAP(ca), length);
533 /* create new literalstring */
535 s = NEW(literalstring);
537 #if defined(ENABLE_STATISTICS)
539 size_string += sizeof(literalstring);
    /* insert the new node at the head of the slot's chain */
542 s->hashlink = (literalstring*) hashtable_string.ptr[slot];
543 s->string = (java_object_t*) LLNI_UNWRAP(jls.get_handle());
544 hashtable_string.ptr[slot] = s;
546 /* update number of hashtable entries */
548 hashtable_string.entries++;
550 /* reorganization of hashtable */
552 if (hashtable_string.entries > (hashtable_string.size * 2)) {
553 /* reorganization of hashtable, average length of the external
554 chains is approx. 2 */
558 literalstring *nexts;
559 hashtable newhash; /* the new hashtable */
561 /* create new hashtable, double the size */
563 hashtable_create(&newhash, hashtable_string.size * 2);
564 newhash.entries = hashtable_string.entries;
566 /* transfer elements to new hashtable */
568 for (i = 0; i < hashtable_string.size; i++) {
569 s = (literalstring*) hashtable_string.ptr[i];
573 java_lang_String tmpjls(LLNI_WRAP(s->string));
574 // FIXME This is not handle capable!
    /* rehash from data[0]; like the comparison above this assumes stored
       strings start at offset 0 */
575 slot = unicode_hashkey(((java_chararray_t*) LLNI_UNWRAP(tmpjls.get_value()))->data, tmpjls.get_count()) & (newhash.size - 1);
    /* relink the existing node at the head of its new chain */
577 s->hashlink = (literalstring*) newhash.ptr[slot];
578 newhash.ptr[slot] = s;
580 /* follow link in external hash chain */
585 /* dispose old table: only the slot array is freed, the nodes were
   moved into newhash */
587 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
588 hashtable_string = newhash;
    /* miss: return the newly created String object */
593 return (java_object_t*) LLNI_UNWRAP(jls.get_handle());
597 /* literalstring_new ***********************************************************
599 Returns the literalstring with the text of the utf-symbol; the lookup
600 and, if necessary, the insertion into the string hashtable are done by
   literalstring_u2(), which may return an entry that already exists.

   The utf text is first expanded into a temporary u2 array allocated with
   mem_alloc().  That array is handed to literalstring_u2() with
   copymode == false, which releases it with mem_free() when an equal
   entry already exists.

602 *******************************************************************************/
604 java_object_t *literalstring_new(utf *u)
606 char *utf_ptr; /* pointer to current unicode character */
608 u4 utflength; /* length of utf-string if uncompressed */
609 java_chararray_t *a; /* u2-array constructed from utf string */
613 utflength = utf_get_number_of_u2s(u);
615 /* allocate memory; the size expression must stay in sync with the
   mem_free() call in literalstring_u2() */
616 a = (java_chararray_t*) mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
618 /* convert utf-string to u2-array */
619 for (i = 0; i < utflength; i++)
620 a->data[i] = utf_nextu2(&utf_ptr);
    /* offset 0, copymode false: the callee gets the array just built */
622 return literalstring_u2(a, utflength, 0, false);
626 /* literalstring_free **********************************************************
628 Removes a literalstring from memory: releases the String object and
   its character array.  No removal from the string hashtable is visible
   in this listing.

630 *******************************************************************************/
633 /* TWISTI This one is currently not used. */
635 static void literalstring_free(java_object_t* string)
640 s = (heapstring_t *) string;
643 /* dispose memory of java.lang.String object */
644 FREE(s, heapstring_t);
646 /* dispose memory of java-characterarray */
647 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* NOTE(review): the allocations in literalstring_new/literalstring_u2 add "+ 10" to this size; it is missing here (original remark: +10 ??) */
652 /* javastring_intern ***********************************************************
654 Intern the given Java string: looks up (or inserts) its characters in
   the string hashtable via literalstring_u2() and returns the table's
   String object, wrapped as a handle.

656 XXX NOTE: Literal Strings are direct references since they are not placed
657 onto the GC-Heap. That's why this function looks so "different".

659 *******************************************************************************/
661 java_handle_t *javastring_intern(java_handle_t *string)
663 java_lang_String jls(string);
665 java_handle_chararray_t* value = jls.get_value();
667 java_chararray_t* ca = LLNI_UNWRAP(value); /* XXX see note above */
    /* window of the value array that this String occupies */
669 int32_t count = jls.get_count();
670 int32_t offset = jls.get_offset();
    /* copymode == true: the table gets its own copy of the characters */
672 java_object_t* o = literalstring_u2(ca, count, offset, true); /* XXX see note above */
674 return LLNI_WRAP(o); /* XXX see note above */
678 /* javastring_fprint ***********************************************************
680 Print the given Java string to the given stream.

   Parameters:
     s........the Java string to print
     stream...the output stream

   The characters are read one by one from the String's window
   [offset, offset + count) of its value array; the statement that writes
   each character to the stream is not shown in this listing.

682 *******************************************************************************/
684 void javastring_fprint(java_handle_t *s, FILE *stream)
686 java_lang_String jls(s);
688 java_handle_chararray_t* value = jls.get_value();
690 int32_t count = jls.get_count();
691 int32_t offset = jls.get_offset();
    /* i is an absolute index into the value array, not a string position */
693 for (int32_t i = offset; i < offset + count; i++) {
694 uint16_t c = LLNI_array_direct(value, i);
701 * These are local overrides for various environment variables in Emacs.
702 * Please do not remove this and leave it at the end of the file, where
703 * Emacs will automagically detect them.
704 * ---------------------------------------------------------------------
707 * indent-tabs-mode: t
711 * vim:noexpandtab:sw=4:ts=4: