1 /* src/vm/string.c - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
4 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
5 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
6 J. Wenninger, Institut f. Computersprachen - TU Wien
8 This file is part of CACAO.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2, or (at
13 your option) any later version.
15 This program is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
34 #include "vm/global.h"
36 #include "mm/memory.h"
38 #include "native/jni.h"
39 #include "native/llni.h"
41 #include "native/include/java_lang_String.h"
43 #include "threads/lock-common.h"
46 #include "vm/builtin.h"
47 #include "vm/exceptions.h"
48 #include "vm/primitive.h"
49 #include "vm/stringlocal.h"
52 #include "vmcore/options.h"
53 #include "vmcore/statistics.h"
54 #include "vmcore/utf8.h"
57 /* global variables ***********************************************************/
59 /* hashsize must be power of 2: literalstring_u2 selects the slot with
   "key & (size - 1)", which only reaches every slot for such sizes */
61 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
63 hashtable hashtable_string; /* hashtable for javastrings */
/* Lock object for hashtable_string. It is created in string_init and its
   monitor is entered and exited by literalstring_u2 around the lookup and
   insert. The declaration only exists when ENABLE_THREADS is defined. */
65 #if defined(ENABLE_THREADS)
66 static java_object_t *lock_hashtable_string;
70 /* XXX preliminary typedef, will be removed once string.c and utf8.c are unified. */
73 #if defined(ENABLE_HANDLES)
/* heapstring_t is the struct type this file uses when it accesses the
   fields of a java.lang.String directly (header.vftbl, value, count). */
74 typedef heap_java_lang_String heapstring_t;
/* NOTE(review): the preprocessor lines that separate the two typedefs are
   not visible in this excerpt; presumably the following one is the
   alternative used without ENABLE_HANDLES -- confirm. */
76 typedef java_lang_String heapstring_t;
80 /* string_init *****************************************************************
82 Initialize the string subsystem: create the string (javastring)
   hashtable with the initial size HASHTABLE_STRING_SIZE and, when
   ENABLE_THREADS is defined, allocate the lock object for the table
   and initialize its object lock.

   NOTE(review): the return statement is not visible in this excerpt;
   the trailing comment suggests that success is reported -- confirm.
84 *******************************************************************************/
86 bool string_init(void)
88 /* create string (javastring) hashtable */
90 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
92 #if defined(ENABLE_THREADS)
93 /* create string hashtable lock object */
95 lock_hashtable_string = NEW(java_object_t);
97 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
100 /* everything's ok */
106 /* stringtable_update **********************************************************
108 Traverses the javastring hashtable and sets the vftbl-entries of
109 javastrings which were temporarily set to NULL, because
110 java.lang.Object was not yet loaded.

   Every slot of hashtable_string is visited. For a literalstring found
   there, a NULL vftbl in the String header is replaced by the vftbl of
   class_java_lang_String, and a NULL vftbl in the header of the
   character array is replaced by the vftbl obtained from
   primitive_arrayclass_get_by_type(ARRAYTYPE_CHAR). If the String
   pointer or its value pointer is NULL, vm_abort is called.
112 *******************************************************************************/
114 void stringtable_update(void)
118 literalstring *s; /* hashtable entry */
121 for (i = 0; i < hashtable_string.size; i++) {
122 s = hashtable_string.ptr[i];
125 js = (heapstring_t *) s->string;
127 if ((js == NULL) || (js->value == NULL)) {
128 /* error in hashtable found */
130 vm_abort("stringtable_update: invalid literalstring in hashtable");
/* NOTE(review): the assignment of a is not visible in this excerpt;
   presumably a refers to js->value -- confirm. */
135 if (!js->header.vftbl)
136 /* vftbl of javastring is NULL */
137 js->header.vftbl = class_java_lang_String->vftbl;
139 if (!a->header.objheader.vftbl)
140 /* vftbl of character-array is NULL */
141 a->header.objheader.vftbl =
142 primitive_arrayclass_get_by_type(ARRAYTYPE_CHAR)->vftbl;
144 /* follow link in external hash chain */
152 /* javastring_new_from_utf_buffer **********************************************
154 Create a new object of type java/lang/String with the text from
155 the specified utf8 buffer.

   The number of u2 characters is taken from
   utf_get_number_of_u2s_for_buffer, the String object and the char
   array are allocated with builtin_new and builtin_newarray_char, and
   the array is filled one character at a time with utf_nextu2. The
   String then gets value = the new array, offset = 0 and
   count = number of characters.

   IN:
158 buffer.......points to first char in the buffer
159 blength......number of bytes to read from the buffer

   RETURN VALUE:
162 the java.lang.String object, or
163 NULL if an exception has been thrown
165 *******************************************************************************/
167 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer,
170 const char *utf_ptr; /* current utf character in utf string */
171 u4 utflength; /* length of utf-string if uncompressed */
173 java_lang_String *s; /* result-string */
174 java_handle_chararray_t *a;
179 utflength = utf_get_number_of_u2s_for_buffer(buffer,blength);
/* both allocations are made first and checked together afterwards */
181 o = builtin_new(class_java_lang_String);
182 a = builtin_newarray_char(utflength);
184 /* javastring or character-array could not be created */
186 if ((o == NULL) || (a == NULL))
189 /* decompress utf-string */
/* utf_ptr is declared const char *; the cast below drops the const
   qualifier for the call to utf_nextu2, which advances the pointer */
193 for (i = 0; i < utflength; i++)
194 LLNI_array_direct(a, i) = utf_nextu2((char **) &utf_ptr);
196 /* set fields of the javastring-object */
198 s = (java_lang_String *) o;
200 LLNI_field_set_ref(s, value , a);
201 LLNI_field_set_val(s, offset, 0);
202 LLNI_field_set_val(s, count , utflength);
208 /* javastring_safe_new_from_utf8 ***********************************************
210 Create a new object of type java/lang/String with the text from
211 the specified UTF-8 string. This function is safe for invalid UTF-8.
212 (Invalid characters will be replaced by U+fffd.)

   The byte length is taken with strlen, the number of Java characters
   with utf8_safe_number_of_u2s, and the conversion into the freshly
   allocated char array is done by utf8_safe_convert_to_u2s. The String
   then gets value = the new array, offset = 0 and count = number of
   Java characters.

   IN:
215 text.........the UTF-8 string, zero-terminated.

   RETURN VALUE:
218 the java.lang.String object, or
219 NULL if an exception has been thrown
221 *******************************************************************************/
223 java_handle_t *javastring_safe_new_from_utf8(const char *text)
226 java_handle_chararray_t *a;
233 /* Get number of bytes. We need this to completely emulate the messy */
234 /* behaviour of the RI. :( */
236 nbytes = strlen(text);
238 /* calculate number of Java characters */
240 len = utf8_safe_number_of_u2s(text, nbytes);
242 /* allocate the String object and the char array */
244 o = builtin_new(class_java_lang_String);
245 a = builtin_newarray_char(len);
247 /* javastring or character-array could not be created? */
249 if ((o == NULL) || (a == NULL))
252 /* decompress UTF-8 string */
254 utf8_safe_convert_to_u2s(text, nbytes, LLNI_array_data(a));
256 /* set fields of the String object */
258 s = (java_lang_String *) o;
260 LLNI_field_set_ref(s, value , a);
261 LLNI_field_set_val(s, offset, 0);
262 LLNI_field_set_val(s, count , len);
268 /* javastring_new_from_utf_string **********************************************
270 Create a new object of type java/lang/String with the text from
271 the specified zero-terminated utf8 string.

   IN:
274 utfstr.......the zero-terminated utf8 string; its length in bytes
   is taken with strlen and handed, together with the
   string, to javastring_new_from_utf_buffer

   RETURN VALUE:
278 the java.lang.String object, or
279 NULL if an exception has been thrown
   (the result of javastring_new_from_utf_buffer is returned
   unchanged)
281 *******************************************************************************/
283 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
287 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
291 /* javastring_new **************************************************************
293 creates a new object of type java/lang/String with the text of
294 the specified utf8-string

   The number of u2 characters is taken from utf_get_number_of_u2s(u),
   the String object and the char array are allocated with builtin_new
   and builtin_newarray_char, and the array is filled one character at
   a time with utf_nextu2. The String then gets value = the new array,
   offset = 0 and count = number of characters.

   One path of the function calls
   exceptions_throw_nullpointerexception(). NOTE(review): the condition
   guarding that call is not visible in this excerpt; presumably it is
   the u == NULL case -- confirm.

296 return: pointer to the string or NULL if memory is exhausted.
298 *******************************************************************************/
300 java_handle_t *javastring_new(utf *u)
302 char *utf_ptr; /* current utf character in utf string */
303 u4 utflength; /* length of utf-string if uncompressed */
305 java_handle_chararray_t *a;
310 exceptions_throw_nullpointerexception();
/* NOTE(review): the initialization of utf_ptr is not visible in this
   excerpt */
315 utflength = utf_get_number_of_u2s(u);
317 o = builtin_new(class_java_lang_String);
318 a = builtin_newarray_char(utflength);
320 /* javastring or character-array could not be created */
322 if ((o == NULL) || (a == NULL))
325 /* decompress utf-string */
327 for (i = 0; i < utflength; i++)
328 LLNI_array_direct(a, i) = utf_nextu2(&utf_ptr);
330 /* set fields of the javastring-object */
332 s = (java_lang_String *) o;
334 LLNI_field_set_ref(s, value , a);
335 LLNI_field_set_val(s, offset, 0);
336 LLNI_field_set_val(s, count , utflength);
342 /* javastring_new_slash_to_dot *************************************************
344 creates a new object of type java/lang/String with the text of
345 the specified utf8-string with slashes changed to dots

   Works like javastring_new, except that every decoded character is
   first stored in the local ch before it is written to the array.
   NOTE(review): the statement that replaces the slash is not visible
   in this excerpt; presumably it sits between the two statements of
   the loop body -- confirm.

   One path of the function calls
   exceptions_throw_nullpointerexception(). NOTE(review): the condition
   guarding that call is not visible in this excerpt; presumably it is
   the u == NULL case -- confirm.

347 return: pointer to the string or NULL if memory is exhausted.
349 *******************************************************************************/
351 java_handle_t *javastring_new_slash_to_dot(utf *u)
353 char *utf_ptr; /* current utf character in utf string */
354 u4 utflength; /* length of utf-string if uncompressed */
356 java_handle_chararray_t *a;
362 exceptions_throw_nullpointerexception();
367 utflength = utf_get_number_of_u2s(u);
369 o = builtin_new(class_java_lang_String);
370 a = builtin_newarray_char(utflength);
372 /* javastring or character-array could not be created */
373 if ((o == NULL) || (a == NULL))
376 /* decompress utf-string */
378 for (i = 0; i < utflength; i++) {
379 ch = utf_nextu2(&utf_ptr);
382 LLNI_array_direct(a, i) = ch;
385 /* set fields of the javastring-object */
387 s = (java_lang_String *) o;
389 LLNI_field_set_ref(s, value , a);
390 LLNI_field_set_val(s, offset, 0);
391 LLNI_field_set_val(s, count , utflength);
397 /* javastring_new_from_ascii ***************************************************
399 creates a new java/lang/String object which contains the given ASCII
400 C-string converted to UTF-16.

   Every char of text is assigned unchanged to the array element with
   the same index; no decoding takes place. The String then gets
   value = the new array, offset = 0 and count = len.

   One path of the function calls
   exceptions_throw_nullpointerexception(). NOTE(review): the condition
   guarding that call is not visible in this excerpt; presumably it is
   the text == NULL case -- confirm.

   IN:
403 text.........string of ASCII characters

   RETURN VALUE:
406 the java.lang.String object, or
407 NULL if an exception has been thrown.
409 *******************************************************************************/
411 java_handle_t *javastring_new_from_ascii(const char *text)
414 s4 len; /* length of the string */
417 java_handle_chararray_t *a;
420 exceptions_throw_nullpointerexception();
/* NOTE(review): the assignment of len is not visible in this excerpt */
426 o = builtin_new(class_java_lang_String);
427 a = builtin_newarray_char(len);
429 /* javastring or character-array could not be created */
431 if ((o == NULL) || (a == NULL))
/* copy the characters one by one; a plain char is widened by the
   assignment */
436 for (i = 0; i < len; i++)
437 LLNI_array_direct(a, i) = text[i];
439 /* set fields of the javastring-object */
441 s = (java_lang_String *) o;
443 LLNI_field_set_ref(s, value , a);
444 LLNI_field_set_val(s, offset, 0);
445 LLNI_field_set_val(s, count , len);
451 /* javastring_tochar ***********************************************************
453 converts a Java string into a C string.

   The value array, count and offset of the String are read, a buffer
   of count + 1 chars is allocated with MNEW, and the count characters
   starting at index offset of the array are assigned to the buffer
   one by one.

   NOTE(review): each array element is assigned to a plain char, so
   only what fits into a char is kept; presumably the elements are
   16-bit Java characters and non-ASCII text is therefore truncated
   -- confirm.

455 return: pointer to C string
457 Caution: calling method MUST release the allocated memory!
459 *******************************************************************************/
461 char *javastring_tochar(java_handle_t *so)
463 java_lang_String *s = (java_lang_String *) so;
464 java_handle_chararray_t *a;
473 LLNI_field_get_ref(s, value, a);
478 LLNI_field_get_val(s, count, count);
479 LLNI_field_get_val(s, offset, offset);
/* one element more than the loop below fills; NOTE(review): presumably
   reserved for the terminating zero, whose store is not visible in this
   excerpt -- confirm */
481 buf = MNEW(char, count + 1);
483 for (i = 0; i < count; i++)
484 buf[i] = LLNI_array_direct(a, offset + i);
492 /* javastring_toutf ************************************************************
494 Make utf symbol from javastring.

   The value array, count and offset of the String are read and the
   count characters starting at index offset of the array data are
   handed to utf_new_u2.

   IN:
      string........the java.lang.String to convert
      isclassname...passed through unchanged as third argument of
                    utf_new_u2

   RETURN VALUE:
      the result of utf_new_u2
496 *******************************************************************************/
498 utf *javastring_toutf(java_handle_t *string, bool isclassname)
501 java_handle_chararray_t *value;
505 s = (java_lang_String *) string;
510 LLNI_field_get_ref(s, value, value);
511 LLNI_field_get_val(s, count, count);
512 LLNI_field_get_val(s, offset, offset);
514 return utf_new_u2(LLNI_array_data(value) + offset, count, isclassname);
518 /* literalstring_u2 ************************************************************
520 Searches for the literalstring with the specified u2-array in the
521 string hashtable, if there is no such string a new one is created.
523 If copymode is true a copy of the u2-array is made.

   IN:
      a..........character array that holds the text
      length.....number of u2 characters of the text
      offset.....index of the first character of the text in a->data
      copymode...see above

   RETURN VALUE:
      the java.lang.String that was found in, or added to, the
      hashtable, cast to a java_object_t pointer

   The lookup and the insert run between LOCK_MONITOR_ENTER and
   LOCK_MONITOR_EXIT on lock_hashtable_string; both return paths exit
   the monitor first. After an insert the table is rebuilt with twice
   the size as soon as the number of entries exceeds twice the table
   size.

   NOTE(review): several statements of this function (loop headers,
   conditions, some declarations and assignments) are not visible in
   this excerpt, so the comments below only describe the visible lines.
525 *******************************************************************************/
527 static java_object_t *literalstring_u2(java_chararray_t *a, u4 length,
528 u4 offset, bool copymode)
530 literalstring *s; /* hashtable element */
531 heapstring_t *js; /* u2-array wrapped in javastring */
532 java_chararray_t *ca; /* copy of u2-array */
537 LOCK_MONITOR_ENTER(lock_hashtable_string);
539 /* find location in hashtable */
/* the mask only selects among all slots when the table size is a power
   of two */
541 key = unicode_hashkey(a->data + offset, length);
542 slot = key & (hashtable_string.size - 1);
543 s = hashtable_string.ptr[slot];
546 js = (heapstring_t *) s->string;
548 if (length == js->count) {
/* the candidate text starts at a->data[offset], the stored text is
   compared from js->value->data[0] on */
551 for (i = 0; i < length; i++)
552 if (a->data[offset + i] != js->value->data[i])
555 /* string already in hashtable, free memory */
/* the size expression is the same one literalstring_new uses for its
   mem_alloc */
558 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
560 LOCK_MONITOR_EXIT(lock_hashtable_string);
562 return (java_object_t *) js;
566 /* follow link in external hash chain */
571 /* create copy of u2-array for new javastring */
/* size = array struct + (length - 1) further characters + 10 bytes;
   presumably java_chararray_t already contains room for one u2, hence
   length - 1. NOTE(review): the reason for the 10 extra bytes is not
   documented in the visible source. */
572 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
573 ca = mem_alloc(arraysize);
574 /* memcpy(ca, a, arraysize); */
575 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
/* NOTE(review), three points on the copy below -- confirm each:
   - &(a->data) + offset advances in units of sizeof(a->data); this is
     the same address as a->data + offset only if data is declared as a
     one-element u2 array.
   - the byte count covers more than length characters (8 bytes more if
     u2 is 2 bytes wide), so bytes behind the text are read from a.
   - for length == 0 the term (length - 1) wraps around if u4 is an
     unsigned 32-bit type; unlike arraysize above, this byte count is not
     cut back to u4, so with a wider size_t it becomes very large. */
576 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
582 /* location in hashtable found, complete arrayheader */
584 ca->header.objheader.vftbl =
585 primitive_arrayclass_get_by_type(ARRAYTYPE_CHAR)->vftbl;
586 ca->header.size = length;
/* the String class must be available, its vftbl is stored below */
588 assert(class_java_lang_String);
589 assert(class_java_lang_String->state & CLASS_LOADED);
591 /* create new javastring */
593 js = NEW(heapstring_t);
595 #if defined(ENABLE_STATISTICS)
597 size_string += sizeof(heapstring_t);
600 #if defined(ENABLE_THREADS)
601 lock_init_object_lock(&js->header);
604 js->header.vftbl = class_java_lang_String->vftbl;
/* NOTE(review): the stores to the remaining fields of js are not visible
   in this excerpt */
609 /* create new literalstring */
611 s = NEW(literalstring);
613 #if defined(ENABLE_STATISTICS)
615 size_string += sizeof(literalstring);
/* link the new entry in as the first element of the chain of its slot */
618 s->hashlink = hashtable_string.ptr[slot];
619 s->string = (java_object_t *) js;
620 hashtable_string.ptr[slot] = s;
622 /* update number of hashtable entries */
624 hashtable_string.entries++;
626 /* reorganization of hashtable */
628 if (hashtable_string.entries > (hashtable_string.size * 2)) {
629 /* reorganization of hashtable, average length of the external
630 chains is approx. 2 */
634 literalstring *nexts;
636 hashtable newhash; /* the new hashtable */
638 /* create new hashtable, double the size */
640 hashtable_create(&newhash, hashtable_string.size * 2);
641 newhash.entries = hashtable_string.entries;
643 /* transfer elements to new hashtable */
645 for (i = 0; i < hashtable_string.size; i++) {
646 s = hashtable_string.ptr[i];
650 tmpjs = (heapstring_t *) s->string;
/* the slot is recomputed from the characters of the stored string and
   masked with the size of the new table */
651 slot = unicode_hashkey(tmpjs->value->data, tmpjs->count) & (newhash.size - 1);
/* the existing literalstring node is reused and becomes the first
   element of its new chain */
653 s->hashlink = newhash.ptr[slot];
654 newhash.ptr[slot] = s;
656 /* follow link in external hash chain */
661 /* dispose old table */
/* only the old slot array is released; the nodes were relinked above */
663 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
664 hashtable_string = newhash;
667 LOCK_MONITOR_EXIT(lock_hashtable_string);
669 return (java_object_t *) js;
673 /* literalstring_new ***********************************************************
675 Creates a new literalstring with the text of the utf-symbol and inserts
676 it into the string hashtable.

   A character array for utf_get_number_of_u2s(u) characters is
   allocated with mem_alloc and filled with utf_nextu2. The array is
   then handed to literalstring_u2 with offset 0 and copymode false,
   and the result of that call is returned. literalstring_u2 contains
   a mem_free of the array on its "string already in hashtable" path.

   IN:
      u..........the utf-symbol that holds the text
678 *******************************************************************************/
680 java_object_t *literalstring_new(utf *u)
682 char *utf_ptr; /* pointer to current unicode character */
684 u4 utflength; /* length of utf-string if uncompressed */
685 java_chararray_t *a; /* u2-array constructed from utf string */
/* NOTE(review): the initialization of utf_ptr is not visible in this
   excerpt */
689 utflength = utf_get_number_of_u2s(u);
691 /* allocate memory */
/* same size expression as the mem_free in literalstring_u2; NOTE(review):
   for utflength == 0 the term (utflength - 1) wraps around if u4 is an
   unsigned 32-bit type -- confirm that the resulting size is as intended */
692 a = mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
694 /* convert utf-string to u2-array */
695 for (i = 0; i < utflength; i++)
696 a->data[i] = utf_nextu2(&utf_ptr);
698 return literalstring_u2(a, utflength, 0, false);
702 /* literalstring_free **********************************************************
704 Removes a literalstring from memory.

   Releases the java.lang.String struct and its character array with
   FREE.

   IN:
      string.....the string object, used as a heapstring_t

   NOTE(review): the assignment of a is not visible in this excerpt;
   presumably a is the value array of s -- confirm, and confirm that it
   is read before s is released.
706 *******************************************************************************/
708 static void literalstring_free(java_object_t* string)
713 s = (heapstring_t *) string;
716 /* dispose memory of java.lang.String object */
717 FREE(s, heapstring_t);
719 /* dispose memory of java-characterarray */
/* NOTE(review): literalstring_new and literalstring_u2 allocate these
   arrays with an additional "+ 10" bytes that is missing from the size
   given here -- confirm which size is correct */
720 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
724 /* javastring_intern ***********************************************************
726 Intern the given Java string.

   The value array, count and offset of the String are read and handed
   to literalstring_u2 with copymode true. The object returned by
   literalstring_u2 is wrapped with LLNI_WRAP and returned.

728 XXX NOTE: Literal Strings are direct references since they are not placed
729 onto the GC-Heap. That's why this function looks so "different".
731 *******************************************************************************/
733 java_handle_t *javastring_intern(java_handle_t *s)
735 java_lang_String *so;
736 java_chararray_t *value;
739 /* java_lang_String *o; */
740 java_object_t *o; /* XXX see note above */
742 so = (java_lang_String *) s;
/* the array is read as a direct pointer here, while count and offset go
   through the LLNI_field_get_val accessor */
744 value = LLNI_field_direct(so, value); /* XXX see note above */
745 LLNI_field_get_val(so, count, count);
746 LLNI_field_get_val(so, offset, offset);
748 o = literalstring_u2(value, count, offset, true);
750 return LLNI_WRAP(o); /* XXX see note above */
754 /* javastring_print ************************************************************
756 Print the given Java string.

   The value array, count and offset of the String are read and the
   characters with the indices offset to offset + count - 1 are fetched
   from the array one by one.

   NOTE(review): the statement that outputs the fetched character is
   not visible in this excerpt.
758 *******************************************************************************/
760 void javastring_print(java_handle_t *s)
762 java_lang_String *so;
763 java_handle_chararray_t *value;
769 so = (java_lang_String *) s;
771 LLNI_field_get_ref(so, value, value);
772 LLNI_field_get_val(so, count, count);
773 LLNI_field_get_val(so, offset, offset);
775 for (i = offset; i < offset + count; i++) {
776 c = LLNI_array_direct(value, i);
783 * These are local overrides for various environment variables in Emacs.
784 * Please do not remove this and leave it at the end of the file, where
785 * Emacs will automagically detect them.
786 * ---------------------------------------------------------------------
789 * indent-tabs-mode: t
793 * vim:noexpandtab:sw=4:ts=4: