1 /* src/vm/string.c - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
4 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
5 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
6 J. Wenninger, Institut f. Computersprachen - TU Wien
8 This file is part of CACAO.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2, or (at
13 your option) any later version.
15 This program is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
34 #include "vm/global.h"
36 #include "mm/memory.h"
38 #include "native/jni.h"
39 #include "native/llni.h"
41 #include "native/include/java_lang_String.h"
43 #include "threads/lock-common.h"
46 #include "vm/builtin.h"
47 #include "vm/exceptions.h"
48 #include "vm/primitive.h"
49 #include "vm/stringlocal.h"
52 #include "vmcore/options.h"
53 #include "vmcore/statistics.h"
54 #include "vmcore/utf8.h"
57 /* global variables ***********************************************************/
59 /* The table size must be a power of 2: slots are selected by masking the
   hash key with (size - 1), both on lookup and when rehashing. */
61 #define HASHTABLE_STRING_SIZE 2048 /* initial number of slots of the javastring hashtable */
63 hashtable hashtable_string; /* hashtable of literalstring entries for javastrings */
65 #if defined(ENABLE_THREADS)
66 static java_object_t *lock_hashtable_string; /* object used as monitor around hashtable_string */
70 /* string_init *****************************************************************
72 Create the string (javastring) hashtable with HASHTABLE_STRING_SIZE
   slots and, when ENABLE_THREADS is defined, allocate and initialize
   the object that is used as the lock for that hashtable.
74 *******************************************************************************/
76 bool string_init(void)
78 /* create string (javastring) hashtable */
80 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
82 #if defined(ENABLE_THREADS)
83 /* create the object whose monitor guards the string hashtable */
85 lock_hashtable_string = NEW(java_object_t);
/* the object was allocated with NEW, so its lock has to be set up by hand */
87 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
96 /* stringtable_update **********************************************************
98 Traverses the javastring hashtable and sets the vftbl-entries of
99 javastrings which were temporarily set to NULL, because
100 java.lang.Object was not yet loaded.

   Both the String object and its backing character array are patched.
   The VM is aborted if an entry has no String object or the String
   has no value array.
102 *******************************************************************************/
104 void stringtable_update(void)
106 java_lang_String *js;
108 literalstring *s; /* hashtable entry */
/* visit every slot of the table */
111 for (i = 0; i < hashtable_string.size; i++) {
112 s = hashtable_string.ptr[i];
115 js = (java_lang_String *) s->string;
/* every entry must reference a String that owns a character array */
117 if ((js == NULL) || (js->value == NULL)) {
118 /* error in hashtable found */
120 vm_abort("stringtable_update: invalid literalstring in hashtable");
/* fetch the backing character array of the String into a */
123 LLNI_field_get_ref(js, value, a);
125 if (!js->header.vftbl)
126 /* vftbl of javastring is NULL: use the one of java.lang.String */
127 js->header.vftbl = class_java_lang_String->vftbl;
129 if (!a->header.objheader.vftbl)
130 /* vftbl of character-array is NULL: use the one of the char array class */
131 a->header.objheader.vftbl =
132 primitive_arrayclass_get_by_type(ARRAYTYPE_CHAR)->vftbl;
134 /* follow link in external hash chain */
142 /* javastring_new_from_utf_buffer **********************************************
144 Create a new object of type java/lang/String with the text from
145 the specified utf8 buffer.

   IN:
148 buffer.......points to first char in the buffer
149 blength......number of bytes to read from the buffer

   RETURN VALUE:
152 the java.lang.String object, or
153 NULL if an exception has been thrown
155 *******************************************************************************/
157 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer,
160 const char *utf_ptr; /* current utf character in utf string */
161 u4 utflength; /* number of u2 characters the buffer decodes to */
163 java_lang_String *s; /* result-string */
164 java_handle_chararray_t *a; /* character array backing the result */
/* determine how many u2 characters the blength bytes expand to */
169 utflength = utf_get_number_of_u2s_for_buffer(buffer,blength);
/* allocate the String object and a character array of exactly that length */
171 o = builtin_new(class_java_lang_String);
172 a = builtin_newarray_char(utflength);
174 /* javastring or character-array could not be created */
176 if ((o == NULL) || (a == NULL))
179 /* decompress utf-string: one utf_nextu2 call per output character */
183 for (i = 0; i < utflength; i++)
/* utf_ptr is a const char *, the cast drops the const to match the
   char ** argument passed to utf_nextu2 */
184 LLNI_array_direct(a, i) = utf_nextu2((char **) &utf_ptr);
186 /* set fields of the javastring-object */
188 s = (java_lang_String *) o;
/* the String spans the whole array: offset 0, count == array length */
190 LLNI_field_set_ref(s, value , a);
191 LLNI_field_set_val(s, offset, 0);
192 LLNI_field_set_val(s, count , utflength);
198 /* javastring_safe_new_from_utf8 ***********************************************
200 Create a new object of type java/lang/String with the text from
201 the specified UTF-8 string. This function is safe for invalid UTF-8.
202 (Invalid characters will be replaced by U+fffd.)

   IN:
205 text.........the UTF-8 string, zero-terminated.

   RETURN VALUE:
208 the java.lang.String object, or
209 NULL if an exception has been thrown
211 *******************************************************************************/
213 java_handle_t *javastring_safe_new_from_utf8(const char *text)
216 java_handle_chararray_t *a; /* character array backing the result */
223 /* Get number of bytes. The byte count is passed to both the counting */
224 /* and the converting helper to emulate the messy behaviour of the RI. :( */
226 nbytes = strlen(text);
228 /* calculate number of Java characters */
230 len = utf8_safe_number_of_u2s(text, nbytes);
232 /* allocate the String object and the char array */
234 o = builtin_new(class_java_lang_String);
235 a = builtin_newarray_char(len);
237 /* javastring or character-array could not be created? */
239 if ((o == NULL) || (a == NULL))
242 /* decompress UTF-8 string directly into the array data */
/* NOTE(review): a->data is accessed directly here, whereas the sibling
   constructors go through LLNI_array_direct - confirm this is intended */
244 utf8_safe_convert_to_u2s(text, nbytes, a->data);
246 /* set fields of the String object */
248 s = (java_lang_String *) o;
/* the String spans the whole array: offset 0, count == array length */
250 LLNI_field_set_ref(s, value , a);
251 LLNI_field_set_val(s, offset, 0);
252 LLNI_field_set_val(s, count , len);
258 /* javastring_new_from_utf_string **********************************************
260 Create a new object of type java/lang/String with the text from
261 the specified zero-terminated utf8 string.

   IN:
264 utfstr.......the zero-terminated utf8 string; its byte length is
265              determined with strlen

   RETURN VALUE:
268 the java.lang.String object, or
269 NULL if an exception has been thrown
271 *******************************************************************************/
273 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
/* thin wrapper: delegate to the buffer variant with the computed length */
277 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
281 /* javastring_new **************************************************************
283 creates a new object of type java/lang/String with the text of
284 the specified utf8-string
286 return: pointer to the string or NULL if memory is exhausted.

   A NullPointerException is thrown on one path of this function.
   NOTE(review): presumably when u is NULL - confirm against the guard.
288 *******************************************************************************/
290 java_handle_t *javastring_new(utf *u)
292 char *utf_ptr; /* current utf character in utf string */
293 u4 utflength; /* number of u2 characters the utf symbol decodes to */
295 java_handle_chararray_t *a; /* character array backing the result */
300 exceptions_throw_nullpointerexception();
/* determine how many u2 characters the symbol expands to */
305 utflength = utf_get_number_of_u2s(u);
/* allocate the String object and a character array of exactly that length */
307 o = builtin_new(class_java_lang_String);
308 a = builtin_newarray_char(utflength);
310 /* javastring or character-array could not be created */
312 if ((o == NULL) || (a == NULL))
315 /* decompress utf-string: one utf_nextu2 call per output character */
317 for (i = 0; i < utflength; i++)
318 LLNI_array_direct(a, i) = utf_nextu2(&utf_ptr);
320 /* set fields of the javastring-object */
322 s = (java_lang_String *) o;
/* the String spans the whole array: offset 0, count == array length */
324 LLNI_field_set_ref(s, value , a);
325 LLNI_field_set_val(s, offset, 0);
326 LLNI_field_set_val(s, count , utflength);
332 /* javastring_new_slash_to_dot *************************************************
334 creates a new object of type java/lang/String with the text of
335 the specified utf8-string with slashes changed to dots
337 return: pointer to the string or NULL if memory is exhausted.

   A NullPointerException is thrown on one path of this function.
   NOTE(review): presumably when u is NULL - confirm against the guard.
339 *******************************************************************************/
341 java_handle_t *javastring_new_slash_to_dot(utf *u)
343 char *utf_ptr; /* current utf character in utf string */
344 u4 utflength; /* number of u2 characters the utf symbol decodes to */
346 java_handle_chararray_t *a; /* character array backing the result */
352 exceptions_throw_nullpointerexception();
/* determine how many u2 characters the symbol expands to */
357 utflength = utf_get_number_of_u2s(u);
/* allocate the String object and a character array of exactly that length */
359 o = builtin_new(class_java_lang_String);
360 a = builtin_newarray_char(utflength);
362 /* javastring or character-array could not be created */
363 if ((o == NULL) || (a == NULL))
366 /* decompress utf-string */
368 for (i = 0; i < utflength; i++) {
/* decode into a temporary first so the character can be inspected
   before it is stored */
369 ch = utf_nextu2(&utf_ptr);
372 LLNI_array_direct(a, i) = ch;
375 /* set fields of the javastring-object */
377 s = (java_lang_String *) o;
/* the String spans the whole array: offset 0, count == array length */
379 LLNI_field_set_ref(s, value , a);
380 LLNI_field_set_val(s, offset, 0);
381 LLNI_field_set_val(s, count , utflength);
387 /* javastring_new_from_ascii ***************************************************
389 creates a new java/lang/String object which contains the given ASCII
390 C-string converted to UTF-16.

   IN:
393 text.........string of ASCII characters

   RETURN VALUE:
396 the java.lang.String object, or
397 NULL if an exception has been thrown.

   A NullPointerException is thrown on one path of this function.
   NOTE(review): presumably when text is NULL - confirm against the guard.
399 *******************************************************************************/
401 java_handle_t *javastring_new_from_ascii(const char *text)
404 s4 len; /* length of the string */
407 java_handle_chararray_t *a; /* character array backing the result */
410 exceptions_throw_nullpointerexception();
/* allocate the String object and a character array with len elements */
416 o = builtin_new(class_java_lang_String);
417 a = builtin_newarray_char(len);
419 /* javastring or character-array could not be created */
421 if ((o == NULL) || (a == NULL))
/* copy the text: every char becomes exactly one array element */
/* NOTE(review): the bytes are stored unchecked; input outside the ASCII
   range is not rejected or translated here */
426 for (i = 0; i < len; i++)
427 LLNI_array_direct(a, i) = text[i];
429 /* set fields of the javastring-object */
431 s = (java_lang_String *) o;
/* the String spans the whole array: offset 0, count == array length */
433 LLNI_field_set_ref(s, value , a);
434 LLNI_field_set_val(s, offset, 0);
435 LLNI_field_set_val(s, count , len);
441 /* javastring_tochar ***********************************************************
443 converts a Java string into a C string.

   The count characters starting at the String's offset are copied, one
   array element per char, into a buffer allocated with MNEW.
445 return: pointer to C string
447 Caution: calling method MUST release the allocated memory!
449 *******************************************************************************/
451 char *javastring_tochar(java_handle_t *so)
453 java_lang_String *s = (java_lang_String *) so;
454 java_handle_chararray_t *a; /* character array backing the String */
461 LLNI_field_get_ref(s, value, a);
/* one char per character plus one extra char (presumably for the
   terminating NUL - confirm) */
466 buf = MNEW(char, LLNI_field_direct(s, count) + 1);
/* the String may be a window into a larger array, hence the offset */
/* NOTE(review): each array element is assigned to a char, so values that
   do not fit into a char are narrowed, not encoded */
468 for (i = 0; i < LLNI_field_direct(s, count); i++)
469 buf[i] = a->data[LLNI_field_direct(s, offset) + i];
477 /* javastring_toutf ************************************************************
479 Make utf symbol from javastring.

   The count characters starting at the String's offset in its value
   array are handed to utf_new_u2; isclassname is passed through to it
   unchanged.
481 *******************************************************************************/
483 utf *javastring_toutf(java_handle_t *string, bool isclassname)
487 s = (java_lang_String *) string;
/* first character is at value->data + offset, length is count */
492 return utf_new_u2(LLNI_field_direct(s, value)->data + LLNI_field_direct(s, offset), LLNI_field_direct(s, count), isclassname);
496 /* literalstring_u2 ************************************************************
498 Searches for the javastring with the specified u2-array in the
499 string hashtable, if there is no such string a new one is created.
501 If copymode is true a copy of the u2-array is made.

   The characters looked up are a->data[offset .. offset + length - 1].
   The string hashtable lock is entered at the start and exited before
   each return. After an insert the table is doubled in size once the
   number of entries exceeds twice the number of slots.

   Returns the java.lang.String object stored in the hashtable.
503 *******************************************************************************/
505 java_object_t *literalstring_u2(java_chararray_t *a, u4 length, u4 offset,
508 literalstring *s; /* hashtable element */
509 java_lang_String *js; /* u2-array wrapped in javastring */
510 java_chararray_t *ca; /* copy of u2-array */
515 LOCK_MONITOR_ENTER(lock_hashtable_string);
517 /* find location in hashtable */
519 key = unicode_hashkey(a->data + offset, length);
/* masking instead of modulo: requires the table size to be a power of 2 */
520 slot = key & (hashtable_string.size - 1);
521 s = hashtable_string.ptr[slot];
524 js = (java_lang_String *) s->string;
/* cheap length check first, then compare character by character */
526 if (length == js->count) {
/* the stored string is compared from index 0 of its value array, i.e.
   the offset field of the stored String is not consulted */
529 for (i = 0; i < length; i++)
530 if (a->data[offset + i] != js->value->data[i])
533 /* string already in hashtable, free memory */
/* the size expression mirrors the allocation done in literalstring_new,
   including the 10 bytes of slack */
/* NOTE(review): presumably only executed when the caller handed over
   ownership of a (copymode false) - confirm against the guard */
536 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
538 LOCK_MONITOR_EXIT(lock_hashtable_string);
540 return (java_object_t *) js;
544 /* follow link in external hash chain */
549 /* create copy of u2-array for new javastring */
/* NOTE(review): length is a u4, so for length == 0 the subexpression
   (length - 1) wraps around before it is multiplied - confirm that
   empty strings are sized correctly on all targets */
550 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
551 ca = mem_alloc(arraysize);
552 /* memcpy(ca, a, arraysize); */
/* copy the array header and the character data separately so that the
   data can be taken from position offset of the source */
553 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
/* NOTE(review): &(a->data) + offset is arithmetic on a pointer to the
   whole data member, so it advances in units of sizeof(a->data), not of
   u2 elements - confirm that this addresses a->data[offset]. The copy
   also reads the slack bytes beyond the length characters of the source */
554 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
560 /* location in hashtable found, complete arrayheader */
562 ca->header.objheader.vftbl =
563 primitive_arrayclass_get_by_type(ARRAYTYPE_CHAR)->vftbl;
564 ca->header.size = length;
/* the String class must be available, its vftbl is installed below */
566 assert(class_java_lang_String);
567 assert(class_java_lang_String->state & CLASS_LOADED);
569 /* create new javastring */
571 js = NEW(java_lang_String);
573 #if defined(ENABLE_STATISTICS)
575 size_string += sizeof(java_lang_String);
578 #if defined(ENABLE_THREADS)
/* the object was allocated with NEW, so its lock is set up by hand */
579 lock_init_object_lock(&js->header);
582 js->header.vftbl = class_java_lang_String->vftbl;
587 /* create new literalstring */
589 s = NEW(literalstring);
591 #if defined(ENABLE_STATISTICS)
593 size_string += sizeof(literalstring);
/* insert the new entry at the head of the chain of its slot */
596 s->hashlink = hashtable_string.ptr[slot];
597 s->string = (java_object_t *) js;
598 hashtable_string.ptr[slot] = s;
600 /* update number of hashtable entries */
602 hashtable_string.entries++;
604 /* reorganization of hashtable */
606 if (hashtable_string.entries > (hashtable_string.size * 2)) {
607 /* reorganization of hashtable, average length of the external
608 chains is approx. 2 */
612 literalstring *nexts;
613 java_lang_String *tmpjs;
614 hashtable newhash; /* the new hashtable */
616 /* create new hashtable, double the size */
618 hashtable_create(&newhash, hashtable_string.size * 2);
/* entries are moved, not copied, so the count carries over unchanged */
619 newhash.entries = hashtable_string.entries;
621 /* transfer elements to new hashtable */
623 for (i = 0; i < hashtable_string.size; i++) {
624 s = hashtable_string.ptr[i];
628 tmpjs = (java_lang_String *) s->string;
/* recompute the key from the stored characters (taken from index 0 of
   the value array) and mask it with the new size */
629 slot = unicode_hashkey(tmpjs->value->data, tmpjs->count) & (newhash.size - 1);
/* relink the existing entry at the head of its new chain */
631 s->hashlink = newhash.ptr[slot];
632 newhash.ptr[slot] = s;
634 /* follow link in external hash chain */
639 /* dispose old table */
/* only the slot array is released; the entries now live in newhash */
641 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
642 hashtable_string = newhash;
645 LOCK_MONITOR_EXIT(lock_hashtable_string);
647 return (java_object_t *) js;
651 /* literalstring_new ***********************************************************
653 Creates a new javastring with the text of the utf-symbol and inserts it into
654 the string hashtable.

   The symbol is decoded into a freshly allocated u2-array which is then
   passed to literalstring_u2 with copymode false; the result of that
   call is returned.
656 *******************************************************************************/
658 java_object_t *literalstring_new(utf *u)
660 char *utf_ptr; /* pointer to current unicode character */
662 u4 utflength; /* number of u2 characters the utf symbol decodes to */
663 java_chararray_t *a; /* u2-array constructed from utf string */
667 utflength = utf_get_number_of_u2s(u);
669 /* allocate memory: array struct plus the remaining characters plus 10
   bytes of slack; literalstring_u2 uses the same size expression when it
   frees or copies the array */
/* NOTE(review): utflength is a u4, so (utflength - 1) wraps around for
   an empty symbol before it is multiplied - confirm the resulting size */
670 a = mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
672 /* convert utf-string to u2-array */
673 for (i = 0; i < utflength; i++)
674 a->data[i] = utf_nextu2(&utf_ptr);
/* offset 0, copymode false: the array is handed over to literalstring_u2 */
676 return literalstring_u2(a, utflength, 0, false);
680 /* literalstring_free **********************************************************
682 Removes a javastring from memory.

   Frees both the java.lang.String object and its character array.
684 *******************************************************************************/
686 void literalstring_free(java_object_t* string)
691 s = (java_lang_String *) string;
694 /* dispose memory of java.lang.String object */
695 FREE(s, java_lang_String);
697 /* dispose memory of java-characterarray */
/* NOTE(review): the arrays are allocated with 10 extra bytes in
   literalstring_new and literalstring_u2, but the size given here omits
   them; also FREE receives a type above and a size here - confirm both */
698 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
702 /* javastring_intern ***********************************************************
704 Intern the given Java string.

   The String's value array, count and offset are passed to
   literalstring_u2 with copymode true, so the string hashtable gets its
   own copy of the characters.
706 *******************************************************************************/
708 java_handle_t *javastring_intern(java_handle_t *s)
710 java_lang_String *so;
711 java_chararray_t *value; /* character array backing the String */
714 /* java_lang_String *o; */
717 so = (java_lang_String *) s;
/* read the three fields that describe the String's characters */
719 value = LLNI_field_direct(so, value);
720 count = LLNI_field_direct(so, count);
721 offset = LLNI_field_direct(so, offset);
/* look up or insert; copymode true leaves the String's own array untouched */
723 o = literalstring_u2(value, count, offset, true);
729 /* javastring_print ************************************************************
731 Print the given Java string.

   Iterates over the count characters that start at the String's offset
   in its value array.
733 *******************************************************************************/
735 void javastring_print(java_handle_t *s)
737 java_lang_String *so;
738 java_chararray_t *value; /* character array backing the String */
744 so = (java_lang_String *) s;
/* read the three fields that describe the String's characters */
746 value = LLNI_field_direct(so, value);
747 count = LLNI_field_direct(so, count);
748 offset = LLNI_field_direct(so, offset);
/* the String may be a window into a larger array: start at offset */
750 for (i = offset; i < offset + count; i++) {
751 c = LLNI_array_direct(value, i);
758 * These are local overrides for various environment variables in Emacs.
759 * Please do not remove this and leave it at the end of the file, where
760 * Emacs will automagically detect them.
761 * ---------------------------------------------------------------------
764 * indent-tabs-mode: t
768 * vim:noexpandtab:sw=4:ts=4: