1 /* src/vm/string.c - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
4 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
5 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
6 J. Wenninger, Institut f. Computersprachen - TU Wien
8 This file is part of CACAO.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2, or (at
13 your option) any later version.
15 This program is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 $Id: string.c 8357 2007-08-19 22:59:43Z twisti $
36 #include "vm/global.h"
38 #include "mm/memory.h"
40 #include "native/jni.h"
41 #include "native/llni.h"
43 #include "native/include/java_lang_String.h"
45 #include "threads/lock-common.h"
48 #include "vm/builtin.h"
49 #include "vm/exceptions.h"
50 #include "vm/primitive.h"
51 #include "vm/stringlocal.h"
54 #include "vmcore/options.h"
55 #include "vmcore/statistics.h"
56 #include "vmcore/utf8.h"
59 /* global variables ***********************************************************/
61 /* hashsize must be power of 2 */
/* Keep this a power of two: literalstring_u2 derives the slot by masking the
   hash key with (hashtable_string.size - 1). */
63 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
/* Created in string_init; literalstring_u2 replaces it with a table of twice
   the size once the entry count exceeds twice the slot count. */
65 hashtable hashtable_string; /* hashtable for javastrings */
67 #if defined(ENABLE_THREADS)
/* Monitor object entered by literalstring_u2 around lookup and insertion. */
68 static java_object_t *lock_hashtable_string;
72 /* string_init *****************************************************************
74 Create the string hashtable and, if threads are enabled, its lock object.
76 *******************************************************************************/
/* Creates the global string hashtable with HASHTABLE_STRING_SIZE slots and,
   when ENABLE_THREADS is defined, allocates and initializes the object that
   serves as the table's lock.
   NOTE(review): the return statement is not visible in this excerpt, so the
   meaning of the bool result should be confirmed against the full file. */
78 bool string_init(void)
80 /* create string (javastring) hashtable */
82 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
84 #if defined(ENABLE_THREADS)
85 /* create string hashtable lock object */
/* a plain java_object_t is allocated only to carry the lock */
87 lock_hashtable_string = NEW(java_object_t);
89 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
98 /* stringtable_update **********************************************************
100 Traverses the javastring hashtable and sets the vftbl-entries of
101 javastrings which were temporarily set to NULL, because
102 java.lang.Object was not yet loaded.
104 *******************************************************************************/
/* Walks every slot of hashtable_string and fills in vftbl pointers that are
   still NULL, both on the String object and on its character array.  An
   entry whose String object or value array is NULL aborts the VM.
   NOTE(review): the loop that follows the hash chain is only partly visible
   in this excerpt. */
106 void stringtable_update(void)
108 java_lang_String *js;
110 literalstring *s; /* hashtable entry */
113 for (i = 0; i < hashtable_string.size; i++) {
114 s = hashtable_string.ptr[i];
117 js = (java_lang_String *) s->string;
/* every stored literal must own both a String object and a char array */
119 if ((js == NULL) || (js->value == NULL)) {
120 /* error in hashtable found */
122 vm_abort("stringtable_update: invalid literalstring in hashtable");
125 LLNI_field_get_ref(js, value, a);
/* only missing vftbl pointers are patched; ones already set are kept */
127 if (!js->header.vftbl)
128 /* vftbl of javastring is NULL */
129 js->header.vftbl = class_java_lang_String->vftbl;
131 if (!a->header.objheader.vftbl)
132 /* vftbl of character-array is NULL */
133 a->header.objheader.vftbl =
134 primitive_arrayclass_get_by_type(ARRAYTYPE_CHAR)->vftbl;
136 /* follow link in external hash chain */
144 /* javastring_new_from_utf_buffer **********************************************
146 Create a new object of type java/lang/String with the text from
147 the specified utf8 buffer.
150 buffer.......points to first char in the buffer
151 blength......number of bytes to read from the buffer
154 the java.lang.String object, or
155 NULL if an exception has been thrown
157 *******************************************************************************/
/* Builds a java.lang.String from the first blength bytes of a UTF-8 buffer:
   the number of UTF-16 units is computed first, then the String object and
   a char array of exactly that length are allocated and the buffer is
   decoded into the array one unit at a time.
   NOTE(review): the rest of the parameter list, the early return after a
   failed allocation and the initialization of utf_ptr are not visible in
   this excerpt. */
159 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer,
162 const char *utf_ptr; /* current utf character in utf string */
163 u4 utflength; /* length of utf-string if uncompressed */
165 java_lang_String *s; /* result-string */
166 java_handle_chararray_t *a;
/* count the characters up front so the array gets its final size */
171 utflength = utf_get_number_of_u2s_for_buffer(buffer,blength);
/* both allocations are attempted before either result is checked */
173 o = builtin_new(class_java_lang_String);
174 a = builtin_newarray_char(utflength);
176 /* javastring or character-array could not be created */
178 if ((o == NULL) || (a == NULL))
181 /* decompress utf-string */
/* the cast drops const because utf_nextu2 is handed a char ** here */
185 for (i = 0; i < utflength; i++)
186 LLNI_array_direct(a, i) = utf_nextu2((char **) &utf_ptr);
188 /* set fields of the javastring-object */
190 s = (java_lang_String *) o;
/* the new string spans the whole array: offset 0, count == array length */
192 LLNI_field_set_ref(s, value , a);
193 LLNI_field_set_val(s, offset, 0);
194 LLNI_field_set_val(s, count , utflength);
200 /* javastring_safe_new_from_utf8 ***********************************************
202 Create a new object of type java/lang/String with the text from
203 the specified UTF-8 string. This function is safe for invalid UTF-8.
204 (Invalid characters will be replaced by U+fffd.)
207 text.........the UTF-8 string, zero-terminated.
210 the java.lang.String object, or
211 NULL if an exception has been thrown
213 *******************************************************************************/
/* Builds a java.lang.String from a zero-terminated UTF-8 string using the
   utf8_safe_* helpers for counting and conversion.  The byte length is taken
   with strlen and passed to both helpers.
   NOTE(review): handling of a NULL text argument and the early return after
   a failed allocation are not visible in this excerpt. */
215 java_handle_t *javastring_safe_new_from_utf8(const char *text)
218 java_handle_chararray_t *a;
225 /* Get number of bytes. We need this to completely emulate the messy */
226 /* behaviour of the RI. :( */
228 nbytes = strlen(text);
230 /* calculate number of Java characters */
232 len = utf8_safe_number_of_u2s(text, nbytes);
234 /* allocate the String object and the char array */
/* both allocations are attempted before either result is checked */
236 o = builtin_new(class_java_lang_String);
237 a = builtin_newarray_char(len);
239 /* javastring or character-array could not be created? */
241 if ((o == NULL) || (a == NULL))
244 /* decompress UTF-8 string */
/* the converter writes straight into the array's data area */
246 utf8_safe_convert_to_u2s(text, nbytes, a->data);
248 /* set fields of the String object */
250 s = (java_lang_String *) o;
/* the new string spans the whole array: offset 0, count == len */
252 LLNI_field_set_ref(s, value , a);
253 LLNI_field_set_val(s, offset, 0);
254 LLNI_field_set_val(s, count , len);
260 /* javastring_new_from_utf_string **********************************************
262 Create a new object of type java/lang/String with the text from
263 the specified zero-terminated utf8 string.
266 utfstr.......the zero-terminated UTF-8 string to convert
267 (its byte length is determined with strlen)
270 the java.lang.String object, or
271 NULL if an exception has been thrown
273 *******************************************************************************/
/* Convenience wrapper: forwards utfstr to javastring_new_from_utf_buffer,
   using strlen(utfstr) as the byte count.  Because strlen is applied to it,
   utfstr must be a valid zero-terminated string. */
275 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
279 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
283 /* javastring_new **************************************************************
285 creates a new object of type java/lang/String with the text of
286 the specified utf8-string
288 return: pointer to the string or NULL if memory is exhausted.
290 *******************************************************************************/
/* Builds a java.lang.String from a utf symbol: the number of UTF-16 units is
   computed, the String object and a char array of that length are allocated,
   and the symbol's text is decoded into the array one unit at a time.
   NOTE(review): the condition guarding the NullPointerException (presumably
   u == NULL), the early returns and the initialization of utf_ptr are not
   visible in this excerpt. */
292 java_handle_t *javastring_new(utf *u)
294 char *utf_ptr; /* current utf character in utf string */
295 u4 utflength; /* length of utf-string if uncompressed */
297 java_handle_chararray_t *a;
302 exceptions_throw_nullpointerexception();
/* count the characters up front so the array gets its final size */
307 utflength = utf_get_number_of_u2s(u);
/* both allocations are attempted before either result is checked */
309 o = builtin_new(class_java_lang_String);
310 a = builtin_newarray_char(utflength);
312 /* javastring or character-array could not be created */
314 if ((o == NULL) || (a == NULL))
317 /* decompress utf-string */
319 for (i = 0; i < utflength; i++)
320 LLNI_array_direct(a, i) = utf_nextu2(&utf_ptr);
322 /* set fields of the javastring-object */
324 s = (java_lang_String *) o;
/* the new string spans the whole array: offset 0, count == array length */
326 LLNI_field_set_ref(s, value , a);
327 LLNI_field_set_val(s, offset, 0);
328 LLNI_field_set_val(s, count , utflength);
334 /* javastring_new_slash_to_dot *************************************************
336 creates a new object of type java/lang/String with the text of
337 the specified utf8-string with slashes changed to dots
339 return: pointer to the string or NULL if memory is exhausted.
341 *******************************************************************************/
/* Same construction as javastring_new, except that each decoded character
   passes through the local ch before it is stored, which is where the
   header's slash-to-dot substitution would take place.
   NOTE(review): the substitution statement itself, the condition guarding
   the NullPointerException, the early returns and the initialization of
   utf_ptr are not visible in this excerpt. */
343 java_handle_t *javastring_new_slash_to_dot(utf *u)
345 char *utf_ptr; /* current utf character in utf string */
346 u4 utflength; /* length of utf-string if uncompressed */
348 java_handle_chararray_t *a;
354 exceptions_throw_nullpointerexception();
/* count the characters up front so the array gets its final size */
359 utflength = utf_get_number_of_u2s(u);
/* both allocations are attempted before either result is checked */
361 o = builtin_new(class_java_lang_String);
362 a = builtin_newarray_char(utflength);
364 /* javastring or character-array could not be created */
365 if ((o == NULL) || (a == NULL))
368 /* decompress utf-string */
370 for (i = 0; i < utflength; i++) {
/* decode into a temporary first so the character can be altered */
371 ch = utf_nextu2(&utf_ptr);
374 LLNI_array_direct(a, i) = ch;
377 /* set fields of the javastring-object */
379 s = (java_lang_String *) o;
/* the new string spans the whole array: offset 0, count == array length */
381 LLNI_field_set_ref(s, value , a);
382 LLNI_field_set_val(s, offset, 0);
383 LLNI_field_set_val(s, count , utflength);
389 /* javastring_new_from_ascii ***************************************************
391 creates a new java/lang/String object which contains the given ASCII
392 C-string converted to UTF-16.
395 text.........string of ASCII characters
398 the java.lang.String object, or
399 NULL if an exception has been thrown.
401 *******************************************************************************/
/* Builds a java.lang.String by storing each char of text directly into a
   freshly allocated char array of len elements; no decoding is performed,
   so the input is expected to be plain ASCII as the header states.
   NOTE(review): the condition guarding the NullPointerException (presumably
   text == NULL), the computation of len and the early returns are not
   visible in this excerpt. */
403 java_handle_t *javastring_new_from_ascii(const char *text)
406 s4 len; /* length of the string */
409 java_handle_chararray_t *a;
412 exceptions_throw_nullpointerexception();
/* both allocations are attempted before either result is checked */
418 o = builtin_new(class_java_lang_String);
419 a = builtin_newarray_char(len);
421 /* javastring or character-array could not be created */
423 if ((o == NULL) || (a == NULL))
/* one array element per input char, assigned without any conversion */
428 for (i = 0; i < len; i++)
429 LLNI_array_direct(a, i) = text[i];
431 /* set fields of the javastring-object */
433 s = (java_lang_String *) o;
/* the new string spans the whole array: offset 0, count == len */
435 LLNI_field_set_ref(s, value , a);
436 LLNI_field_set_val(s, offset, 0);
437 LLNI_field_set_val(s, count , len);
443 /* javastring_tochar ***********************************************************
445 converts a Java string into a C string.
447 return: pointer to C string
449 Caution: calling method MUST release the allocated memory!
451 *******************************************************************************/
/* Copies the characters of a Java string into a newly MNEW-allocated char
   buffer of count + 1 bytes.  Each array element is assigned to a char, so
   characters that do not fit into a char are not preserved.  Per the header
   the caller owns the buffer and must release it.
   NOTE(review): the NULL checks, the store of the terminating zero byte and
   the return statement are not visible in this excerpt. */
453 char *javastring_tochar(java_handle_t *so)
455 java_lang_String *s = (java_lang_String *) so;
456 java_handle_chararray_t *a;
463 LLNI_field_get_ref(s, value, a);
/* one extra byte beyond count, presumably for the string terminator */
468 buf = MNEW(char, LLNI_field_direct(s, count) + 1);
/* the string's window into the array starts at offset */
470 for (i = 0; i < LLNI_field_direct(s, count); i++)
471 buf[i] = a->data[LLNI_field_direct(s, offset) + i];
479 /* javastring_toutf ************************************************************
481 Make utf symbol from javastring.
483 *******************************************************************************/
/* Creates a utf symbol from the count characters of the Java string that
   start at offset within its value array; isclassname is forwarded unchanged
   to utf_new_u2.
   NOTE(review): any NULL check on string is not visible in this excerpt. */
485 utf *javastring_toutf(java_handle_t *string, bool isclassname)
489 s = (java_lang_String *) string;
/* pointer to the first character of the string's window, then its length */
494 return utf_new_u2(LLNI_field_direct(s, value)->data + LLNI_field_direct(s, offset), LLNI_field_direct(s, count), isclassname);
498 /* literalstring_u2 ************************************************************
500 Searches for the javastring with the specified u2-array in the
501 string hashtable, if there is no such string a new one is created.
503 If copymode is true a copy of the u2-array is made.
505 *******************************************************************************/
/* Looks up the length characters of a that start at offset in
   hashtable_string and returns the stored String object when an equal one
   exists; otherwise a new String and a new literalstring entry are created,
   linked at the head of the slot's chain and returned.  The whole operation
   runs between LOCK_MONITOR_ENTER and LOCK_MONITOR_EXIT on
   lock_hashtable_string.  When the entry count exceeds twice the slot count
   the table is rebuilt at double size.
   NOTE(review): the rest of the parameter list (copymode, per the header),
   the chain-walk loop, the copymode conditions and the assignments of the
   new String's value/offset/count fields are not visible in this excerpt. */
507 java_object_t *literalstring_u2(java_chararray_t *a, u4 length, u4 offset,
510 literalstring *s; /* hashtable element */
511 java_lang_String *js; /* u2-array wrapped in javastring */
512 java_chararray_t *ca; /* copy of u2-array */
517 LOCK_MONITOR_ENTER(lock_hashtable_string);
519 /* find location in hashtable */
/* masking with size - 1 requires the table size to be a power of two */
521 key = unicode_hashkey(a->data + offset, length);
522 slot = key & (hashtable_string.size - 1);
523 s = hashtable_string.ptr[slot];
526 js = (java_lang_String *) s->string;
/* cheap length test first, then compare character by character */
528 if (length == js->count) {
531 for (i = 0; i < length; i++)
/* NOTE(review): the stored string is read from index 0, i.e. js->offset is
   not applied; this relies on table entries having offset 0 */
532 if (a->data[offset + i] != js->value->data[i])
535 /* string already in hashtable, free memory */
/* same size formula as the allocation in literalstring_new; presumably only
   executed when the caller handed over ownership (copymode false) */
538 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
540 LOCK_MONITOR_EXIT(lock_hashtable_string);
542 return (java_object_t *) js;
546 /* follow link in external hash chain */
551 /* create copy of u2-array for new javastring */
/* NOTE(review): the purpose of the extra 10 bytes is not documented here */
552 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
553 ca = mem_alloc(arraysize);
554 /* memcpy(ca, a, arraysize); */
555 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
/* NOTE(review): &(a->data) + offset steps in units of the whole data member,
   which matches a->data + offset only if data is declared with one element.
   The byte count is 10 - sizeof(u2) larger than length characters, so the
   copy reads that many bytes past the last character of the source. */
556 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
562 /* location in hashtable found, complete arrayheader */
564 ca->header.objheader.vftbl =
565 primitive_arrayclass_get_by_type(ARRAYTYPE_CHAR)->vftbl;
566 ca->header.size = length;
/* the String class must be loaded because its vftbl is installed below */
568 assert(class_java_lang_String);
569 assert(class_java_lang_String->state & CLASS_LOADED);
571 /* create new javastring */
573 js = NEW(java_lang_String);
575 #if defined(ENABLE_STATISTICS)
577 size_string += sizeof(java_lang_String);
580 #if defined(ENABLE_THREADS)
581 lock_init_object_lock(&js->header);
584 js->header.vftbl = class_java_lang_String->vftbl;
589 /* create new literalstring */
591 s = NEW(literalstring);
593 #if defined(ENABLE_STATISTICS)
595 size_string += sizeof(literalstring);
/* insert at the head of the slot's chain */
598 s->hashlink = hashtable_string.ptr[slot];
599 s->string = (java_object_t *) js;
600 hashtable_string.ptr[slot] = s;
602 /* update number of hashtable entries */
604 hashtable_string.entries++;
606 /* reorganization of hashtable */
608 if (hashtable_string.entries > (hashtable_string.size * 2)) {
609 /* reorganization of hashtable, average length of the external
610 chains is approx. 2 */
614 literalstring *nexts;
615 java_lang_String *tmpjs;
616 hashtable newhash; /* the new hashtable */
618 /* create new hashtable, double the size */
620 hashtable_create(&newhash, hashtable_string.size * 2);
621 newhash.entries = hashtable_string.entries;
623 /* transfer elements to new hashtable */
625 for (i = 0; i < hashtable_string.size; i++) {
626 s = hashtable_string.ptr[i];
630 tmpjs = (java_lang_String *) s->string;
/* the key is recomputed from index 0 of the stored array (offset 0 again) */
631 slot = unicode_hashkey(tmpjs->value->data, tmpjs->count) & (newhash.size - 1);
/* entries are relinked into the new table, not copied */
633 s->hashlink = newhash.ptr[slot];
634 newhash.ptr[slot] = s;
636 /* follow link in external hash chain */
641 /* dispose old table */
/* only the old slot array is released; the entries now live in newhash */
643 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
644 hashtable_string = newhash;
647 LOCK_MONITOR_EXIT(lock_hashtable_string);
649 return (java_object_t *) js;
653 /* literalstring_new ***********************************************************
655 Creates a new javastring with the text of the utf-symbol and inserts it into
656 the string hashtable.
658 *******************************************************************************/
/* Decodes a utf symbol into a freshly mem_alloc'ed character array and hands
   that array to literalstring_u2 with offset 0 and copymode false, returning
   whatever literalstring_u2 returns.  Only the data elements are written
   here; literalstring_u2 fills in the array header (vftbl and size) and
   frees the array itself when an equal string is already in the table.
   NOTE(review): the initialization of utf_ptr is not visible in this
   excerpt. */
660 java_object_t *literalstring_new(utf *u)
662 char *utf_ptr; /* pointer to current unicode character */
664 u4 utflength; /* length of utf-string if uncompressed */
665 java_chararray_t *a; /* u2-array constructed from utf string */
669 utflength = utf_get_number_of_u2s(u);
671 /* allocate memory */
/* must match the size literalstring_u2 passes to mem_free for this array */
672 a = mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
674 /* convert utf-string to u2-array */
675 for (i = 0; i < utflength; i++)
676 a->data[i] = utf_nextu2(&utf_ptr);
/* copymode false: the array is passed on rather than copied */
678 return literalstring_u2(a, utflength, 0, false);
682 /* literalstring_free **********************************************************
684 Removes a javastring from memory.
686 *******************************************************************************/
/* Releases the String object and then its character array with FREE.
   NOTE(review): the statement that loads a from the string is not visible in
   this excerpt; it has to run before s is freed.
   NOTE(review): the array size passed to FREE omits the 10 extra bytes that
   literalstring_new and literalstring_u2 add when allocating, as the
   original "+10 ??" remark already questions. */
688 void literalstring_free(java_object_t* string)
693 s = (java_lang_String *) string;
696 /* dispose memory of java.lang.String object */
697 FREE(s, java_lang_String);
699 /* dispose memory of java-characterarray */
/* a->header.size is read from the array while it is still allocated */
700 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
704 /* javastring_intern ***********************************************************
706 Intern the given Java string.
708 *******************************************************************************/
/* Interns a Java string by passing its value array, count and offset to
   literalstring_u2 with copymode true, so the caller's array is copied
   rather than taken over when a new table entry is needed.
   NOTE(review): the return statement is not visible in this excerpt;
   presumably the object obtained from literalstring_u2 is returned. */
710 java_handle_t *javastring_intern(java_handle_t *s)
712 java_lang_String *so;
713 java_chararray_t *value;
716 /* java_lang_String *o; */
719 so = (java_lang_String *) s;
/* the three fields are read directly from the String object */
721 value = LLNI_field_direct(so, value);
722 count = LLNI_field_direct(so, count);
723 offset = LLNI_field_direct(so, offset);
/* argument order is (array, length, offset, copymode) */
725 o = literalstring_u2(value, count, offset, true);
731 /* javastring_print ************************************************************
733 Print the given Java string.
735 *******************************************************************************/
/* Iterates over the characters of a Java string, from index offset up to but
   not including offset + count of its value array, loading each into c.
   NOTE(review): the statement that outputs c is not visible in this
   excerpt. */
737 void javastring_print(java_handle_t *s)
739 java_lang_String *so;
740 java_chararray_t *value;
746 so = (java_lang_String *) s;
/* the three fields are read directly from the String object */
748 value = LLNI_field_direct(so, value);
749 count = LLNI_field_direct(so, count);
750 offset = LLNI_field_direct(so, offset);
/* i indexes the backing array, so it starts at the string's offset */
752 for (i = offset; i < offset + count; i++) {
753 c = LLNI_array_direct(value, i);
760 * These are local overrides for various environment variables in Emacs.
761 * Please do not remove this and leave it at the end of the file, where
762 * Emacs will automagically detect them.
763 * ---------------------------------------------------------------------
766 * indent-tabs-mode: t
770 * vim:noexpandtab:sw=4:ts=4: