1 /* src/vm/string.c - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
4 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
5 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
6 J. Wenninger, Institut f. Computersprachen - TU Wien
8 This file is part of CACAO.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2, or (at
13 your option) any later version.
15 This program is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 $Id: string.c 7967 2007-05-25 15:03:46Z twisti $
36 #include "vm/global.h"
38 #include "mm/memory.h"
40 #include "native/jni.h"
42 #include "native/include/java_lang_String.h"
44 #include "threads/lock-common.h"
46 #include "vm/builtin.h"
47 #include "vm/exceptions.h"
48 #include "vm/stringlocal.h"
50 #include "vmcore/options.h"
51 #include "vmcore/statistics.h"
52 #include "vmcore/utf8.h"
55 /* global variables ***********************************************************/
57 /* hashsize must be power of 2 */
/* (literalstring_u2 picks a slot with `key & (hashtable_string.size - 1)`,
   a mask that only reaches every slot when the size is a power of two;
   the rehash in literalstring_u2 doubles the size, which keeps that true) */
59 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
61 hashtable hashtable_string; /* hashtable for javastrings */
63 #if defined(ENABLE_THREADS)
/* object used as the monitor around the lookup and insert in
   literalstring_u2; allocated and initialized in string_init */
64 static java_objectheader *lock_hashtable_string;
68 /* string_init *****************************************************************
70 Create the string (javastring) hashtable with HASHTABLE_STRING_SIZE
   slots and, when ENABLE_THREADS is defined, allocate and initialize
   the object that serves as the hashtable lock.

   NOTE(review): the return statement is not visible in this excerpt.
72 *******************************************************************************/
74 bool string_init(void)
76 /* create string (javastring) hashtable */
78 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
80 #if defined(ENABLE_THREADS)
81 /* create string hashtable lock object */
/* the lock is a bare java_objectheader; the visible code only ever hands
   it to LOCK_INIT_OBJECT_LOCK and LOCK_MONITOR_ENTER/EXIT */
83 lock_hashtable_string = NEW(java_objectheader);
85 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
94 /* stringtable_update **********************************************************
96 Traverses the javastring hashtable and sets the vftbl-entries of
97 javastrings which were temporarily set to NULL, because
98 java.lang.Object was not yet loaded.

   For each entry the header of the String object and the header of its
   character array are checked separately; only a NULL vftbl is
   overwritten. An entry without a String object or without a value
   array is reported through log_text.
100 *******************************************************************************/
102 void stringtable_update(void)
104 java_lang_String *js;
106 literalstring *s; /* hashtable entry */
/* visit every slot; s starts at the head of the slot's chain */
109 for (i = 0; i < hashtable_string.size; i++) {
110 s = hashtable_string.ptr[i];
114 js = (java_lang_String *) s->string;
116 if (!js || !js->value) {
117 /* error in hashtable found */
118 log_text("invalid literalstring in hashtable");
/* patch the String object itself */
124 if (!js->header.vftbl)
125 /* vftbl of javastring is NULL */
126 js->header.vftbl = class_java_lang_String->vftbl;
/* patch the backing array; NOTE(review): the assignment of `a` is not
   visible in this excerpt -- presumably a = js->value, confirm */
128 if (!a->header.objheader.vftbl)
129 /* vftbl of character-array is NULL */
130 a->header.objheader.vftbl = primitivetype_table[ARRAYTYPE_CHAR].arrayvftbl;
132 /* follow link in external hash chain */
140 /* javastring_new_from_utf_buffer **********************************************
142 Create a new object of type java/lang/String with the text from
143 the specified utf8 buffer.
146 buffer.......points to first char in the buffer
147 blength......number of bytes to read from the buffer
150 the java.lang.String object, or
151 NULL if an exception has been thrown
153 *******************************************************************************/
155 static java_objectheader *javastring_new_from_utf_buffer(const char *buffer,
158 const char *utf_ptr; /* current utf character in utf string */
159 u4 utflength; /* length of utf-string if uncompressed */
160 java_objectheader *o;
161 java_lang_String *s; /* result-string */
/* the decoded length sizes the char array allocated below */
167 utflength = utf_get_number_of_u2s_for_buffer(buffer,blength);
/* both allocations are attempted before either result is tested */
169 o = builtin_new(class_java_lang_String);
170 a = builtin_newarray_char(utflength);
172 /* javastring or character-array could not be created */
174 if ((o == NULL) || (a == NULL))
177 /* decompress utf-string */
/* one array element per utf_nextu2 call; the cast drops the const of
   utf_ptr to fit the char ** argument. utf_nextu2 presumably advances
   the pointer -- nothing else in the loop moves it. NOTE(review): the
   initialization of utf_ptr is not visible in this excerpt. */
181 for (i = 0; i < utflength; i++)
182 a->data[i] = utf_nextu2((char **) &utf_ptr);
184 /* set fields of the javastring-object */
186 s = (java_lang_String *) o;
190 s->count = utflength;
196 /* javastring_safe_new_from_utf8 ***********************************************
198 Create a new object of type java/lang/String with the text from
199 the specified UTF-8 string. This function is safe for invalid UTF-8.
200 (Invalid characters will be replaced by U+fffd.)
203 text.........the UTF-8 string, zero-terminated.
206 the java.lang.String object, or
207 NULL if an exception has been thrown
209 *******************************************************************************/
211 java_objectheader *javastring_safe_new_from_utf8(const char *text)
213 java_objectheader *o;
221 /* Get number of bytes. We need this to completely emulate the messy */
222 /* behaviour of the RI. :( */
/* NOTE(review): strlen dereferences text; no NULL guard is visible in
   this excerpt */
224 nbytes = strlen(text);
226 /* calculate number of Java characters */
228 len = utf8_safe_number_of_u2s(text, nbytes);
230 /* allocate the String object and the char array */
232 o = builtin_new(class_java_lang_String);
233 a = builtin_newarray_char(len);
235 /* javastring or character-array could not be created? */
237 if ((o == NULL) || (a == NULL))
240 /* decompress UTF-8 string */
/* writes into the array that was sized with len, computed from the same
   text/nbytes pair passed here */
242 utf8_safe_convert_to_u2s(text, nbytes, a->data);
244 /* set fields of the String object */
246 s = (java_lang_String *) o;
256 /* javastring_new_from_utf_string **********************************************
258 Create a new object of type java/lang/String with the text from
259 the specified zero-terminated utf8 string.
262 utfstr.......points to the zero-terminated utf8 string; its byte
263 length is taken with strlen
266 the java.lang.String object, or
267 NULL if an exception has been thrown
269 *******************************************************************************/
271 java_objectheader *javastring_new_from_utf_string(const char *utfstr)
/* thin wrapper: measure the string, then delegate to the buffer variant */
275 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
279 /* javastring_new **************************************************************
281 creates a new object of type java/lang/String with the text of
282 the specified utf8-string
284 return: pointer to the string or NULL if memory is exhausted.

   NOTE(review): a NullPointerException is also raised on one path; its
   guard and the value returned there are not visible in this excerpt
   (presumably u == NULL) -- confirm.
286 *******************************************************************************/
288 java_objectheader *javastring_new(utf *u)
290 char *utf_ptr; /* current utf character in utf string */
291 u4 utflength; /* length of utf-string if uncompressed */
292 java_objectheader *o;
298 exceptions_throw_nullpointerexception();
/* the number of u2s decides the size of the char array */
303 utflength = utf_get_number_of_u2s(u);
/* both allocations are attempted before either result is tested */
305 o = builtin_new(class_java_lang_String);
306 a = builtin_newarray_char(utflength);
308 /* javastring or character-array could not be created */
310 if ((o == NULL) || (a == NULL))
313 /* decompress utf-string */
/* NOTE(review): the initialization of utf_ptr is not visible in this
   excerpt */
315 for (i = 0; i < utflength; i++)
316 a->data[i] = utf_nextu2(&utf_ptr);
318 /* set fields of the javastring-object */
320 s = (java_lang_String *) o;
324 s->count = utflength;
330 /* javastring_new_slash_to_dot *************************************************
332 creates a new object of type java/lang/String with the text of
333 the specified utf8-string with slashes changed to dots
335 return: pointer to the string or NULL if memory is exhausted.

   NOTE(review): a NullPointerException is also raised on one path; its
   guard and the value returned there are not visible in this excerpt
   (presumably u == NULL) -- confirm.
337 *******************************************************************************/
339 java_objectheader *javastring_new_slash_to_dot(utf *u)
341 char *utf_ptr; /* current utf character in utf string */
342 u4 utflength; /* length of utf-string if uncompressed */
343 java_objectheader *o;
350 exceptions_throw_nullpointerexception();
/* the number of u2s decides the size of the char array */
355 utflength = utf_get_number_of_u2s(u);
/* both allocations are attempted before either result is tested */
357 o = builtin_new(class_java_lang_String);
358 a = builtin_newarray_char(utflength);
360 /* javastring or character-array could not be created */
361 if ((o == NULL) || (a == NULL))
364 /* decompress utf-string */
/* each decoded character is first held in ch; the slash-to-dot
   substitution and the store into the array are not visible in this
   excerpt */
366 for (i = 0; i < utflength; i++) {
367 ch = utf_nextu2(&utf_ptr);
373 /* set fields of the javastring-object */
375 s = (java_lang_String *) o;
379 s->count = utflength;
385 /* javastring_new_from_ascii ***************************************************
387 creates a new java/lang/String object which contains the given ASCII
388 C-string converted to UTF-16.
391 text.........string of ASCII characters
394 the java.lang.String object, or
395 NULL if an exception has been thrown.
397 *******************************************************************************/
399 java_objectheader *javastring_new_from_ascii(const char *text)
402 s4 len; /* length of the string */
403 java_objectheader *o;
/* NOTE(review): the guard for this throw is not visible in this excerpt
   (presumably text == NULL) -- confirm */
408 exceptions_throw_nullpointerexception();
414 o = builtin_new(class_java_lang_String);
415 a = builtin_newarray_char(len);
417 /* javastring or character-array could not be created */
419 if ((o == NULL) || (a == NULL))
/* element-wise copy; each char is converted to the array's element type
   by plain assignment. NOTE(review): text[i] is a plain char, so a byte
   >= 0x80 would convert according to the platform's char signedness;
   the documented input is ASCII, where this cannot occur. */
424 for (i = 0; i < len; i++)
425 a->data[i] = text[i];
427 /* set fields of the javastring-object */
429 s = (java_lang_String *) o;
439 /* javastring_tochar ***********************************************************
441 converts a Java string into a C string.

   Each character of the string is narrowed to a char by plain
   assignment; nothing in the visible code encodes characters that do
   not fit into a char.
443 return: pointer to C string
445 Caution: calling method MUST release the allocated memory!
447 *******************************************************************************/
449 char *javastring_tochar(java_objectheader *so)
451 java_lang_String *s = (java_lang_String *) so;
/* count + 1 chars: room for every character plus one more, presumably
   the terminating NUL (that store is not visible in this excerpt) */
464 buf = MNEW(char, s->count + 1);
/* reading starts at s->offset inside the backing array */
466 for (i = 0; i < s->count; i++)
467 buf[i] = a->data[s->offset + i];
475 /* javastring_toutf ************************************************************
477 Make utf symbol from javastring.

   string........the java.lang.String object, passed as java_objectheader
   isclassname...forwarded unchanged to utf_new_u2
479 *******************************************************************************/
481 utf *javastring_toutf(java_objectheader *string, bool isclassname)
485 s = (java_lang_String *) string;
/* hand utf_new_u2 the first character of the string (value->data plus
   offset) together with the character count */
490 return utf_new_u2(s->value->data + s->offset, s->count, isclassname);
494 /* literalstring_u2 ************************************************************
496 Searches for the javastring with the specified u2-array in the
497 string hashtable, if there is no such string a new one is created.
499 If copymode is true a copy of the u2-array is made.

   a.........character array holding the text
   length....number of characters to use
   offset....index of the first character inside a->data

   The lookup and the insert (including a possible rehash) run between
   LOCK_MONITOR_ENTER and LOCK_MONITOR_EXIT on lock_hashtable_string.
   Returns the String found in the table, or the newly created one.
501 *******************************************************************************/
503 java_objectheader *literalstring_u2(java_chararray *a, u4 length, u4 offset,
506 literalstring *s; /* hashtable element */
507 java_lang_String *js; /* u2-array wrapped in javastring */
508 java_chararray *stringdata; /* copy of u2-array */
513 LOCK_MONITOR_ENTER(lock_hashtable_string);
515 /* find location in hashtable */
/* hash the requested characters, then mask the key down to a slot index */
517 key = unicode_hashkey(a->data + offset, length);
518 slot = key & (hashtable_string.size - 1);
519 s = hashtable_string.ptr[slot];
522 js = (java_lang_String *) s->string;
/* cheap length test first, then an element-wise comparison.
   NOTE(review): the stored string is read from js->value->data[i]
   without adding js->offset -- fine only if table entries always have
   offset 0; the store of js->offset is not visible here, confirm. */
524 if (length == js->count) {
527 for (i = 0; i < length; i++)
528 if (a->data[offset + i] != js->value->data[i])
531 /* string already in hashtable, free memory */
/* the size matches the formula literalstring_new allocates with; the
   condition guarding this free is not visible in this excerpt */
534 mem_free(a, sizeof(java_chararray) + sizeof(u2) * (length - 1) + 10);
536 LOCK_MONITOR_EXIT(lock_hashtable_string);
538 return (java_objectheader *) js;
542 /* follow link in external hash chain */
547 /* create copy of u2-array for new javastring */
/* `length - 1`: presumably java_chararray already contains one element;
   `+ 10`: extra bytes of unstated purpose, used consistently wherever
   these arrays are allocated in this file.
   NOTE(review): length is a u4, so for length == 0 `length - 1` wraps
   around (assuming u4 is unsigned); whether the final size is still
   right depends on the width of this arithmetic and of mem_alloc's
   parameter -- confirm. */
548 u4 arraysize = sizeof(java_chararray) + sizeof(u2) * (length - 1) + 10;
549 stringdata = mem_alloc(arraysize);
550 /* memcpy(stringdata, a, arraysize); */
/* header and payload are copied separately so that the payload can start
   at `offset`.
   NOTE(review): `&(a->data) + offset` advances in units of
   sizeof(a->data), not sizeof(u2); it equals `offset` elements only if
   data is declared as a one-element u2 array -- confirm.
   NOTE(review): the byte count includes the `+ 10` padding and so reads
   past the `length` characters of the source -- confirm the source is
   always padded the same way. */
551 memcpy(&(stringdata->header), &(a->header), sizeof(java_arrayheader));
552 memcpy(&(stringdata->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
558 /* location in hashtable found, complete arrayheader */
560 stringdata->header.objheader.vftbl =
561 primitivetype_table[ARRAYTYPE_CHAR].arrayvftbl;
562 stringdata->header.size = length;
564 assert(class_java_lang_String);
565 assert(class_java_lang_String->state & CLASS_LOADED);
567 /* if we use eager loading, we have to check loaded String class */
/* NOTE(review): the condition guarding this call is not visible here */
570 list_add_first(&unlinkedclasses, class_java_lang_String);
572 /* create new javastring */
/* allocated with NEW, unlike the javastring_new* constructors in this
   file, which use builtin_new */
574 js = NEW(java_lang_String);
576 #if defined(ENABLE_STATISTICS)
578 size_string += sizeof(java_lang_String);
581 #if defined(ENABLE_THREADS)
582 lock_init_object_lock(&js->header);
585 js->header.vftbl = class_java_lang_String->vftbl;
586 js->value = stringdata;
590 /* create new literalstring */
592 s = NEW(literalstring);
594 #if defined(ENABLE_STATISTICS)
596 size_string += sizeof(literalstring);
/* push the new entry at the head of the slot's chain */
599 s->hashlink = hashtable_string.ptr[slot];
600 s->string = (java_objectheader *) js;
601 hashtable_string.ptr[slot] = s;
603 /* update number of hashtable entries */
605 hashtable_string.entries++;
607 /* reorganization of hashtable */
/* grow once there are more than two entries per slot on average */
609 if (hashtable_string.entries > (hashtable_string.size * 2)) {
610 /* reorganization of hashtable, average length of the external
611 chains is approx. 2 */
615 literalstring *nexts;
616 java_lang_String *tmpjs;
617 hashtable newhash; /* the new hashtable */
619 /* create new hashtable, double the size */
621 hashtable_create(&newhash, hashtable_string.size * 2);
622 newhash.entries = hashtable_string.entries;
624 /* transfer elements to new hashtable */
/* existing literalstring nodes are relinked, not reallocated: each one
   gets a slot from a fresh hash of its characters and is pushed at the
   head of that slot's chain in the new table */
626 for (i = 0; i < hashtable_string.size; i++) {
627 s = hashtable_string.ptr[i];
631 tmpjs = (java_lang_String *) s->string;
632 slot = unicode_hashkey(tmpjs->value->data, tmpjs->count) & (newhash.size - 1);
634 s->hashlink = newhash.ptr[slot];
635 newhash.ptr[slot] = s;
637 /* follow link in external hash chain */
642 /* dispose old table */
/* only the old slot array is released; the global table is then replaced
   by struct assignment */
644 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
645 hashtable_string = newhash;
648 LOCK_MONITOR_EXIT(lock_hashtable_string);
650 return (java_objectheader *) js;
654 /* literalstring_new ***********************************************************
656 Creates a new javastring with the text of the utf-symbol and inserts it into
657 the string hashtable.

   The characters are decoded into a freshly allocated array which is
   handed to literalstring_u2 with offset 0 and copymode false; the
   value returned by literalstring_u2 is returned unchanged.
659 *******************************************************************************/
661 java_objectheader *literalstring_new(utf *u)
663 char *utf_ptr; /* pointer to current unicode character */
665 u4 utflength; /* length of utf-string if uncompressed */
666 java_chararray *a; /* u2-array constructed from utf string */
670 utflength = utf_get_number_of_u2s(u);
672 /* allocate memory */
/* same size formula literalstring_u2 uses when it frees or copies the
   array. NOTE(review): utflength is a u4, so for an empty string
   `utflength - 1` wraps around (assuming u4 is unsigned) -- confirm the
   resulting size is still what mem_alloc should receive. */
673 a = mem_alloc(sizeof(java_chararray) + sizeof(u2) * (utflength - 1) + 10);
675 /* convert utf-string to u2-array */
/* NOTE(review): the initialization of utf_ptr is not visible in this
   excerpt */
676 for (i = 0; i < utflength; i++)
677 a->data[i] = utf_nextu2(&utf_ptr);
/* the array is not used again here after this call: literalstring_u2
   contains a mem_free of it on its already-in-table path */
679 return literalstring_u2(a, utflength, 0, false);
683 /* literalstring_free **********************************************************
685 Removes a javastring from memory.

   Releases the java.lang.String object and its character array. No
   removal of the entry from the string hashtable is visible in this
   excerpt.
687 *******************************************************************************/
689 void literalstring_free(java_objectheader* string)
694 s = (java_lang_String *) string;
697 /* dispose memory of java.lang.String object */
698 FREE(s, java_lang_String);
700 /* dispose memory of java-characterarray */
/* NOTE(review): this FREE receives a size expression where the call
   above receives a type name -- check what FREE does with its second
   argument (literalstring_u2 releases the same kind of array with
   mem_free and an explicit size). The expression also lacks the `+ 10`
   that the matching allocations include, as the trailing comment
   already questions. The assignment of `a` is not visible in this
   excerpt (presumably a = s->value). */
701 FREE(a, sizeof(java_chararray) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
706 * These are local overrides for various environment variables in Emacs.
707 * Please do not remove this and leave it at the end of the file, where
708 * Emacs will automagically detect them.
709 * ---------------------------------------------------------------------
712 * indent-tabs-mode: t
716 * vim:noexpandtab:sw=4:ts=4: