/* src/vm/string.c - java.lang.String related functions

   Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
   C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
   E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
   J. Wenninger, Institut f. Computersprachen - TU Wien

   This file is part of CACAO.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   $Id: string.c 7813 2007-04-25 19:20:13Z twisti $

*/

#include <assert.h>
#include <string.h>

#include "vm/global.h"

#include "mm/memory.h"

#include "native/jni.h"

#include "native/include/java_lang_String.h"

#include "threads/lock-common.h"

#include "vm/builtin.h"
#include "vm/exceptions.h"
#include "vm/stringlocal.h"

#include "vmcore/options.h"
#include "vmcore/statistics.h"
#include "vmcore/utf8.h"

55 /* global variables ***********************************************************/
57 /* hashsize must be power of 2 */
59 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
61 hashtable hashtable_string; /* hashtable for javastrings */
63 #if defined(ENABLE_THREADS)
64 static java_objectheader *lock_hashtable_string;
68 /* string_init *****************************************************************
70 Initialize the string hashtable lock.
72 *******************************************************************************/
74 bool string_init(void)
76 /* create string (javastring) hashtable */
78 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
80 #if defined(ENABLE_THREADS)
81 /* create string hashtable lock object */
83 lock_hashtable_string = NEW(java_objectheader);
85 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
94 /* stringtable_update **********************************************************
96 Traverses the javastring hashtable and sets the vftbl-entries of
97 javastrings which were temporarily set to NULL, because
98 java.lang.Object was not yet loaded.
100 *******************************************************************************/
102 void stringtable_update(void)
104 java_lang_String *js;
106 literalstring *s; /* hashtable entry */
109 for (i = 0; i < hashtable_string.size; i++) {
110 s = hashtable_string.ptr[i];
114 js = (java_lang_String *) s->string;
116 if (!js || !js->value) {
117 /* error in hashtable found */
118 log_text("invalid literalstring in hashtable");
124 if (!js->header.vftbl)
125 /* vftbl of javastring is NULL */
126 js->header.vftbl = class_java_lang_String->vftbl;
128 if (!a->header.objheader.vftbl)
129 /* vftbl of character-array is NULL */
130 a->header.objheader.vftbl = primitivetype_table[ARRAYTYPE_CHAR].arrayvftbl;
132 /* follow link in external hash chain */
140 /* javastring_new_from_utf_buffer **********************************************
142 Create a new object of type java/lang/String with the text from
143 the specified utf8 buffer.
146 buffer.......points to first char in the buffer
147 blength......number of bytes to read from the buffer
150 the java.lang.String object, or
151 NULL if an exception has been thrown
153 *******************************************************************************/
155 java_objectheader *javastring_new_from_utf_buffer(const char *buffer, u4 blength)
157 const char *utf_ptr; /* current utf character in utf string */
158 u4 utflength; /* length of utf-string if uncompressed */
159 java_objectheader *o;
160 java_lang_String *s; /* result-string */
166 utflength = utf_get_number_of_u2s_for_buffer(buffer,blength);
168 o = builtin_new(class_java_lang_String);
169 a = builtin_newarray_char(utflength);
171 /* javastring or character-array could not be created */
173 if ((o == NULL) || (a == NULL))
176 /* decompress utf-string */
180 for (i = 0; i < utflength; i++)
181 a->data[i] = utf_nextu2((char **) &utf_ptr);
183 /* set fields of the javastring-object */
185 s = (java_lang_String *) o;
189 s->count = utflength;
195 /* javastring_safe_new_from_utf8 ***********************************************
197 Create a new object of type java/lang/String with the text from
198 the specified UTF-8 string. This function is safe for invalid UTF-8.
199 (Invalid characters will be replaced by U+fffd.)
202 text.........the UTF-8 string, zero-terminated.
205 the java.lang.String object, or
206 NULL if an exception has been thrown
208 *******************************************************************************/
210 java_objectheader *javastring_safe_new_from_utf8(const char *text)
212 java_objectheader *o;
220 /* Get number of bytes. We need this to completely emulate the messy */
221 /* behaviour of the RI. :( */
223 nbytes = strlen(text);
225 /* calculate number of Java characters */
227 len = utf8_safe_number_of_u2s(text, nbytes);
229 /* allocate the String object and the char array */
231 o = builtin_new(class_java_lang_String);
232 a = builtin_newarray_char(len);
234 /* javastring or character-array could not be created? */
236 if ((o == NULL) || (a == NULL))
239 /* decompress UTF-8 string */
241 utf8_safe_convert_to_u2s(text, nbytes, a->data);
243 /* set fields of the String object */
245 s = (java_lang_String *) o;
255 /* javastring_new_from_utf_string **********************************************
257 Create a new object of type java/lang/String with the text from
258 the specified zero-terminated utf8 string.
261 buffer.......points to first char in the buffer
262 blength......number of bytes to read from the buffer
265 the java.lang.String object, or
266 NULL if an exception has been thrown
268 *******************************************************************************/
270 java_objectheader *javastring_new_from_utf_string(const char *utfstr)
274 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
278 /* javastring_new **************************************************************
280 creates a new object of type java/lang/String with the text of
281 the specified utf8-string
283 return: pointer to the string or NULL if memory is exhausted.
285 *******************************************************************************/
287 java_objectheader *javastring_new(utf *u)
289 char *utf_ptr; /* current utf character in utf string */
290 u4 utflength; /* length of utf-string if uncompressed */
291 java_objectheader *o;
297 exceptions_throw_nullpointerexception();
302 utflength = utf_get_number_of_u2s(u);
304 o = builtin_new(class_java_lang_String);
305 a = builtin_newarray_char(utflength);
307 /* javastring or character-array could not be created */
309 if ((o == NULL) || (a == NULL))
312 /* decompress utf-string */
314 for (i = 0; i < utflength; i++)
315 a->data[i] = utf_nextu2(&utf_ptr);
317 /* set fields of the javastring-object */
319 s = (java_lang_String *) o;
323 s->count = utflength;
329 /* javastring_new_slash_to_dot *************************************************
331 creates a new object of type java/lang/String with the text of
332 the specified utf8-string with slashes changed to dots
334 return: pointer to the string or NULL if memory is exhausted.
336 *******************************************************************************/
338 java_objectheader *javastring_new_slash_to_dot(utf *u)
340 char *utf_ptr; /* current utf character in utf string */
341 u4 utflength; /* length of utf-string if uncompressed */
342 java_objectheader *o;
349 exceptions_throw_nullpointerexception();
354 utflength = utf_get_number_of_u2s(u);
356 o = builtin_new(class_java_lang_String);
357 a = builtin_newarray_char(utflength);
359 /* javastring or character-array could not be created */
360 if ((o == NULL) || (a == NULL))
363 /* decompress utf-string */
365 for (i = 0; i < utflength; i++) {
366 ch = utf_nextu2(&utf_ptr);
372 /* set fields of the javastring-object */
374 s = (java_lang_String *) o;
378 s->count = utflength;
384 /* javastring_new_from_ascii ***************************************************
386 creates a new java/lang/String object which contains the given ASCII
387 C-string converted to UTF-16.
390 text.........string of ASCII characters
393 the java.lang.String object, or
394 NULL if an exception has been thrown.
396 *******************************************************************************/
398 java_objectheader *javastring_new_from_ascii(const char *text)
401 s4 len; /* length of the string */
402 java_objectheader *o;
407 exceptions_throw_nullpointerexception();
413 o = builtin_new(class_java_lang_String);
414 a = builtin_newarray_char(len);
416 /* javastring or character-array could not be created */
418 if ((o == NULL) || (a == NULL))
423 for (i = 0; i < len; i++)
424 a->data[i] = text[i];
426 /* set fields of the javastring-object */
428 s = (java_lang_String *) o;
438 /* javastring_tochar ***********************************************************
440 converts a Java string into a C string.
442 return: pointer to C string
444 Caution: calling method MUST release the allocated memory!
446 *******************************************************************************/
448 char *javastring_tochar(java_objectheader *so)
450 java_lang_String *s = (java_lang_String *) so;
463 buf = MNEW(char, s->count + 1);
465 for (i = 0; i < s->count; i++)
466 buf[i] = a->data[s->offset + i];
474 /* javastring_toutf ************************************************************
476 Make utf symbol from javastring.
478 *******************************************************************************/
480 utf *javastring_toutf(java_objectheader *string, bool isclassname)
484 s = (java_lang_String *) string;
489 return utf_new_u2(s->value->data + s->offset, s->count, isclassname);
493 /* literalstring_u2 ************************************************************
495 Searches for the javastring with the specified u2-array in the
496 string hashtable, if there is no such string a new one is created.
498 If copymode is true a copy of the u2-array is made.
500 *******************************************************************************/
/* Summary of what the visible statements do:
     - enter the monitor of lock_hashtable_string,
     - hash `length` u2 values starting at a->data + offset and pick a
       slot by masking the key with (table size - 1),
     - on a match return the already stored String after leaving the
       monitor,
     - otherwise wrap the character data in a new java_lang_String,
       prepend a new literalstring entry to the slot's chain and grow
       the table once entries exceed twice its size.

   NOTE(review): this listing is incomplete. The second line of the
   parameter list, the opening brace, the declarations of key, slot and
   i, the chain-walking loop, the branches on copymode and all #endif
   lines are not present here. Comments below that describe those parts
   are marked as assumptions. */
502 java_objectheader *literalstring_u2(java_chararray *a, u4 length, u4 offset,
505 literalstring *s; /* hashtable element */
506 java_lang_String *js; /* u2-array wrapped in javastring */
507 java_chararray *stringdata; /* copy of u2-array */
/* the whole lookup/insert sequence runs with the table monitor held */
512 LOCK_MONITOR_ENTER(lock_hashtable_string);
514 /* find location in hashtable */
516 key = unicode_hashkey(a->data + offset, length);
/* masking with (size - 1) is why the table size must be a power of 2 */
517 slot = key & (hashtable_string.size - 1);
518 s = hashtable_string.ptr[slot];
/* presumably the following compare runs once per entry of the chain;
   the loop header is not visible -- TODO confirm */
521 js = (java_lang_String *) s->string;
523 if (length == js->count) {
/* character-wise compare; the stored string is read from index 0 of its
   array, i.e. js->offset is not applied here */
526 for (i = 0; i < length; i++)
527 if (a->data[offset + i] != js->value->data[i])
530 /* string already in hashtable, free memory */
/* NOTE(review): presumably `a` is released only when copymode is false
   (otherwise it still belongs to the caller); the guarding condition is
   not visible -- confirm. The size repeats the allocation formula,
   including the 10 bytes of slack. */
533 mem_free(a, sizeof(java_chararray) + sizeof(u2) * (length - 1) + 10);
535 LOCK_MONITOR_EXIT(lock_hashtable_string);
537 return (java_objectheader *) js;
541 /* follow link in external hash chain */
/* no match found from here on; presumably the copy below is made only
   when copymode is true and `a` itself is used otherwise -- TODO confirm */
546 /* create copy of u2-array for new javastring */
/* NOTE(review): (length - 1) is computed in u4 and wraps for length 0 */
547 u4 arraysize = sizeof(java_chararray) + sizeof(u2) * (length - 1) + 10;
548 stringdata = mem_alloc(arraysize);
549 /* memcpy(stringdata, a, arraysize); */
550 memcpy(&(stringdata->header), &(a->header), sizeof(java_arrayheader));
/* NOTE(review): &(a->data) + offset advances in units of the whole data
   member, not of u2; this equals a->data + offset only if data is
   declared with a single element -- confirm. The copy length also
   includes the 10 slack bytes, so it reads beyond the `length`
   characters that were selected. */
551 memcpy(&(stringdata->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
557 /* location in hashtable found, complete arrayheader */
559 stringdata->header.objheader.vftbl =
560 primitivetype_table[ARRAYTYPE_CHAR].arrayvftbl;
561 stringdata->header.size = length;
/* the String class must already be loaded before literals are created */
563 assert(class_java_lang_String);
564 assert(class_java_lang_String->state & CLASS_LOADED);
566 /* if we use eager loading, we have to check loaded String class */
/* NOTE(review): the condition selecting eager loading is not visible */
569 list_add_first(&unlinkedclasses, class_java_lang_String);
571 /* create new javastring */
/* allocated with NEW rather than builtin_new, unlike the javastring_new*
   functions in this file */
573 js = NEW(java_lang_String);
575 #if defined(ENABLE_STATISTICS)
577 size_string += sizeof(java_lang_String);
580 #if defined(ENABLE_THREADS)
581 lock_init_object_lock(&js->header);
584 js->header.vftbl = class_java_lang_String->vftbl;
585 js->value = stringdata;
/* NOTE(review): assignments to js->offset and js->count are not visible
   here, although lookups read js->count -- confirm they exist upstream */
589 /* create new literalstring */
591 s = NEW(literalstring);
593 #if defined(ENABLE_STATISTICS)
595 size_string += sizeof(literalstring);
/* prepend the new entry to the chain of its slot */
598 s->hashlink = hashtable_string.ptr[slot];
599 s->string = (java_objectheader *) js;
600 hashtable_string.ptr[slot] = s;
602 /* update number of hashtable entries */
604 hashtable_string.entries++;
606 /* reorganization of hashtable */
608 if (hashtable_string.entries > (hashtable_string.size * 2)) {
609 /* reorganization of hashtable, average length of the external
610 chains is approx. 2 */
614 literalstring *nexts;
615 java_lang_String *tmpjs;
616 hashtable newhash; /* the new hashtable */
618 /* create new hashtable, double the size */
620 hashtable_create(&newhash, hashtable_string.size * 2);
621 newhash.entries = hashtable_string.entries;
623 /* transfer elements to new hashtable */
625 for (i = 0; i < hashtable_string.size; i++) {
626 s = hashtable_string.ptr[i];
/* each entry is re-hashed from its stored characters and prepended to
   its new slot; presumably nexts saves s->hashlink before it is
   overwritten (that statement is not visible) -- TODO confirm */
630 tmpjs = (java_lang_String *) s->string;
631 slot = unicode_hashkey(tmpjs->value->data, tmpjs->count) & (newhash.size - 1);
633 s->hashlink = newhash.ptr[slot];
634 newhash.ptr[slot] = s;
636 /* follow link in external hash chain */
641 /* dispose old table */
/* only the old slot array is freed; the entries were moved, not copied */
643 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
644 hashtable_string = newhash;
647 LOCK_MONITOR_EXIT(lock_hashtable_string);
649 return (java_objectheader *) js;
653 /* literalstring_new ***********************************************************
655 Creates a new javastring with the text of the utf-symbol and inserts it into
656 the string hashtable.
658 *******************************************************************************/
660 java_objectheader *literalstring_new(utf *u)
662 char *utf_ptr; /* pointer to current unicode character */
664 u4 utflength; /* length of utf-string if uncompressed */
665 java_chararray *a; /* u2-array constructed from utf string */
669 utflength = utf_get_number_of_u2s(u);
671 /* allocate memory */
672 a = mem_alloc(sizeof(java_chararray) + sizeof(u2) * (utflength - 1) + 10);
674 /* convert utf-string to u2-array */
675 for (i = 0; i < utflength; i++)
676 a->data[i] = utf_nextu2(&utf_ptr);
678 return literalstring_u2(a, utflength, 0, false);
682 /* literalstring_free **********************************************************
684 Removes a javastring from memory.
686 *******************************************************************************/
688 void literalstring_free(java_objectheader* string)
693 s = (java_lang_String *) string;
696 /* dispose memory of java.lang.String object */
697 FREE(s, java_lang_String);
699 /* dispose memory of java-characterarray */
700 FREE(a, sizeof(java_chararray) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
/*
 * These are local overrides for various environment variables in Emacs.
 * Please do not remove this and leave it at the end of the file, where
 * Emacs will automagically detect them.
 * ---------------------------------------------------------------------
 * Local variables:
 * mode: c
 * indent-tabs-mode: t
 * c-basic-offset: 4
 * tab-width: 4
 * End:
 * vim:noexpandtab:sw=4:ts=4:
 */