/* src/vm/string.c - java.lang.String related functions

   Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
   C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
   E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
   J. Wenninger, Institut f. Computersprachen - TU Wien

   This file is part of CACAO.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   $Id: string.c 7522 2007-03-14 21:54:49Z twisti $

*/

#include <assert.h>
#include <string.h>

#include "vm/global.h"

#include "mm/memory.h"

#include "native/jni.h"

#include "native/include/java_lang_String.h"

#if defined(ENABLE_THREADS)
# include "threads/native/lock.h"
#else
# include "threads/none/lock.h"
#endif

#include "vm/builtin.h"
#include "vm/exceptions.h"
#include "vm/stringlocal.h"

#include "vmcore/options.h"
#include "vmcore/statistics.h"
#include "vmcore/utf8.h"

59 /* global variables ***********************************************************/
61 /* hashsize must be power of 2 */
63 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
65 hashtable hashtable_string; /* hashtable for javastrings */
67 #if defined(ENABLE_THREADS)
68 static java_objectheader *lock_hashtable_string;
72 /* string_init *****************************************************************
74 Initialize the string hashtable lock.
76 *******************************************************************************/
78 bool string_init(void)
80 /* create string (javastring) hashtable */
82 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
84 #if defined(ENABLE_THREADS)
85 /* create string hashtable lock object */
87 lock_hashtable_string = NEW(java_objectheader);
89 lock_init_object_lock(lock_hashtable_string);
98 /* stringtable_update **********************************************************
100 Traverses the javastring hashtable and sets the vftbl-entries of
101 javastrings which were temporarily set to NULL, because
102 java.lang.Object was not yet loaded.
104 *******************************************************************************/
106 void stringtable_update(void)
108 java_lang_String *js;
110 literalstring *s; /* hashtable entry */
113 for (i = 0; i < hashtable_string.size; i++) {
114 s = hashtable_string.ptr[i];
118 js = (java_lang_String *) s->string;
120 if (!js || !js->value) {
121 /* error in hashtable found */
122 log_text("invalid literalstring in hashtable");
128 if (!js->header.vftbl)
129 /* vftbl of javastring is NULL */
130 js->header.vftbl = class_java_lang_String->vftbl;
132 if (!a->header.objheader.vftbl)
133 /* vftbl of character-array is NULL */
134 a->header.objheader.vftbl = primitivetype_table[ARRAYTYPE_CHAR].arrayvftbl;
136 /* follow link in external hash chain */
144 /* javastring_new_from_utf_buffer **********************************************
146 Create a new object of type java/lang/String with the text from
147 the specified utf8 buffer.
150 buffer.......points to first char in the buffer
151 blength......number of bytes to read from the buffer
154 the java.lang.String object, or
155 NULL if an exception has been thrown
157 *******************************************************************************/
159 java_objectheader *javastring_new_from_utf_buffer(const char *buffer, u4 blength)
161 const char *utf_ptr; /* current utf character in utf string */
162 u4 utflength; /* length of utf-string if uncompressed */
163 java_objectheader *o;
164 java_lang_String *s; /* result-string */
170 utflength = utf_get_number_of_u2s_for_buffer(buffer,blength);
172 o = builtin_new(class_java_lang_String);
173 a = builtin_newarray_char(utflength);
175 /* javastring or character-array could not be created */
177 if ((o == NULL) || (a == NULL))
180 /* decompress utf-string */
184 for (i = 0; i < utflength; i++)
185 a->data[i] = utf_nextu2((char **) &utf_ptr);
187 /* set fields of the javastring-object */
189 s = (java_lang_String *) o;
193 s->count = utflength;
199 /* javastring_safe_new_from_utf8 ***********************************************
201 Create a new object of type java/lang/String with the text from
202 the specified UTF-8 string. This function is safe for invalid UTF-8.
203 (Invalid characters will be replaced by U+fffd.)
206 text.........the UTF-8 string, zero-terminated.
209 the java.lang.String object, or
210 NULL if an exception has been thrown
212 *******************************************************************************/
214 java_objectheader *javastring_safe_new_from_utf8(const char *text)
216 java_objectheader *o;
224 /* Get number of bytes. We need this to completely emulate the messy */
225 /* behaviour of the RI. :( */
227 nbytes = strlen(text);
229 /* calculate number of Java characters */
231 len = utf8_safe_number_of_u2s(text, nbytes);
233 /* allocate the String object and the char array */
235 o = builtin_new(class_java_lang_String);
236 a = builtin_newarray_char(len);
238 /* javastring or character-array could not be created? */
240 if ((o == NULL) || (a == NULL))
243 /* decompress UTF-8 string */
245 utf8_safe_convert_to_u2s(text, nbytes, a->data);
247 /* set fields of the String object */
249 s = (java_lang_String *) o;
259 /* javastring_new_from_utf_string **********************************************
261 Create a new object of type java/lang/String with the text from
262 the specified zero-terminated utf8 string.
265 buffer.......points to first char in the buffer
266 blength......number of bytes to read from the buffer
269 the java.lang.String object, or
270 NULL if an exception has been thrown
272 *******************************************************************************/
274 java_objectheader *javastring_new_from_utf_string(const char *utfstr)
278 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
282 /* javastring_new **************************************************************
284 creates a new object of type java/lang/String with the text of
285 the specified utf8-string
287 return: pointer to the string or NULL if memory is exhausted.
289 *******************************************************************************/
291 java_objectheader *javastring_new(utf *u)
293 char *utf_ptr; /* current utf character in utf string */
294 u4 utflength; /* length of utf-string if uncompressed */
295 java_objectheader *o;
301 exceptions_throw_nullpointerexception();
306 utflength = utf_get_number_of_u2s(u);
308 o = builtin_new(class_java_lang_String);
309 a = builtin_newarray_char(utflength);
311 /* javastring or character-array could not be created */
313 if ((o == NULL) || (a == NULL))
316 /* decompress utf-string */
318 for (i = 0; i < utflength; i++)
319 a->data[i] = utf_nextu2(&utf_ptr);
321 /* set fields of the javastring-object */
323 s = (java_lang_String *) o;
327 s->count = utflength;
333 /* javastring_new_slash_to_dot *************************************************
335 creates a new object of type java/lang/String with the text of
336 the specified utf8-string with slashes changed to dots
338 return: pointer to the string or NULL if memory is exhausted.
340 *******************************************************************************/
342 java_objectheader *javastring_new_slash_to_dot(utf *u)
344 char *utf_ptr; /* current utf character in utf string */
345 u4 utflength; /* length of utf-string if uncompressed */
346 java_objectheader *o;
353 exceptions_throw_nullpointerexception();
358 utflength = utf_get_number_of_u2s(u);
360 o = builtin_new(class_java_lang_String);
361 a = builtin_newarray_char(utflength);
363 /* javastring or character-array could not be created */
364 if ((o == NULL) || (a == NULL))
367 /* decompress utf-string */
369 for (i = 0; i < utflength; i++) {
370 ch = utf_nextu2(&utf_ptr);
376 /* set fields of the javastring-object */
378 s = (java_lang_String *) o;
382 s->count = utflength;
388 /* javastring_new_from_ascii ***************************************************
390 creates a new java/lang/String object which contains the given ASCII
391 C-string converted to UTF-16.
394 text.........string of ASCII characters
397 the java.lang.String object, or
398 NULL if an exception has been thrown.
400 *******************************************************************************/
402 java_objectheader *javastring_new_from_ascii(const char *text)
405 s4 len; /* length of the string */
406 java_objectheader *o;
411 exceptions_throw_nullpointerexception();
417 o = builtin_new(class_java_lang_String);
418 a = builtin_newarray_char(len);
420 /* javastring or character-array could not be created */
422 if ((o == NULL) || (a == NULL))
427 for (i = 0; i < len; i++)
428 a->data[i] = text[i];
430 /* set fields of the javastring-object */
432 s = (java_lang_String *) o;
442 /* javastring_tochar ***********************************************************
444 converts a Java string into a C string.
446 return: pointer to C string
448 Caution: calling method MUST release the allocated memory!
450 *******************************************************************************/
452 char *javastring_tochar(java_objectheader *so)
454 java_lang_String *s = (java_lang_String *) so;
467 buf = MNEW(char, s->count + 1);
469 for (i = 0; i < s->count; i++)
470 buf[i] = a->data[s->offset + i];
478 /* javastring_toutf ************************************************************
480 Make utf symbol from javastring.
482 *******************************************************************************/
484 utf *javastring_toutf(java_objectheader *string, bool isclassname)
488 s = (java_lang_String *) string;
493 return utf_new_u2(s->value->data + s->offset, s->count, isclassname);
497 /* literalstring_u2 ************************************************************
499 Searches for the javastring with the specified u2-array in the
500 string hashtable, if there is no such string a new one is created.
502 If copymode is true a copy of the u2-array is made.
504 *******************************************************************************/
506 java_objectheader *literalstring_u2(java_chararray *a, u4 length, u4 offset,
509 literalstring *s; /* hashtable element */
510 java_lang_String *js; /* u2-array wrapped in javastring */
511 java_chararray *stringdata; /* copy of u2-array */
516 LOCK_MONITOR_ENTER(lock_hashtable_string);
518 /* find location in hashtable */
520 key = unicode_hashkey(a->data + offset, length);
521 slot = key & (hashtable_string.size - 1);
522 s = hashtable_string.ptr[slot];
525 js = (java_lang_String *) s->string;
527 if (length == js->count) {
530 for (i = 0; i < length; i++)
531 if (a->data[offset + i] != js->value->data[i])
534 /* string already in hashtable, free memory */
537 mem_free(a, sizeof(java_chararray) + sizeof(u2) * (length - 1) + 10);
539 LOCK_MONITOR_EXIT(lock_hashtable_string);
541 return (java_objectheader *) js;
545 /* follow link in external hash chain */
550 /* create copy of u2-array for new javastring */
551 u4 arraysize = sizeof(java_chararray) + sizeof(u2) * (length - 1) + 10;
552 stringdata = mem_alloc(arraysize);
553 /* memcpy(stringdata, a, arraysize); */
554 memcpy(&(stringdata->header), &(a->header), sizeof(java_arrayheader));
555 memcpy(&(stringdata->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
561 /* location in hashtable found, complete arrayheader */
563 stringdata->header.objheader.vftbl =
564 primitivetype_table[ARRAYTYPE_CHAR].arrayvftbl;
565 stringdata->header.size = length;
567 assert(class_java_lang_String);
568 assert(class_java_lang_String->state & CLASS_LOADED);
570 /* if we use eager loading, we have to check loaded String class */
573 list_add_first(&unlinkedclasses, class_java_lang_String);
575 /* create new javastring */
577 js = NEW(java_lang_String);
579 #if defined(ENABLE_STATISTICS)
581 size_string += sizeof(java_lang_String);
584 #if defined(ENABLE_THREADS)
585 lock_init_object_lock(&js->header);
588 js->header.vftbl = class_java_lang_String->vftbl;
589 js->value = stringdata;
593 /* create new literalstring */
595 s = NEW(literalstring);
597 #if defined(ENABLE_STATISTICS)
599 size_string += sizeof(literalstring);
602 s->hashlink = hashtable_string.ptr[slot];
603 s->string = (java_objectheader *) js;
604 hashtable_string.ptr[slot] = s;
606 /* update number of hashtable entries */
608 hashtable_string.entries++;
610 /* reorganization of hashtable */
612 if (hashtable_string.entries > (hashtable_string.size * 2)) {
613 /* reorganization of hashtable, average length of the external
614 chains is approx. 2 */
618 literalstring *nexts;
619 java_lang_String *tmpjs;
620 hashtable newhash; /* the new hashtable */
622 /* create new hashtable, double the size */
624 hashtable_create(&newhash, hashtable_string.size * 2);
625 newhash.entries = hashtable_string.entries;
627 /* transfer elements to new hashtable */
629 for (i = 0; i < hashtable_string.size; i++) {
630 s = hashtable_string.ptr[i];
634 tmpjs = (java_lang_String *) s->string;
635 slot = unicode_hashkey(tmpjs->value->data, tmpjs->count) & (newhash.size - 1);
637 s->hashlink = newhash.ptr[slot];
638 newhash.ptr[slot] = s;
640 /* follow link in external hash chain */
645 /* dispose old table */
647 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
648 hashtable_string = newhash;
651 LOCK_MONITOR_EXIT(lock_hashtable_string);
653 return (java_objectheader *) js;
657 /* literalstring_new ***********************************************************
659 Creates a new javastring with the text of the utf-symbol and inserts it into
660 the string hashtable.
662 *******************************************************************************/
664 java_objectheader *literalstring_new(utf *u)
666 char *utf_ptr; /* pointer to current unicode character */
668 u4 utflength; /* length of utf-string if uncompressed */
669 java_chararray *a; /* u2-array constructed from utf string */
673 utflength = utf_get_number_of_u2s(u);
675 /* allocate memory */
676 a = mem_alloc(sizeof(java_chararray) + sizeof(u2) * (utflength - 1) + 10);
678 /* convert utf-string to u2-array */
679 for (i = 0; i < utflength; i++)
680 a->data[i] = utf_nextu2(&utf_ptr);
682 return literalstring_u2(a, utflength, 0, false);
686 /* literalstring_free **********************************************************
688 Removes a javastring from memory.
690 *******************************************************************************/
692 void literalstring_free(java_objectheader* string)
697 s = (java_lang_String *) string;
700 /* dispose memory of java.lang.String object */
701 FREE(s, java_lang_String);
703 /* dispose memory of java-characterarray */
704 FREE(a, sizeof(java_chararray) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
/*
 * These are local overrides for various environment variables in Emacs.
 * Please do not remove this and leave it at the end of the file, where
 * Emacs will automagically detect them.
 * ---------------------------------------------------------------------
 * Local variables:
 * mode: c
 * indent-tabs-mode: t
 * c-basic-offset: 4
 * tab-width: 4
 * End:
 * vim:noexpandtab:sw=4:ts=4:
 */