1 /* src/vm/string.c - java.lang.String related functions
3 Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
4 C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
5 E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
6 J. Wenninger, Institut f. Computersprachen - TU Wien
8 This file is part of CACAO.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2, or (at
13 your option) any later version.
15 This program is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 $Id: string.c 8321 2007-08-16 11:37:25Z michi $
36 #include "vm/global.h"
38 #include "mm/memory.h"
40 #include "native/jni.h"
41 #include "native/llni.h"
43 #include "native/include/java_lang_String.h"
45 #include "threads/lock-common.h"
47 #include "vm/builtin.h"
48 #include "vm/exceptions.h"
49 #include "vm/primitive.h"
50 #include "vm/stringlocal.h"
53 #include "vmcore/options.h"
54 #include "vmcore/statistics.h"
55 #include "vmcore/utf8.h"
58 /* global variables ***********************************************************/
60 /* hashsize must be power of 2 */
/* (literalstring_u2 picks a slot with `key & (hashtable_string.size - 1)`,
   which can only reach every slot when the size is a power of two; the
   rehash in literalstring_u2 doubles the size.) */
62 #define HASHTABLE_STRING_SIZE 2048 /* initial size of javastring-hash */
64 hashtable hashtable_string; /* hashtable for javastrings */
/* Object whose monitor literalstring_u2 enters around its lookup/insert in
   hashtable_string; it is only declared when ENABLE_THREADS is defined. */
66 #if defined(ENABLE_THREADS)
67 static java_object_t *lock_hashtable_string;
71 /* string_init *****************************************************************
73 Create the string (javastring) hashtable with an initial size of
HASHTABLE_STRING_SIZE and, when ENABLE_THREADS is defined, allocate and
initialize the object that serves as the hashtable lock.

NOTE(review): the function is declared bool, but no return statement is
visible in this listing -- confirm the returned value against the
complete source.
75 *******************************************************************************/
77 bool string_init(void)
79 /* create string (javastring) hashtable */
81 hashtable_create(&hashtable_string, HASHTABLE_STRING_SIZE);
83 #if defined(ENABLE_THREADS)
84 /* create string hashtable lock object */
/* NOTE(review): the result of NEW() is used without a visible check */
86 lock_hashtable_string = NEW(java_object_t);
88 LOCK_INIT_OBJECT_LOCK(lock_hashtable_string);
97 /* stringtable_update **********************************************************
99 Traverses the javastring hashtable and sets the vftbl-entries of
100 javastrings which were temporarily set to NULL, because
101 java.lang.Object was not yet loaded.

For every visited entry, the vftbl of the String object and the vftbl of
its character array are each filled in if they are still NULL. An entry
whose String object or value array is NULL aborts the VM.

NOTE(review): the declarations of `i` and `a` and the statements that
walk each slot's chain are not visible in this listing; only the
per-entry work is documented here.
103 *******************************************************************************/
105 void stringtable_update(void)
107 java_lang_String *js;
109 literalstring *s; /* hashtable entry */
/* visit every slot of the table */
112 for (i = 0; i < hashtable_string.size; i++) {
113 s = hashtable_string.ptr[i];
116 js = (java_lang_String *) s->string;
/* NOTE(review): `value` is read directly here, but through
   LLNI_field_get_ref a few lines further down */
118 if ((js == NULL) || (js->value == NULL)) {
119 /* error in hashtable found */
121 vm_abort("stringtable_update: invalid literalstring in hashtable");
124 LLNI_field_get_ref(js, value, a);
126 if (!js->header.vftbl)
127 /* vftbl of javastring is NULL */
128 js->header.vftbl = class_java_lang_String->vftbl;
130 if (!a->header.objheader.vftbl)
131 /* vftbl of character-array is NULL */
132 a->header.objheader.vftbl =
133 primitive_arrayclass_get_by_type(ARRAYTYPE_CHAR)->vftbl;
135 /* follow link in external hash chain */
143 /* javastring_new_from_utf_buffer **********************************************
145 Create a new object of type java/lang/String with the text from
146 the specified utf8 buffer.
149 buffer.......points to first char in the buffer
150 blength......number of bytes to read from the buffer
153 the java.lang.String object, or
154 NULL if an exception has been thrown

The String object and its char array are both allocated before either
result is checked. The new String gets offset 0 and a count equal to
the number of u2 characters computed for the buffer.

NOTE(review): the rest of the parameter list, the declarations of `o`
and `i`, the initialisation of `utf_ptr` and the return statements are
not visible in this listing.
156 *******************************************************************************/
158 static java_handle_t *javastring_new_from_utf_buffer(const char *buffer,
161 const char *utf_ptr; /* current utf character in utf string */
162 u4 utflength; /* length of utf-string if uncompressed */
164 java_lang_String *s; /* result-string */
165 java_handle_chararray_t *a;
/* number of u2 characters the buffer decodes to; used as the array length */
170 utflength = utf_get_number_of_u2s_for_buffer(buffer,blength);
172 o = builtin_new(class_java_lang_String);
173 a = builtin_newarray_char(utflength);
175 /* javastring or character-array could not be created */
177 if ((o == NULL) || (a == NULL))
180 /* decompress utf-string */
/* the cast drops the const qualifier of utf_ptr so that its address can be
   handed to utf_nextu2 */
184 for (i = 0; i < utflength; i++)
185 LLNI_array_direct(a, i) = utf_nextu2((char **) &utf_ptr);
187 /* set fields of the javastring-object */
189 s = (java_lang_String *) o;
191 LLNI_field_set_ref(s, value , a);
192 LLNI_field_set_val(s, offset, 0);
193 LLNI_field_set_val(s, count , utflength);
199 /* javastring_safe_new_from_utf8 ***********************************************
201 Create a new object of type java/lang/String with the text from
202 the specified UTF-8 string. This function is safe for invalid UTF-8.
203 (Invalid characters will be replaced by U+fffd.)
206 text.........the UTF-8 string, zero-terminated.
209 the java.lang.String object, or
210 NULL if an exception has been thrown

The byte length is taken with strlen, the number of Java characters is
computed with utf8_safe_number_of_u2s, and the conversion itself is
done by utf8_safe_convert_to_u2s. The new String gets offset 0.

NOTE(review): the declarations of `o`, `s`, `nbytes` and `len`, any
check of `text` against NULL, and the return statements are not visible
in this listing.
212 *******************************************************************************/
214 java_handle_t *javastring_safe_new_from_utf8(const char *text)
217 java_handle_chararray_t *a;
224 /* Get number of bytes. We need this to completely emulate the messy */
225 /* behaviour of the RI. :( */
227 nbytes = strlen(text);
229 /* calculate number of Java characters */
231 len = utf8_safe_number_of_u2s(text, nbytes);
233 /* allocate the String object and the char array */
235 o = builtin_new(class_java_lang_String);
236 a = builtin_newarray_char(len);
238 /* javastring or character-array could not be created? */
240 if ((o == NULL) || (a == NULL))
243 /* decompress UTF-8 string */
/* the array's data member is written directly here; the other constructors
   in this file go through LLNI_array_direct */
245 utf8_safe_convert_to_u2s(text, nbytes, a->data);
247 /* set fields of the String object */
249 s = (java_lang_String *) o;
251 LLNI_field_set_ref(s, value , a);
252 LLNI_field_set_val(s, offset, 0);
253 LLNI_field_set_val(s, count , len);
259 /* javastring_new_from_utf_string **********************************************
261 Create a new object of type java/lang/String with the text from
262 the specified zero-terminated utf8 string.
265 utfstr.......points to the zero-terminated utf8 string; its byte
266 length is determined with strlen
269 the java.lang.String object, or
270 NULL if an exception has been thrown
272 *******************************************************************************/
274 java_handle_t *javastring_new_from_utf_string(const char *utfstr)
/* delegate to the buffer variant, passing the byte length up to the
   terminator */
278 return javastring_new_from_utf_buffer(utfstr, strlen(utfstr));
282 /* javastring_new **************************************************************
284 creates a new object of type java/lang/String with the text of
285 the specified utf8-string
287 return: pointer to the string or NULL if memory is exhausted.

The String object and its char array are both allocated before either
result is checked. The characters are decoded one at a time with
utf_nextu2, and the new String gets offset 0 and count utflength.

NOTE(review): the condition under which the NullPointerException is
thrown (presumably u == NULL), the initialisation of `utf_ptr`, the
declarations of `o`, `s` and `i`, and the return statements are not
visible in this listing.
289 *******************************************************************************/
291 java_handle_t *javastring_new(utf *u)
293 char *utf_ptr; /* current utf character in utf string */
294 u4 utflength; /* length of utf-string if uncompressed */
296 java_handle_chararray_t *a;
301 exceptions_throw_nullpointerexception();
/* number of u2 characters in u; used as the array length and the count */
306 utflength = utf_get_number_of_u2s(u);
308 o = builtin_new(class_java_lang_String);
309 a = builtin_newarray_char(utflength);
311 /* javastring or character-array could not be created */
313 if ((o == NULL) || (a == NULL))
316 /* decompress utf-string */
318 for (i = 0; i < utflength; i++)
319 LLNI_array_direct(a, i) = utf_nextu2(&utf_ptr);
321 /* set fields of the javastring-object */
323 s = (java_lang_String *) o;
325 LLNI_field_set_ref(s, value , a);
326 LLNI_field_set_val(s, offset, 0);
327 LLNI_field_set_val(s, count , utflength);
333 /* javastring_new_slash_to_dot *************************************************
335 creates a new object of type java/lang/String with the text of
336 the specified utf8-string with slashes changed to dots
338 return: pointer to the string or NULL if memory is exhausted.

Each character is first read into `ch` and then stored into the array,
which leaves room for the substitution between the two steps.

NOTE(review): the statement that replaces '/' by '.', the condition
under which the NullPointerException is thrown, the initialisation of
`utf_ptr`, the declarations of `o`, `s`, `i` and `ch`, and the return
statements are not visible in this listing.
340 *******************************************************************************/
342 java_handle_t *javastring_new_slash_to_dot(utf *u)
344 char *utf_ptr; /* current utf character in utf string */
345 u4 utflength; /* length of utf-string if uncompressed */
347 java_handle_chararray_t *a;
353 exceptions_throw_nullpointerexception();
358 utflength = utf_get_number_of_u2s(u);
360 o = builtin_new(class_java_lang_String);
361 a = builtin_newarray_char(utflength);
363 /* javastring or character-array could not be created */
364 if ((o == NULL) || (a == NULL))
367 /* decompress utf-string */
369 for (i = 0; i < utflength; i++) {
/* fetch the next character into a temporary before storing it */
370 ch = utf_nextu2(&utf_ptr);
373 LLNI_array_direct(a, i) = ch;
376 /* set fields of the javastring-object */
378 s = (java_lang_String *) o;
380 LLNI_field_set_ref(s, value , a);
381 LLNI_field_set_val(s, offset, 0);
382 LLNI_field_set_val(s, count , utflength);
388 /* javastring_new_from_ascii ***************************************************
390 creates a new java/lang/String object which contains the given ASCII
391 C-string converted to UTF-16.
394 text.........string of ASCII characters
397 the java.lang.String object, or
398 NULL if an exception has been thrown.

The characters are copied with a plain assignment per element; no
decoding step is applied to the input bytes.

NOTE(review): the condition under which the NullPointerException is
thrown (presumably text == NULL), the assignment of `len`, the
declarations of `o`, `s` and `i`, and the return statements are not
visible in this listing.
400 *******************************************************************************/
402 java_handle_t *javastring_new_from_ascii(const char *text)
405 s4 len; /* length of the string */
408 java_handle_chararray_t *a;
411 exceptions_throw_nullpointerexception();
417 o = builtin_new(class_java_lang_String);
418 a = builtin_newarray_char(len);
420 /* javastring or character-array could not be created */
422 if ((o == NULL) || (a == NULL))
/* widen each char of the C string into one array element */
427 for (i = 0; i < len; i++)
428 LLNI_array_direct(a, i) = text[i];
430 /* set fields of the javastring-object */
432 s = (java_lang_String *) o;
434 LLNI_field_set_ref(s, value , a);
435 LLNI_field_set_val(s, offset, 0);
436 LLNI_field_set_val(s, count , len);
442 /* javastring_tochar ***********************************************************
444 converts a Java string into a C string.
446 return: pointer to C string
448 Caution: calling method MUST release the allocated memory!

The result buffer is allocated with MNEW and holds count + 1 chars.
The characters are taken from the String's value array, starting at the
String's offset.

NOTE(review): the declarations of `buf` and `i`, any check of `so`
against NULL, the write of the terminating zero and the return
statement are not visible in this listing.
450 *******************************************************************************/
452 char *javastring_tochar(java_handle_t *so)
454 java_lang_String *s = (java_lang_String *) so;
455 java_handle_chararray_t *a;
462 LLNI_field_get_ref(s, value, a);
/* one char per character of the string, plus one extra element */
467 buf = MNEW(char, LLNI_field_direct(s, count) + 1);
/* each array element is assigned to a char, i.e. narrowed to char width;
   NOTE(review): characters that do not fit into a char are not preserved */
469 for (i = 0; i < LLNI_field_direct(s, count); i++)
470 buf[i] = a->data[LLNI_field_direct(s, offset) + i];
478 /* javastring_toutf ************************************************************
480 Make utf symbol from javastring.

The characters starting at the String's offset inside its value array
are passed, together with the String's count and `isclassname`, to
utf_new_u2, whose result is returned.

NOTE(review): the declaration of `s` and any check of `string` against
NULL are not visible in this listing.
482 *******************************************************************************/
484 utf *javastring_toutf(java_handle_t *string, bool isclassname)
488 s = (java_lang_String *) string;
493 return utf_new_u2(LLNI_field_direct(s, value)->data + LLNI_field_direct(s, offset), LLNI_field_direct(s, count), isclassname);
497 /* literalstring_u2 ************************************************************
499 Searches for the javastring with the specified u2-array in the
500 string hashtable, if there is no such string a new one is created.
502 If copymode is true a copy of the u2-array is made.

The lookup and insert run between LOCK_MONITOR_ENTER and
LOCK_MONITOR_EXIT on lock_hashtable_string; both visible return paths
release the monitor first. After an insert, once the entry count
exceeds twice the table size, a table of double size is created and
every entry is relinked into it.

NOTE(review): several short lines of this function are not visible in
this listing (the rest of the parameter list, the declarations of
`key`, `slot` and `i`, the chain-walking loops, the copymode branches
and the assignments to the new String's fields); the comments below
describe only what is visible.
504 *******************************************************************************/
506 java_object_t *literalstring_u2(java_chararray_t *a, u4 length, u4 offset,
509 literalstring *s; /* hashtable element */
510 java_lang_String *js; /* u2-array wrapped in javastring */
511 java_chararray_t *ca; /* copy of u2-array */
516 LOCK_MONITOR_ENTER(lock_hashtable_string);
518 /* find location in hashtable */
/* masking with size - 1 relies on the table size being a power of two */
520 key = unicode_hashkey(a->data + offset, length);
521 slot = key & (hashtable_string.size - 1);
522 s = hashtable_string.ptr[slot];
525 js = (java_lang_String *) s->string;
/* only candidates of equal length have their contents compared */
527 if (length == js->count) {
/* the stored characters are read from index 0 of js->value */
530 for (i = 0; i < length; i++)
531 if (a->data[offset + i] != js->value->data[i])
534 /* string already in hashtable, free memory */
/* NOTE(review): whatever condition guards this mem_free is not visible in
   this listing -- confirm it is skipped when the caller keeps ownership */
537 mem_free(a, sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10);
539 LOCK_MONITOR_EXIT(lock_hashtable_string);
541 return (java_object_t *) js;
545 /* follow link in external hash chain */
550 /* create copy of u2-array for new javastring */
/* NOTE(review): length is a u4, so `length - 1` wraps around for
   length == 0 if u4 is unsigned -- confirm the resulting size for empty
   strings, especially where size_t is wider than u4 */
551 u4 arraysize = sizeof(java_chararray_t) + sizeof(u2) * (length - 1) + 10;
552 ca = mem_alloc(arraysize);
553 /* memcpy(ca, a, arraysize); */
/* copy the array header first, then the character data */
554 memcpy(&(ca->header), &(a->header), sizeof(java_array_t));
/* NOTE(review): `&(a->data) + offset` advances in units of the whole data
   member, not of u2; it equals a->data + offset only if data is declared
   as a one-element u2 array -- confirm against the type declaration */
555 memcpy(&(ca->data), &(a->data) + offset, sizeof(u2) * (length - 1) + 10);
561 /* location in hashtable found, complete arrayheader */
563 ca->header.objheader.vftbl =
564 primitive_arrayclass_get_by_type(ARRAYTYPE_CHAR)->vftbl;
565 ca->header.size = length;
567 assert(class_java_lang_String);
568 assert(class_java_lang_String->state & CLASS_LOADED);
570 /* create new javastring */
/* allocated with NEW() here, whereas the javastring_new* functions in this
   file use builtin_new() */
572 js = NEW(java_lang_String);
574 #if defined(ENABLE_STATISTICS)
576 size_string += sizeof(java_lang_String);
579 #if defined(ENABLE_THREADS)
580 lock_init_object_lock(&js->header);
583 js->header.vftbl = class_java_lang_String->vftbl;
588 /* create new literalstring */
590 s = NEW(literalstring);
592 #if defined(ENABLE_STATISTICS)
594 size_string += sizeof(literalstring);
/* link the new entry in at the head of the slot's chain */
597 s->hashlink = hashtable_string.ptr[slot];
598 s->string = (java_object_t *) js;
599 hashtable_string.ptr[slot] = s;
601 /* update number of hashtable entries */
603 hashtable_string.entries++;
605 /* reorganization of hashtable */
607 if (hashtable_string.entries > (hashtable_string.size * 2)) {
608 /* reorganization of hashtable, average length of the external
609 chains is approx. 2 */
613 literalstring *nexts;
614 java_lang_String *tmpjs;
615 hashtable newhash; /* the new hashtable */
617 /* create new hashtable, double the size */
619 hashtable_create(&newhash, hashtable_string.size * 2);
620 newhash.entries = hashtable_string.entries;
622 /* transfer elements to new hashtable */
624 for (i = 0; i < hashtable_string.size; i++) {
625 s = hashtable_string.ptr[i];
629 tmpjs = (java_lang_String *) s->string;
/* recompute the slot for the larger table from the stored characters */
630 slot = unicode_hashkey(tmpjs->value->data, tmpjs->count) & (newhash.size - 1);
/* push the existing entry onto the head of its new chain */
632 s->hashlink = newhash.ptr[slot];
633 newhash.ptr[slot] = s;
635 /* follow link in external hash chain */
640 /* dispose old table */
/* only the old slot array is released; the entries were relinked above */
642 MFREE(hashtable_string.ptr, void*, hashtable_string.size);
643 hashtable_string = newhash;
646 LOCK_MONITOR_EXIT(lock_hashtable_string);
648 return (java_object_t *) js;
652 /* literalstring_new ***********************************************************
654 Creates a new javastring with the text of the utf-symbol and inserts it into
655 the string hashtable.

The utf symbol is decoded into a freshly allocated u2-array, which is
then passed to literalstring_u2 with offset 0 and copymode false. The
returned object is whatever literalstring_u2 returns.

NOTE(review): the initialisation of `utf_ptr` and the declaration of
`i` are not visible in this listing.
657 *******************************************************************************/
659 java_object_t *literalstring_new(utf *u)
661 char *utf_ptr; /* pointer to current unicode character */
663 u4 utflength; /* length of utf-string if uncompressed */
664 java_chararray_t *a; /* u2-array constructed from utf string */
668 utflength = utf_get_number_of_u2s(u);
670 /* allocate memory */
/* same size formula as literalstring_u2 uses for this array;
   NOTE(review): `utflength - 1` wraps around for an empty string if u4 is
   unsigned -- confirm the resulting size */
671 a = mem_alloc(sizeof(java_chararray_t) + sizeof(u2) * (utflength - 1) + 10);
673 /* convert utf-string to u2-array */
674 for (i = 0; i < utflength; i++)
675 a->data[i] = utf_nextu2(&utf_ptr);
/* copymode is false: the array allocated above is handed to literalstring_u2 */
677 return literalstring_u2(a, utflength, 0, false);
681 /* literalstring_free **********************************************************
683 Removes a javastring from memory.

Frees the java.lang.String object and then its character array.

NOTE(review): the declarations of `s` and `a` and the assignment of `a`
are not visible in this listing. No access to the string hashtable is
visible in this function either.
685 *******************************************************************************/
687 void literalstring_free(java_object_t* string)
692 s = (java_lang_String *) string;
695 /* dispose memory of java.lang.String object */
696 FREE(s, java_lang_String);
698 /* dispose memory of java-characterarray */
/* NOTE(review): literalstring_new and literalstring_u2 allocate these arrays
   with an extra 10 bytes that this size omits -- confirm that the allocator
   tolerates the mismatch or add the 10 bytes here */
699 FREE(a, sizeof(java_chararray_t) + sizeof(u2) * (a->header.size - 1)); /* +10 ?? */
704 * These are local overrides for various environment variables in Emacs.
705 * Please do not remove this and leave it at the end of the file, where
706 * Emacs will automagically detect them.
707 * ---------------------------------------------------------------------
710 * indent-tabs-mode: t
714 * vim:noexpandtab:sw=4:ts=4: