/* src/vm/utf8.c - utf8 functions
- Copyright (C) 1996-2005 R. Grafl, A. Krall, C. Kruegel, C. Oates,
- R. Obermaisser, M. Platter, M. Probst, S. Ring, E. Steiner,
- C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich, J. Wenninger,
- Institut f. Computersprachen - TU Wien
+ Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel,
+ C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
+ E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
+ J. Wenninger, Institut f. Computersprachen - TU Wien
This file is part of CACAO.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA.
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
- Contact: cacao@complang.tuwien.ac.at
+ Contact: cacao@cacaojvm.org
Authors: Reinhard Grafl
Andreas Krall
Christian Thalinger
- $Id: utf8.c 2136 2005-03-30 10:03:03Z twisti $
+ $Id: utf8.c 4357 2006-01-22 23:33:38Z twisti $
*/
#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "vm/types.h"
#include "mm/memory.h"
+
+#if defined(USE_THREADS)
+# if defined(NATIVE_THREADS)
+# include "threads/native/threads.h"
+# else
+# include "threads/green/threads.h"
+# endif
+#endif
+
+#include "vm/builtin.h"
#include "vm/exceptions.h"
+#include "vm/hashtable.h"
#include "vm/options.h"
#include "vm/statistics.h"
-#include "vm/tables.h"
+#include "vm/stringlocal.h"
#include "vm/utf8.h"
+/* global variables ***********************************************************/
+
+/* hash size must be a power of 2: slots are selected with key & (size - 1) */
-hashtable utf_hash; /* hashtable for utf8-symbols */
+#define HASHTABLE_UTF_SIZE 16384 /* initial size of utf-hash */
+
+hashtable hashtable_utf; /* hashtable for utf8-symbols */
+
+#if defined(USE_THREADS)
+static java_objectheader *lock_hashtable_utf;
+#endif
/* utf-symbols for pointer comparison of frequently used strings **************/
utf *utf_java_lang_SecurityManager;
utf *utf_java_lang_String;
utf *utf_java_lang_System;
+utf *utf_java_lang_ThreadGroup;
utf *utf_java_io_Serializable;
utf *utf_java_lang_Throwable;
utf *utf_java_lang_VMThrowable;
-utf *utf_java_lang_Exception;
utf *utf_java_lang_Error;
+utf *utf_java_lang_NoClassDefFoundError;
+utf *utf_java_lang_LinkageError;
+utf *utf_java_lang_NoSuchMethodError;
utf *utf_java_lang_OutOfMemoryError;
+utf *utf_java_lang_Exception;
+utf *utf_java_lang_ClassNotFoundException;
+utf *utf_java_lang_IllegalArgumentException;
+utf *utf_java_lang_IllegalMonitorStateException;
+
+utf *utf_java_lang_NullPointerException;
+
utf* utf_java_lang_Void;
utf* utf_java_lang_Boolean;
utf* utf_java_lang_Byte;
utf* utf_java_lang_Float;
utf* utf_java_lang_Double;
+utf *utf_java_lang_StackTraceElement;
+utf *utf_java_lang_reflect_Constructor;
+utf *utf_java_lang_reflect_Field;
+utf *utf_java_lang_reflect_Method;
utf *utf_java_util_Vector;
utf *utf_InnerClasses; /* InnerClasses */
utf *utf_init; /* <init> */
utf *utf_clinit; /* <clinit> */
+utf *utf_clone; /* clone */
utf *utf_finalize; /* finalize */
+utf *utf_run; /* run */
+
+utf *utf_add; /* add */
+utf *utf_remove; /* remove */
+utf *utf_put; /* put */
+utf *utf_get; /* get */
+utf *utf_value; /* value */
-utf *utf_printStackTrace;
utf *utf_fillInStackTrace;
+utf *utf_getSystemClassLoader;
utf *utf_loadClass;
+utf *utf_printStackTrace;
+
+utf *utf_Z; /* Z */
+utf *utf_B; /* B */
+utf *utf_C; /* C */
+utf *utf_S; /* S */
+utf *utf_I; /* I */
+utf *utf_J; /* J */
+utf *utf_F; /* F */
+utf *utf_D; /* D */
utf *utf_void__void; /* ()V */
utf *utf_boolean__void; /* (Z)V */
utf *utf_long__void; /* (J)V */
utf *utf_float__void; /* (F)V */
utf *utf_double__void; /* (D)V */
+
+utf *utf_void__java_lang_ClassLoader; /* ()Ljava/lang/ClassLoader; */
+utf *utf_void__java_lang_Object; /* ()Ljava/lang/Object; */
utf *utf_void__java_lang_Throwable; /* ()Ljava/lang/Throwable; */
+utf *utf_java_lang_Object__java_lang_Object;
utf *utf_java_lang_String__void; /* (Ljava/lang/String;)V */
utf *utf_java_lang_String__java_lang_Class;
utf *utf_java_lang_Throwable__void; /* (Ljava/lang/Throwable;)V */
+utf *utf_not_named_yet; /* special name for unnamed classes */
+
utf *array_packagename;
*******************************************************************************/
-void utf8_init(void)
+bool utf8_init(void)
{
+ /* create utf8 hashtable */
+
+ hashtable_create(&hashtable_utf, HASHTABLE_UTF_SIZE);
+
+#if defined(ENABLE_STATISTICS)
+ if (opt_stat)
+ count_utf_len += sizeof(utf*) * hashtable_utf.size;
+#endif
+
+#if defined(USE_THREADS)
+ /* create utf hashtable lock object */
+
+ lock_hashtable_utf = NEW(java_objectheader);
+
+# if defined(NATIVE_THREADS)
+ initObjectLock(lock_hashtable_utf);
+# endif
+#endif
+
/* create utf-symbols for pointer comparison of frequently used strings */
utf_java_lang_Object = utf_new_char("java/lang/Object");
utf_java_lang_SecurityManager = utf_new_char("java/lang/SecurityManager");
utf_java_lang_String = utf_new_char("java/lang/String");
utf_java_lang_System = utf_new_char("java/lang/System");
+ utf_java_lang_ThreadGroup = utf_new_char("java/lang/ThreadGroup");
utf_java_io_Serializable = utf_new_char("java/io/Serializable");
- utf_java_lang_Throwable = utf_new_char("java/lang/Throwable");
- utf_java_lang_VMThrowable = utf_new_char("java/lang/VMThrowable");
- utf_java_lang_Exception = utf_new_char("java/lang/Exception");
- utf_java_lang_Error = utf_new_char("java/lang/Error");
- utf_java_lang_OutOfMemoryError = utf_new_char("java/lang/OutOfMemoryError");
+ utf_java_lang_Throwable = utf_new_char(string_java_lang_Throwable);
+ utf_java_lang_VMThrowable = utf_new_char(string_java_lang_VMThrowable);
+ utf_java_lang_Error = utf_new_char(string_java_lang_Error);
+
+ utf_java_lang_NoClassDefFoundError =
+ utf_new_char(string_java_lang_NoClassDefFoundError);
+
+ utf_java_lang_LinkageError =
+ utf_new_char(string_java_lang_LinkageError);
+
+ utf_java_lang_NoSuchMethodError =
+ utf_new_char(string_java_lang_NoSuchMethodError);
+
+ utf_java_lang_OutOfMemoryError =
+ utf_new_char(string_java_lang_OutOfMemoryError);
+
+ utf_java_lang_Exception = utf_new_char(string_java_lang_Exception);
+
+ utf_java_lang_ClassNotFoundException =
+ utf_new_char(string_java_lang_ClassNotFoundException);
+
+ utf_java_lang_IllegalArgumentException =
+ utf_new_char(string_java_lang_IllegalArgumentException);
+
+ utf_java_lang_IllegalMonitorStateException =
+ utf_new_char(string_java_lang_IllegalMonitorStateException);
+
+ utf_java_lang_NullPointerException =
+ utf_new_char(string_java_lang_NullPointerException);
utf_java_lang_Void = utf_new_char("java/lang/Void");
utf_java_lang_Boolean = utf_new_char("java/lang/Boolean");
utf_java_lang_Float = utf_new_char("java/lang/Float");
utf_java_lang_Double = utf_new_char("java/lang/Double");
+ utf_java_lang_StackTraceElement =
+ utf_new_char("java/lang/StackTraceElement");
+
+ utf_java_lang_reflect_Constructor =
+ utf_new_char("java/lang/reflect/Constructor");
+
+ utf_java_lang_reflect_Field = utf_new_char("java/lang/reflect/Field");
+ utf_java_lang_reflect_Method = utf_new_char("java/lang/reflect/Method");
utf_java_util_Vector = utf_new_char("java/util/Vector");
utf_InnerClasses = utf_new_char("InnerClasses");
utf_init = utf_new_char("<init>");
utf_clinit = utf_new_char("<clinit>");
+ utf_clone = utf_new_char("clone");
utf_finalize = utf_new_char("finalize");
+ utf_run = utf_new_char("run");
+
+ utf_add = utf_new_char("add");
+ utf_remove = utf_new_char("remove");
+ utf_put = utf_new_char("put");
+ utf_get = utf_new_char("get");
+ utf_value = utf_new_char("value");
utf_printStackTrace = utf_new_char("printStackTrace");
utf_fillInStackTrace = utf_new_char("fillInStackTrace");
utf_loadClass = utf_new_char("loadClass");
+ utf_getSystemClassLoader = utf_new_char("getSystemClassLoader");
+
+ utf_Z = utf_new_char("Z");
+ utf_B = utf_new_char("B");
+ utf_C = utf_new_char("C");
+ utf_S = utf_new_char("S");
+ utf_I = utf_new_char("I");
+ utf_J = utf_new_char("J");
+ utf_F = utf_new_char("F");
+ utf_D = utf_new_char("D");
utf_void__void = utf_new_char("()V");
utf_boolean__void = utf_new_char("(Z)V");
utf_long__void = utf_new_char("(J)V");
utf_float__void = utf_new_char("(F)V");
utf_double__void = utf_new_char("(D)V");
+ utf_void__java_lang_Object = utf_new_char("()Ljava/lang/Object;");
utf_void__java_lang_Throwable = utf_new_char("()Ljava/lang/Throwable;");
+
+ utf_void__java_lang_ClassLoader =
+ utf_new_char("()Ljava/lang/ClassLoader;");
+
+ utf_java_lang_Object__java_lang_Object =
+ utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
+
utf_java_lang_String__void = utf_new_char("(Ljava/lang/String;)V");
utf_java_lang_String__java_lang_Class =
utf_java_lang_Throwable__void = utf_new_char("(Ljava/lang/Throwable;)V");
+ utf_not_named_yet = utf_new_char("\t<not_named_yet>");
+
array_packagename = utf_new_char("\t<the array package>");
+
+ /* everything's ok */
+
+ return true;
}
*******************************************************************************/
-utf *utf_new_intern(const char *text, u2 length);
-
utf *utf_new(const char *text, u2 length)
-{
- utf *r;
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
- tables_lock();
-#endif
-
- r = utf_new_intern(text, length);
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
- tables_unlock();
-#endif
-
- return r;
-}
-
-
-utf *utf_new_intern(const char *text, u2 length)
{
u4 key; /* hashkey computed from utf-text */
u4 slot; /* slot in hashtable */
utf *u; /* hashtable element */
u2 i;
-#ifdef STATISTICS
+#if defined(USE_THREADS)
+ builtin_monitorenter(lock_hashtable_utf);
+#endif
+
+#if defined(ENABLE_STATISTICS)
if (opt_stat)
count_utf_new++;
#endif
key = utf_hashkey(text, length);
- slot = key & (utf_hash.size - 1);
- u = utf_hash.ptr[slot];
+ slot = key & (hashtable_utf.size - 1);
+ u = hashtable_utf.ptr[slot];
/* search external hash chain for utf-symbol */
+
while (u) {
if (u->blength == length) {
-
/* compare text of hashtable elements */
+
for (i = 0; i < length; i++)
- if (text[i] != u->text[i]) goto nomatch;
+ if (text[i] != u->text[i])
+ goto nomatch;
-#ifdef STATISTICS
+#if defined(ENABLE_STATISTICS)
if (opt_stat)
count_utf_new_found++;
#endif
/* symbol found in hashtable */
+
+#if defined(USE_THREADS)
+ builtin_monitorexit(lock_hashtable_utf);
+#endif
+
return u;
}
+
nomatch:
u = u->hashlink; /* next element in external chain */
}
-#ifdef STATISTICS
+#if defined(ENABLE_STATISTICS)
if (opt_stat)
- count_utf_len += sizeof(utf) + length;
+ count_utf_len += sizeof(utf) + length + 1;
#endif
/* location in hashtable found, create new utf element */
u = NEW(utf);
u->blength = length; /* length in bytes of utfstring */
- u->hashlink = utf_hash.ptr[slot]; /* link in external hashchain */
+ u->hashlink = hashtable_utf.ptr[slot]; /* link in external hashchain */
u->text = mem_alloc(length + 1);/* allocate memory for utf-text */
+
memcpy(u->text, text, length); /* copy utf-text */
u->text[length] = '\0';
- utf_hash.ptr[slot] = u; /* insert symbol into table */
- utf_hash.entries++; /* update number of entries */
+ hashtable_utf.ptr[slot] = u; /* insert symbol into table */
+ hashtable_utf.entries++; /* update number of entries */
- if (utf_hash.entries > (utf_hash.size * 2)) {
+ if (hashtable_utf.entries > (hashtable_utf.size * 2)) {
- /* reorganization of hashtable, average length of
- the external chains is approx. 2 */
+ /* reorganize the hashtable: the average length of the external
+ chains has grown beyond 2 */
- u4 i;
- utf *u;
- hashtable newhash; /* the new hashtable */
+ hashtable newhash; /* the new hashtable */
+ u4 i;
+ utf *u;
+ utf *nextu;
+ u4 slot;
/* create new hashtable, double the size */
- init_hashtable(&newhash, utf_hash.size * 2);
- newhash.entries = utf_hash.entries;
-#ifdef STATISTICS
+ hashtable_create(&newhash, hashtable_utf.size * 2);
+ newhash.entries = hashtable_utf.entries;
+
+#if defined(ENABLE_STATISTICS)
if (opt_stat)
- count_utf_len += sizeof(utf*) * utf_hash.size;
+ count_utf_len += sizeof(utf*) * hashtable_utf.size;
#endif
/* transfer elements to new hashtable */
- for (i = 0; i < utf_hash.size; i++) {
- u = (utf *) utf_hash.ptr[i];
+
+ for (i = 0; i < hashtable_utf.size; i++) {
+ u = hashtable_utf.ptr[i];
+
while (u) {
- utf *nextu = u->hashlink;
- u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
+ nextu = u->hashlink;
+ slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
u->hashlink = (utf *) newhash.ptr[slot];
newhash.ptr[slot] = u;
/* follow link in external hash chain */
+
u = nextu;
}
}
/* dispose old table */
- MFREE(utf_hash.ptr, void*, utf_hash.size);
- utf_hash = newhash;
+
+ MFREE(hashtable_utf.ptr, void*, hashtable_utf.size);
+ hashtable_utf = newhash;
}
+#if defined(USE_THREADS)
+ builtin_monitorexit(lock_hashtable_utf);
+#endif
+
return u;
}
u4 len = 0; /* number of unicode characters */
if (!u) {
- *exceptionptr = new_nullpointerexception();
+ exceptions_throw_nullpointerexception();
return 0;
}
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
return;
}
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
return;
}
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
return;
}
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos)
return;
}
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
if (!u)
return;
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
if (!u)
return;
- endpos = utf_end(u);
+ endpos = UTF_END(u);
utf_ptr = u->text;
while (utf_ptr < endpos) {
*******************************************************************************/
-static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26};
+/* static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; */
bool is_valid_utf(char *utf_ptr, char *end_pos)
{
} else {
/* Sun Java seems to allow overlong UTF-8 encodings */
- if (v < min_codepoint[len]) { /* overlong UTF-8 */
- if (!opt_liberalutf)
- fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
- /* XXX change this to panic? */
- }
+ /* if (v < min_codepoint[len]) */
+ /* XXX throw exception? */
}
/* surrogates in UTF-8 seem to be allowed in Java classfiles */
bool is_valid_name_utf(utf *u)
{
- return is_valid_name(u->text,utf_end(u));
+ return is_valid_name(u->text, UTF_END(u));
}
*******************************************************************************/
+#if !defined(NDEBUG)
void utf_show(void)
{
u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
u4 i;
- printf ("UTF-HASH:\n");
+ printf("UTF-HASH:\n");
/* show all elements of the utf-hashtable */
- for (i=0; i<utf_hash.size; i++) {
- utf *u = utf_hash.ptr[i];
+
+ for (i = 0; i < hashtable_utf.size; i++) {
+ utf *u = hashtable_utf.ptr[i];
+
if (u) {
- printf ("SLOT %d: ", (int) i);
+ printf("SLOT %d: ", (int) i);
+
while (u) {
- printf ("'");
- utf_display (u);
- printf ("' ");
+ printf("'");
+ utf_display(u);
+ printf("' ");
u = u->hashlink;
}
- printf ("\n");
+ printf("\n");
}
-
}
- printf ("UTF-HASH: %d slots for %d entries\n",
- (int) utf_hash.size, (int) utf_hash.entries );
-
+ printf("UTF-HASH: %d slots for %d entries\n",
+ (int) hashtable_utf.size, (int) hashtable_utf.entries );
- if (utf_hash.entries == 0)
+ if (hashtable_utf.entries == 0)
return;
printf("chains:\n chainlength number of chains %% of utfstrings\n");
chain_count[i]=0;
/* count numbers of hashchains according to their length */
- for (i=0; i<utf_hash.size; i++) {
+ for (i=0; i<hashtable_utf.size; i++) {
- utf *u = (utf*) utf_hash.ptr[i];
+ utf *u = (utf*) hashtable_utf.ptr[i];
u4 chain_length = 0;
/* determine chainlength */
/* display results */
for (i=1;i<CHAIN_LIMIT-1;i++)
- printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
+ printf(" %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf.entries));
- printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
+ printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf.entries);
printf("max. chainlength:%5d\n",max_chainlength);
/* avg. chainlength = sum of chainlengths / number of chains */
- printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
+ printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf.size-chain_count[0]));
}
+#endif /* !defined(NDEBUG) */
/*