X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=src%2Fvm%2Futf8.c;h=f14cf3e2c0f30e4ffa58403829ff4632c8dd8230;hb=9d34504e8c6d2e4f20c0f9d7951d8d78ad60935b;hp=2a080be5edbcb37c216b98f033fd2e99e89bd50f;hpb=303d550738d8ceae49b7a40c9b13187a93bcc849;p=cacao.git diff --git a/src/vm/utf8.c b/src/vm/utf8.c index 2a080be5e..f14cf3e2c 100644 --- a/src/vm/utf8.c +++ b/src/vm/utf8.c @@ -1,9 +1,9 @@ /* src/vm/utf.c - utf functions - Copyright (C) 1996-2005 R. Grafl, A. Krall, C. Kruegel, C. Oates, - R. Obermaisser, M. Platter, M. Probst, S. Ring, E. Steiner, - C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich, J. Wenninger, - Institut f. Computersprachen - TU Wien + Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel, + C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring, + E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich, + J. Wenninger, Institut f. Computersprachen - TU Wien This file is part of CACAO. @@ -19,10 +19,10 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. - Contact: cacao@complang.tuwien.ac.at + Contact: cacao@cacaojvm.org Authors: Reinhard Grafl @@ -30,23 +30,46 @@ Andreas Krall Christian Thalinger - $Id: utf8.c 2560 2005-06-06 15:20:41Z twisti $ + $Id: utf8.c 4357 2006-01-22 23:33:38Z twisti $ */ #include +#include + +#include "config.h" +#include "vm/types.h" #include "mm/memory.h" + +#if defined(USE_THREADS) +# if defined(NATIVE_THREADS) +# include "threads/native/threads.h" +# else +# include "threads/green/threads.h" +# endif +#endif + +#include "vm/builtin.h" #include "vm/exceptions.h" +#include "vm/hashtable.h" #include "vm/options.h" #include "vm/statistics.h" #include "vm/stringlocal.h" -#include "vm/tables.h" #include "vm/utf8.h" +/* global variables ***********************************************************/ + +/* hashsize must be power of 2 */ -hashtable utf_hash; /* hashtable for utf8-symbols */ +#define HASHTABLE_UTF_SIZE 16384 /* initial size of utf-hash */ + +hashtable hashtable_utf; /* hashtable for utf8-symbols */ + +#if defined(USE_THREADS) +static java_objectheader *lock_hashtable_utf; +#endif /* utf-symbols for pointer comparison of frequently used strings **************/ @@ -65,10 +88,17 @@ utf *utf_java_io_Serializable; utf *utf_java_lang_Throwable; utf *utf_java_lang_VMThrowable; utf *utf_java_lang_Error; -utf *utf_java_lang_Exception; utf *utf_java_lang_NoClassDefFoundError; +utf *utf_java_lang_LinkageError; +utf *utf_java_lang_NoSuchMethodError; utf *utf_java_lang_OutOfMemoryError; + +utf *utf_java_lang_Exception; utf *utf_java_lang_ClassNotFoundException; +utf *utf_java_lang_IllegalArgumentException; +utf *utf_java_lang_IllegalMonitorStateException; + +utf *utf_java_lang_NullPointerException; utf* utf_java_lang_Void; utf* utf_java_lang_Boolean; @@ -80,10 +110,11 @@ utf* utf_java_lang_Long; utf* utf_java_lang_Float; utf* utf_java_lang_Double; -utf *utf_java_util_Vector; +utf *utf_java_lang_StackTraceElement; utf *utf_java_lang_reflect_Constructor; +utf *utf_java_lang_reflect_Field; utf *utf_java_lang_reflect_Method; - +utf *utf_java_util_Vector; utf *utf_InnerClasses; /* InnerClasses */ utf *utf_ConstantValue; /* ConstantValue */ @@ -96,10 +127,27 @@ utf *utf_init; /* */ utf *utf_clinit; /* */ utf *utf_clone; /* clone */ utf *utf_finalize; /* finalize */ +utf *utf_run; /* run */ + +utf *utf_add; /* add */ +utf *utf_remove; /* remove */ +utf *utf_put; /* put */ +utf *utf_get; /* get */ +utf *utf_value; /* value */ -utf *utf_printStackTrace; utf *utf_fillInStackTrace; +utf *utf_getSystemClassLoader; utf *utf_loadClass; +utf *utf_printStackTrace; + +utf *utf_Z; /* Z */ +utf *utf_B; /* B */ +utf *utf_C; /* C */ +utf *utf_S; /* S */ +utf *utf_I; /* I */ +utf *utf_J; /* J */ +utf *utf_F; /* F */ +utf *utf_D; /* D */ utf *utf_void__void; /* ()V */ utf *utf_boolean__void; /* (Z)V */ @@ -110,8 +158,11 @@ utf *utf_int__void; /* (I)V */ utf *utf_long__void; /* (J)V */ utf *utf_float__void; /* (F)V */ utf *utf_double__void; /* (D)V */ + +utf *utf_void__java_lang_ClassLoader; /* ()Ljava/lang/ClassLoader; */ utf *utf_void__java_lang_Object; /* ()Ljava/lang/Object; */ utf *utf_void__java_lang_Throwable; /* ()Ljava/lang/Throwable; */ +utf *utf_java_lang_Object__java_lang_Object; utf *utf_java_lang_String__void; /* (Ljava/lang/String;)V */ utf *utf_java_lang_String__java_lang_Class; utf *utf_java_lang_Throwable__void; /* (Ljava/lang/Throwable;)V */ @@ -127,8 +178,27 @@ utf *array_packagename; *******************************************************************************/ -void utf8_init(void) +bool utf8_init(void) { + /* create utf8 hashtable */ + + hashtable_create(&hashtable_utf, HASHTABLE_UTF_SIZE); + +#if defined(ENABLE_STATISTICS) + if (opt_stat) + count_utf_len += sizeof(utf*) * hashtable_utf.size; +#endif + +#if defined(USE_THREADS) + /* create utf hashtable lock object */ + + lock_hashtable_utf = NEW(java_objectheader); + +# if defined(NATIVE_THREADS) + initObjectLock(lock_hashtable_utf); +# endif +#endif + /* create utf-symbols for pointer comparison of frequently used strings */ utf_java_lang_Object = utf_new_char("java/lang/Object"); @@ -145,17 +215,33 @@ void utf8_init(void) utf_java_lang_Throwable = utf_new_char(string_java_lang_Throwable); utf_java_lang_VMThrowable = utf_new_char(string_java_lang_VMThrowable); utf_java_lang_Error = utf_new_char(string_java_lang_Error); - utf_java_lang_Exception = utf_new_char(string_java_lang_Exception); utf_java_lang_NoClassDefFoundError = utf_new_char(string_java_lang_NoClassDefFoundError); + utf_java_lang_LinkageError = + utf_new_char(string_java_lang_LinkageError); + + utf_java_lang_NoSuchMethodError = + utf_new_char(string_java_lang_NoSuchMethodError); + utf_java_lang_OutOfMemoryError = utf_new_char(string_java_lang_OutOfMemoryError); + utf_java_lang_Exception = utf_new_char(string_java_lang_Exception); + utf_java_lang_ClassNotFoundException = utf_new_char(string_java_lang_ClassNotFoundException); + utf_java_lang_IllegalArgumentException = + utf_new_char(string_java_lang_IllegalArgumentException); + + utf_java_lang_IllegalMonitorStateException = + utf_new_char(string_java_lang_IllegalMonitorStateException); + + utf_java_lang_NullPointerException = + utf_new_char(string_java_lang_NullPointerException); + utf_java_lang_Void = utf_new_char("java/lang/Void"); utf_java_lang_Boolean = utf_new_char("java/lang/Boolean"); utf_java_lang_Byte = utf_new_char("java/lang/Byte"); @@ -166,12 +252,15 @@ void utf8_init(void) utf_java_lang_Float = utf_new_char("java/lang/Float"); utf_java_lang_Double = utf_new_char("java/lang/Double"); - utf_java_util_Vector = utf_new_char("java/util/Vector"); + utf_java_lang_StackTraceElement = + utf_new_char("java/lang/StackTraceElement"); utf_java_lang_reflect_Constructor = utf_new_char("java/lang/reflect/Constructor"); + utf_java_lang_reflect_Field = utf_new_char("java/lang/reflect/Field"); utf_java_lang_reflect_Method = utf_new_char("java/lang/reflect/Method"); + utf_java_util_Vector = utf_new_char("java/util/Vector"); utf_InnerClasses = utf_new_char("InnerClasses"); utf_ConstantValue = utf_new_char("ConstantValue"); @@ -184,10 +273,27 @@ void utf8_init(void) utf_clinit = utf_new_char(""); utf_clone = utf_new_char("clone"); utf_finalize = utf_new_char("finalize"); + utf_run = utf_new_char("run"); + + utf_add = utf_new_char("add"); + utf_remove = utf_new_char("remove"); + utf_put = utf_new_char("put"); + utf_get = utf_new_char("get"); + utf_value = utf_new_char("value"); utf_printStackTrace = utf_new_char("printStackTrace"); utf_fillInStackTrace = utf_new_char("fillInStackTrace"); utf_loadClass = utf_new_char("loadClass"); + utf_getSystemClassLoader = utf_new_char("getSystemClassLoader"); + + utf_Z = utf_new_char("Z"); + utf_B = utf_new_char("B"); + utf_C = utf_new_char("C"); + utf_S = utf_new_char("S"); + utf_I = utf_new_char("I"); + utf_J = utf_new_char("J"); + utf_F = utf_new_char("F"); + utf_D = utf_new_char("D"); utf_void__void = utf_new_char("()V"); utf_boolean__void = utf_new_char("(Z)V"); @@ -200,6 +306,13 @@ void utf8_init(void) utf_double__void = utf_new_char("(D)V"); utf_void__java_lang_Object = utf_new_char("()Ljava/lang/Object;"); utf_void__java_lang_Throwable = utf_new_char("()Ljava/lang/Throwable;"); + + utf_void__java_lang_ClassLoader = + utf_new_char("()Ljava/lang/ClassLoader;"); + + utf_java_lang_Object__java_lang_Object = + utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;"); + utf_java_lang_String__void = utf_new_char("(Ljava/lang/String;)V"); utf_java_lang_String__java_lang_Class = @@ -210,6 +323,10 @@ void utf8_init(void) utf_not_named_yet = utf_new_char("\t"); array_packagename = utf_new_char("\t"); + + /* everything's ok */ + + return true; } @@ -362,116 +479,120 @@ u4 unicode_hashkey(u2 *text, u2 len) *******************************************************************************/ -utf *utf_new_intern(const char *text, u2 length); - utf *utf_new(const char *text, u2 length) -{ - utf *r; - -#if defined(USE_THREADS) && defined(NATIVE_THREADS) - tables_lock(); -#endif - - r = utf_new_intern(text, length); - -#if defined(USE_THREADS) && defined(NATIVE_THREADS) - tables_unlock(); -#endif - - return r; -} - - -utf *utf_new_intern(const char *text, u2 length) { u4 key; /* hashkey computed from utf-text */ u4 slot; /* slot in hashtable */ utf *u; /* hashtable element */ u2 i; -#ifdef STATISTICS +#if defined(USE_THREADS) + builtin_monitorenter(lock_hashtable_utf); +#endif + +#if defined(ENABLE_STATISTICS) if (opt_stat) count_utf_new++; #endif key = utf_hashkey(text, length); - slot = key & (utf_hash.size - 1); - u = utf_hash.ptr[slot]; + slot = key & (hashtable_utf.size - 1); + u = hashtable_utf.ptr[slot]; /* search external hash chain for utf-symbol */ + while (u) { if (u->blength == length) { - /* compare text of hashtable elements */ + for (i = 0; i < length; i++) - if (text[i] != u->text[i]) goto nomatch; + if (text[i] != u->text[i]) + goto nomatch; -#ifdef STATISTICS +#if defined(ENABLE_STATISTICS) if (opt_stat) count_utf_new_found++; #endif /* symbol found in hashtable */ + +#if defined(USE_THREADS) + builtin_monitorexit(lock_hashtable_utf); +#endif + return u; } + nomatch: u = u->hashlink; /* next element in external chain */ } -#ifdef STATISTICS +#if defined(ENABLE_STATISTICS) if (opt_stat) - count_utf_len += sizeof(utf) + length; + count_utf_len += sizeof(utf) + length + 1; #endif /* location in hashtable found, create new utf element */ u = NEW(utf); u->blength = length; /* length in bytes of utfstring */ - u->hashlink = utf_hash.ptr[slot]; /* link in external hashchain */ + u->hashlink = hashtable_utf.ptr[slot]; /* link in external hashchain */ u->text = mem_alloc(length + 1);/* allocate memory for utf-text */ + memcpy(u->text, text, length); /* copy utf-text */ u->text[length] = '\0'; - utf_hash.ptr[slot] = u; /* insert symbol into table */ - utf_hash.entries++; /* update number of entries */ + hashtable_utf.ptr[slot] = u; /* insert symbol into table */ + hashtable_utf.entries++; /* update number of entries */ - if (utf_hash.entries > (utf_hash.size * 2)) { + if (hashtable_utf.entries > (hashtable_utf.size * 2)) { - /* reorganization of hashtable, average length of - the external chains is approx. 2 */ + /* reorganization of hashtable, average length of the external + chains is approx. 2 */ - u4 i; - utf *u; - hashtable newhash; /* the new hashtable */ + hashtable newhash; /* the new hashtable */ + u4 i; + utf *u; + utf *nextu; + u4 slot; /* create new hashtable, double the size */ - init_hashtable(&newhash, utf_hash.size * 2); - newhash.entries = utf_hash.entries; -#ifdef STATISTICS + hashtable_create(&newhash, hashtable_utf.size * 2); + newhash.entries = hashtable_utf.entries; + +#if defined(ENABLE_STATISTICS) if (opt_stat) - count_utf_len += sizeof(utf*) * utf_hash.size; + count_utf_len += sizeof(utf*) * hashtable_utf.size; #endif /* transfer elements to new hashtable */ - for (i = 0; i < utf_hash.size; i++) { - u = (utf *) utf_hash.ptr[i]; + + for (i = 0; i < hashtable_utf.size; i++) { + u = hashtable_utf.ptr[i]; + while (u) { - utf *nextu = u->hashlink; - u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1); + nextu = u->hashlink; + slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1); u->hashlink = (utf *) newhash.ptr[slot]; newhash.ptr[slot] = u; /* follow link in external hash chain */ + u = nextu; } } /* dispose old table */ - MFREE(utf_hash.ptr, void*, utf_hash.size); - utf_hash = newhash; + + MFREE(hashtable_utf.ptr, void*, hashtable_utf.size); + hashtable_utf = newhash; } +#if defined(USE_THREADS) + builtin_monitorexit(lock_hashtable_utf); +#endif + return u; } @@ -654,11 +775,11 @@ u4 utf_strlen(utf *u) u4 len = 0; /* number of unicode characters */ if (!u) { - *exceptionptr = new_nullpointerexception(); + exceptions_throw_nullpointerexception(); return 0; } - endpos = utf_end(u); + endpos = UTF_END(u); utf_ptr = u->text; while (utf_ptr < endpos) { @@ -722,7 +843,7 @@ void utf_display(utf *u) return; } - endpos = utf_end(u); + endpos = UTF_END(u); utf_ptr = u->text; while (utf_ptr < endpos) { @@ -754,7 +875,7 @@ void utf_display_classname(utf *u) return; } - endpos = utf_end(u); + endpos = UTF_END(u); utf_ptr = u->text; while (utf_ptr < endpos) { @@ -786,7 +907,7 @@ void utf_sprint(char *buffer, utf *u) return; } - endpos = utf_end(u); + endpos = UTF_END(u); utf_ptr = u->text; while (utf_ptr < endpos) @@ -816,7 +937,7 @@ void utf_sprint_classname(char *buffer, utf *u) return; } - endpos = utf_end(u); + endpos = UTF_END(u); utf_ptr = u->text; while (utf_ptr < endpos) { @@ -869,7 +990,7 @@ void utf_fprint(FILE *file, utf *u) if (!u) return; - endpos = utf_end(u); + endpos = UTF_END(u); utf_ptr = u->text; while (utf_ptr < endpos) { @@ -896,7 +1017,7 @@ void utf_fprint_classname(FILE *file, utf *u) if (!u) return; - endpos = utf_end(u); + endpos = UTF_END(u); utf_ptr = u->text; while (utf_ptr < endpos) { @@ -919,7 +1040,7 @@ void utf_fprint_classname(FILE *file, utf *u) *******************************************************************************/ -static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; +/* static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; */ bool is_valid_utf(char *utf_ptr, char *end_pos) { @@ -962,11 +1083,8 @@ bool is_valid_utf(char *utf_ptr, char *end_pos) } else { /* Sun Java seems to allow overlong UTF-8 encodings */ - if (v < min_codepoint[len]) { /* overlong UTF-8 */ - if (!opt_liberalutf) - fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n"); - /* XXX change this to exception? */ - } + /* if (v < min_codepoint[len]) */ + /* XXX throw exception? */ } /* surrogates in UTF-8 seem to be allowed in Java classfiles */ @@ -1010,7 +1128,7 @@ bool is_valid_name(char *utf_ptr, char *end_pos) bool is_valid_name_utf(utf *u) { - return is_valid_name(u->text,utf_end(u)); + return is_valid_name(u->text, UTF_END(u)); } @@ -1022,6 +1140,7 @@ bool is_valid_name_utf(utf *u) *******************************************************************************/ +#if !defined(NDEBUG) void utf_show(void) { @@ -1033,29 +1152,30 @@ void utf_show(void) u4 beyond_limit = 0; /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */ u4 i; - printf ("UTF-HASH:\n"); + printf("UTF-HASH:\n"); /* show element of utf-hashtable */ - for (i=0; ihashlink; } - printf ("\n"); + printf("\n"); } - } - printf ("UTF-HASH: %d slots for %d entries\n", - (int) utf_hash.size, (int) utf_hash.entries ); - + printf("UTF-HASH: %d slots for %d entries\n", + (int) hashtable_utf.size, (int) hashtable_utf.entries ); - if (utf_hash.entries == 0) + if (hashtable_utf.entries == 0) return; printf("chains:\n chainlength number of chains %% of utfstrings\n"); @@ -1064,9 +1184,9 @@ void utf_show(void) chain_count[i]=0; /* count numbers of hashchains according to their length */ - for (i=0; i=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries); + printf(" >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf.entries); printf("max. chainlength:%5d\n",max_chainlength); /* avg. chainlength = sum of chainlengths / number of chains */ - printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0])); + printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf.size-chain_count[0])); } +#endif /* !defined(NDEBUG) */ /*