* src/vm/jit/intrp/codegen.c (createcalljavafunction):
[cacao.git] / src / vm / utf8.c
index 2a080be5edbcb37c216b98f033fd2e99e89bd50f..f14cf3e2c0f30e4ffa58403829ff4632c8dd8230 100644 (file)
@@ -1,9 +1,9 @@
 /* src/vm/utf.c - utf functions
 
-   Copyright (C) 1996-2005 R. Grafl, A. Krall, C. Kruegel, C. Oates,
-   R. Obermaisser, M. Platter, M. Probst, S. Ring, E. Steiner,
-   C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich, J. Wenninger,
-   Institut f. Computersprachen - TU Wien
+   Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel,
+   C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
+   E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
+   J. Wenninger, Institut f. Computersprachen - TU Wien
 
    This file is part of CACAO.
 
 
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-   02111-1307, USA.
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
 
-   Contact: cacao@complang.tuwien.ac.at
+   Contact: cacao@cacaojvm.org
 
    Authors: Reinhard Grafl
 
             Andreas Krall
             Christian Thalinger
 
-   $Id: utf8.c 2560 2005-06-06 15:20:41Z twisti $
+   $Id: utf8.c 4357 2006-01-22 23:33:38Z twisti $
 
 */
 
 
 #include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "vm/types.h"
 
 #include "mm/memory.h"
+
+#if defined(USE_THREADS)
+# if defined(NATIVE_THREADS)
+#  include "threads/native/threads.h"
+# else
+#  include "threads/green/threads.h"
+# endif
+#endif
+
+#include "vm/builtin.h"
 #include "vm/exceptions.h"
+#include "vm/hashtable.h"
 #include "vm/options.h"
 #include "vm/statistics.h"
 #include "vm/stringlocal.h"
-#include "vm/tables.h"
 #include "vm/utf8.h"
 
+/* global variables ***********************************************************/
+
+/* hashsize must be a power of 2: slots are computed as key & (size - 1) */
 
-hashtable utf_hash;                     /* hashtable for utf8-symbols         */
+#define HASHTABLE_UTF_SIZE    16384     /* initial size of utf-hash           */
+
+hashtable hashtable_utf;                /* hashtable for utf8-symbols         */
+
+#if defined(USE_THREADS)
+static java_objectheader *lock_hashtable_utf;
+#endif
 
 
 /* utf-symbols for pointer comparison of frequently used strings **************/
@@ -65,10 +88,17 @@ utf *utf_java_io_Serializable;
 utf *utf_java_lang_Throwable;
 utf *utf_java_lang_VMThrowable;
 utf *utf_java_lang_Error;
-utf *utf_java_lang_Exception;
 utf *utf_java_lang_NoClassDefFoundError;
+utf *utf_java_lang_LinkageError;
+utf *utf_java_lang_NoSuchMethodError;
 utf *utf_java_lang_OutOfMemoryError;
+
+utf *utf_java_lang_Exception;
 utf *utf_java_lang_ClassNotFoundException;
+utf *utf_java_lang_IllegalArgumentException;
+utf *utf_java_lang_IllegalMonitorStateException;
+
+utf *utf_java_lang_NullPointerException;
 
 utf* utf_java_lang_Void;
 utf* utf_java_lang_Boolean;
@@ -80,10 +110,11 @@ utf* utf_java_lang_Long;
 utf* utf_java_lang_Float;
 utf* utf_java_lang_Double;
 
-utf *utf_java_util_Vector;
+utf *utf_java_lang_StackTraceElement;
 utf *utf_java_lang_reflect_Constructor;
+utf *utf_java_lang_reflect_Field;
 utf *utf_java_lang_reflect_Method;
-
+utf *utf_java_util_Vector;
 
 utf *utf_InnerClasses;                  /* InnerClasses                       */
 utf *utf_ConstantValue;                 /* ConstantValue                      */
@@ -96,10 +127,27 @@ utf *utf_init;                          /* <init>                             */
 utf *utf_clinit;                        /* <clinit>                           */
 utf *utf_clone;                         /* clone                              */
 utf *utf_finalize;                      /* finalize                           */
+utf *utf_run;                           /* run                                */
+
+utf *utf_add;                           /* add                                */
+utf *utf_remove;                        /* remove                             */
+utf *utf_put;                           /* put                                */
+utf *utf_get;                           /* get                                */
+utf *utf_value;                         /* value                              */
 
-utf *utf_printStackTrace;
 utf *utf_fillInStackTrace;
+utf *utf_getSystemClassLoader;
 utf *utf_loadClass;
+utf *utf_printStackTrace;
+
+utf *utf_Z;                             /* Z                                  */
+utf *utf_B;                             /* B                                  */
+utf *utf_C;                             /* C                                  */
+utf *utf_S;                             /* S                                  */
+utf *utf_I;                             /* I                                  */
+utf *utf_J;                             /* J                                  */
+utf *utf_F;                             /* F                                  */
+utf *utf_D;                             /* D                                  */
 
 utf *utf_void__void;                    /* ()V                                */
 utf *utf_boolean__void;                 /* (Z)V                               */
@@ -110,8 +158,11 @@ utf *utf_int__void;                     /* (I)V                               */
 utf *utf_long__void;                    /* (J)V                               */
 utf *utf_float__void;                   /* (F)V                               */
 utf *utf_double__void;                  /* (D)V                               */
+
+utf *utf_void__java_lang_ClassLoader;   /* ()Ljava/lang/ClassLoader;          */
 utf *utf_void__java_lang_Object;        /* ()Ljava/lang/Object;               */
 utf *utf_void__java_lang_Throwable;     /* ()Ljava/lang/Throwable;            */
+utf *utf_java_lang_Object__java_lang_Object;
 utf *utf_java_lang_String__void;        /* (Ljava/lang/String;)V              */
 utf *utf_java_lang_String__java_lang_Class;
 utf *utf_java_lang_Throwable__void;     /* (Ljava/lang/Throwable;)V           */
@@ -127,8 +178,27 @@ utf *array_packagename;
 
 *******************************************************************************/
 
-void utf8_init(void)
+bool utf8_init(void)
 {
+       /* create utf8 hashtable */
+
+       hashtable_create(&hashtable_utf, HASHTABLE_UTF_SIZE);
+
+#if defined(ENABLE_STATISTICS)
+       if (opt_stat)
+               count_utf_len += sizeof(utf*) * hashtable_utf.size;
+#endif
+
+#if defined(USE_THREADS)
+       /* create utf hashtable lock object */
+
+       lock_hashtable_utf = NEW(java_objectheader);
+
+# if defined(NATIVE_THREADS)
+       initObjectLock(lock_hashtable_utf);
+# endif
+#endif
+
        /* create utf-symbols for pointer comparison of frequently used strings */
 
        utf_java_lang_Object           = utf_new_char("java/lang/Object");
@@ -145,17 +215,33 @@ void utf8_init(void)
        utf_java_lang_Throwable        = utf_new_char(string_java_lang_Throwable);
        utf_java_lang_VMThrowable      = utf_new_char(string_java_lang_VMThrowable);
        utf_java_lang_Error            = utf_new_char(string_java_lang_Error);
-       utf_java_lang_Exception        = utf_new_char(string_java_lang_Exception);
 
        utf_java_lang_NoClassDefFoundError =
                utf_new_char(string_java_lang_NoClassDefFoundError);
 
+       utf_java_lang_LinkageError =
+               utf_new_char(string_java_lang_LinkageError);
+
+       utf_java_lang_NoSuchMethodError =
+               utf_new_char(string_java_lang_NoSuchMethodError);
+
        utf_java_lang_OutOfMemoryError =
                utf_new_char(string_java_lang_OutOfMemoryError);
 
+       utf_java_lang_Exception        = utf_new_char(string_java_lang_Exception);
+
        utf_java_lang_ClassNotFoundException =
                utf_new_char(string_java_lang_ClassNotFoundException);
 
+       utf_java_lang_IllegalArgumentException =
+               utf_new_char(string_java_lang_IllegalArgumentException);
+
+       utf_java_lang_IllegalMonitorStateException =
+               utf_new_char(string_java_lang_IllegalMonitorStateException);
+
+       utf_java_lang_NullPointerException =
+               utf_new_char(string_java_lang_NullPointerException);
+
        utf_java_lang_Void             = utf_new_char("java/lang/Void");
        utf_java_lang_Boolean          = utf_new_char("java/lang/Boolean");
        utf_java_lang_Byte             = utf_new_char("java/lang/Byte");
@@ -166,12 +252,15 @@ void utf8_init(void)
        utf_java_lang_Float            = utf_new_char("java/lang/Float");
        utf_java_lang_Double           = utf_new_char("java/lang/Double");
 
-       utf_java_util_Vector           = utf_new_char("java/util/Vector");
+       utf_java_lang_StackTraceElement =
+               utf_new_char("java/lang/StackTraceElement");
 
        utf_java_lang_reflect_Constructor =
                utf_new_char("java/lang/reflect/Constructor");
 
+       utf_java_lang_reflect_Field    = utf_new_char("java/lang/reflect/Field");
        utf_java_lang_reflect_Method   = utf_new_char("java/lang/reflect/Method");
+       utf_java_util_Vector           = utf_new_char("java/util/Vector");
 
        utf_InnerClasses               = utf_new_char("InnerClasses");
        utf_ConstantValue              = utf_new_char("ConstantValue");
@@ -184,10 +273,27 @@ void utf8_init(void)
        utf_clinit                         = utf_new_char("<clinit>");
        utf_clone                      = utf_new_char("clone");
        utf_finalize                   = utf_new_char("finalize");
+       utf_run                        = utf_new_char("run");
+
+       utf_add                        = utf_new_char("add");
+       utf_remove                     = utf_new_char("remove");
+       utf_put                        = utf_new_char("put");
+       utf_get                        = utf_new_char("get");
+       utf_value                      = utf_new_char("value");
 
        utf_printStackTrace            = utf_new_char("printStackTrace");
        utf_fillInStackTrace           = utf_new_char("fillInStackTrace");
        utf_loadClass                  = utf_new_char("loadClass");
+       utf_getSystemClassLoader       = utf_new_char("getSystemClassLoader");
+
+       utf_Z                          = utf_new_char("Z");
+       utf_B                          = utf_new_char("B");
+       utf_C                          = utf_new_char("C");
+       utf_S                          = utf_new_char("S");
+       utf_I                          = utf_new_char("I");
+       utf_J                          = utf_new_char("J");
+       utf_F                          = utf_new_char("F");
+       utf_D                          = utf_new_char("D");
 
        utf_void__void                 = utf_new_char("()V");
        utf_boolean__void              = utf_new_char("(Z)V");
@@ -200,6 +306,13 @@ void utf8_init(void)
        utf_double__void               = utf_new_char("(D)V");
        utf_void__java_lang_Object     = utf_new_char("()Ljava/lang/Object;");
        utf_void__java_lang_Throwable  = utf_new_char("()Ljava/lang/Throwable;");
+
+       utf_void__java_lang_ClassLoader =
+               utf_new_char("()Ljava/lang/ClassLoader;");
+
+       utf_java_lang_Object__java_lang_Object =
+               utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
+
        utf_java_lang_String__void     = utf_new_char("(Ljava/lang/String;)V");
 
        utf_java_lang_String__java_lang_Class =
@@ -210,6 +323,10 @@ void utf8_init(void)
        utf_not_named_yet              = utf_new_char("\t<not_named_yet>");
 
        array_packagename              = utf_new_char("\t<the array package>");
+
+       /* everything's ok */
+
+       return true;
 }
 
 
@@ -362,116 +479,120 @@ u4 unicode_hashkey(u2 *text, u2 len)
 
 *******************************************************************************/
 
-utf *utf_new_intern(const char *text, u2 length);
-
 utf *utf_new(const char *text, u2 length)
-{
-    utf *r;
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-    tables_lock();
-#endif
-
-    r = utf_new_intern(text, length);
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-    tables_unlock();
-#endif
-
-    return r;
-}
-
-
-utf *utf_new_intern(const char *text, u2 length)
 {
        u4 key;                             /* hashkey computed from utf-text     */
        u4 slot;                            /* slot in hashtable                  */
        utf *u;                             /* hashtable element                  */
        u2 i;
 
-#ifdef STATISTICS
+#if defined(USE_THREADS)
+       builtin_monitorenter(lock_hashtable_utf);
+#endif
+
+#if defined(ENABLE_STATISTICS)
        if (opt_stat)
                count_utf_new++;
 #endif
 
        key  = utf_hashkey(text, length);
-       slot = key & (utf_hash.size - 1);
-       u    = utf_hash.ptr[slot];
+       slot = key & (hashtable_utf.size - 1);
+       u    = hashtable_utf.ptr[slot];
 
        /* search external hash chain for utf-symbol */
+
        while (u) {
                if (u->blength == length) {
-
                        /* compare text of hashtable elements */
+
                        for (i = 0; i < length; i++)
-                               if (text[i] != u->text[i]) goto nomatch;
+                               if (text[i] != u->text[i])
+                                       goto nomatch;
                        
-#ifdef STATISTICS
+#if defined(ENABLE_STATISTICS)
                        if (opt_stat)
                                count_utf_new_found++;
 #endif
 
                        /* symbol found in hashtable */
+
+#if defined(USE_THREADS)
+                       builtin_monitorexit(lock_hashtable_utf);
+#endif
+
                        return u;
                }
+
        nomatch:
                u = u->hashlink; /* next element in external chain */
        }
 
-#ifdef STATISTICS
+#if defined(ENABLE_STATISTICS)
        if (opt_stat)
-               count_utf_len += sizeof(utf) + length;
+               count_utf_len += sizeof(utf) + length + 1;
 #endif
 
        /* location in hashtable found, create new utf element */
        u = NEW(utf);
        u->blength  = length;               /* length in bytes of utfstring       */
-       u->hashlink = utf_hash.ptr[slot];   /* link in external hashchain         */
+       u->hashlink = hashtable_utf.ptr[slot]; /* link in external hashchain      */
        u->text     = mem_alloc(length + 1);/* allocate memory for utf-text       */
+
        memcpy(u->text, text, length);      /* copy utf-text                      */
        u->text[length] = '\0';
-       utf_hash.ptr[slot] = u;             /* insert symbol into table           */
 
-       utf_hash.entries++;                 /* update number of entries           */
+       hashtable_utf.ptr[slot] = u;        /* insert symbol into table           */
+       hashtable_utf.entries++;            /* update number of entries           */
 
-       if (utf_hash.entries > (utf_hash.size * 2)) {
+       if (hashtable_utf.entries > (hashtable_utf.size * 2)) {
 
-        /* reorganization of hashtable, average length of 
-           the external chains is approx. 2                */  
+        /* reorganize the hashtable: the average length of the external
+           chains has exceeded approx. 2 */
 
-               u4 i;
-               utf *u;
-               hashtable newhash; /* the new hashtable */
+               hashtable  newhash;                              /* the new hashtable */
+               u4         i;
+               utf       *u;
+               utf       *nextu;
+               u4         slot;
 
                /* create new hashtable, double the size */
-               init_hashtable(&newhash, utf_hash.size * 2);
-               newhash.entries = utf_hash.entries;
 
-#ifdef STATISTICS
+               hashtable_create(&newhash, hashtable_utf.size * 2);
+               newhash.entries = hashtable_utf.entries;
+
+#if defined(ENABLE_STATISTICS)
                if (opt_stat)
-                       count_utf_len += sizeof(utf*) * utf_hash.size;
+                       count_utf_len += sizeof(utf*) * hashtable_utf.size;
 #endif
 
                /* transfer elements to new hashtable */
-               for (i = 0; i < utf_hash.size; i++) {
-                       u = (utf *) utf_hash.ptr[i];
+
+               for (i = 0; i < hashtable_utf.size; i++) {
+                       u = hashtable_utf.ptr[i];
+
                        while (u) {
-                               utf *nextu = u->hashlink;
-                               u4 slot = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
+                               nextu = u->hashlink;
+                               slot  = utf_hashkey(u->text, u->blength) & (newhash.size - 1);
                                                
                                u->hashlink = (utf *) newhash.ptr[slot];
                                newhash.ptr[slot] = u;
 
                                /* follow link in external hash chain */
+
                                u = nextu;
                        }
                }
        
                /* dispose old table */
-               MFREE(utf_hash.ptr, void*, utf_hash.size);
-               utf_hash = newhash;
+
+               MFREE(hashtable_utf.ptr, void*, hashtable_utf.size);
+               hashtable_utf = newhash;
        }
 
+#if defined(USE_THREADS)
+       builtin_monitorexit(lock_hashtable_utf);
+#endif
+
        return u;
 }
 
@@ -654,11 +775,11 @@ u4 utf_strlen(utf *u)
        u4 len = 0;                         /* number of unicode characters       */
 
        if (!u) {
-               *exceptionptr = new_nullpointerexception();
+               exceptions_throw_nullpointerexception();
                return 0;
        }
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) {
@@ -722,7 +843,7 @@ void utf_display(utf *u)
                return;
        }
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) {
@@ -754,7 +875,7 @@ void utf_display_classname(utf *u)
                return;
        }
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) {
@@ -786,7 +907,7 @@ void utf_sprint(char *buffer, utf *u)
                return;
        }
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) 
@@ -816,7 +937,7 @@ void utf_sprint_classname(char *buffer, utf *u)
                return;
        }
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) {
@@ -869,7 +990,7 @@ void utf_fprint(FILE *file, utf *u)
        if (!u)
                return;
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) { 
@@ -896,7 +1017,7 @@ void utf_fprint_classname(FILE *file, utf *u)
     if (!u)
                return;
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) { 
@@ -919,7 +1040,7 @@ void utf_fprint_classname(FILE *file, utf *u)
 
 *******************************************************************************/
 
-static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26};
+/*  static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; */
 
 bool is_valid_utf(char *utf_ptr, char *end_pos)
 {
@@ -962,11 +1083,8 @@ bool is_valid_utf(char *utf_ptr, char *end_pos)
                } else {
                        /* Sun Java seems to allow overlong UTF-8 encodings */
                        
-                       if (v < min_codepoint[len]) { /* overlong UTF-8 */
-                               if (!opt_liberalutf)
-                                       fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
-                               /* XXX change this to exception? */
-                       }
+                       /* overlong encodings (v < min_codepoint[len]) are not */
+                               /* rejected here; XXX throw exception? */
                }
 
                /* surrogates in UTF-8 seem to be allowed in Java classfiles */
@@ -1010,7 +1128,7 @@ bool is_valid_name(char *utf_ptr, char *end_pos)
 
 bool is_valid_name_utf(utf *u)
 {
-       return is_valid_name(u->text,utf_end(u));
+       return is_valid_name(u->text, UTF_END(u));
 }
 
 
@@ -1022,6 +1140,7 @@ bool is_valid_name_utf(utf *u)
 
 *******************************************************************************/
 
+#if !defined(NDEBUG)
 void utf_show(void)
 {
 
@@ -1033,29 +1152,30 @@ void utf_show(void)
        u4 beyond_limit = 0;         /* number of utf-symbols in chains with length>=CHAIN_LIMIT-1 */
        u4 i;
 
-       printf ("UTF-HASH:\n");
+       printf("UTF-HASH:\n");
 
        /* show element of utf-hashtable */
-       for (i=0; i<utf_hash.size; i++) {
-               utf *u = utf_hash.ptr[i];
+
+       for (i = 0; i < hashtable_utf.size; i++) {
+               utf *u = hashtable_utf.ptr[i];
+
                if (u) {
-                       printf ("SLOT %d: ", (int) i);
+                       printf("SLOT %d: ", (int) i);
+
                        while (u) {
-                               printf ("'");
-                               utf_display (u);
-                               printf ("' ");
+                               printf("'");
+                               utf_display(u);
+                               printf("' ");
                                u = u->hashlink;
                        }       
-                       printf ("\n");
+                       printf("\n");
                }
-               
        }
 
-       printf ("UTF-HASH: %d slots for %d entries\n", 
-                       (int) utf_hash.size, (int) utf_hash.entries );
-
+       printf("UTF-HASH: %d slots for %d entries\n", 
+                  (int) hashtable_utf.size, (int) hashtable_utf.entries );
 
-       if (utf_hash.entries == 0)
+       if (hashtable_utf.entries == 0)
                return;
 
        printf("chains:\n  chainlength    number of chains    %% of utfstrings\n");
@@ -1064,9 +1184,9 @@ void utf_show(void)
                chain_count[i]=0;
 
        /* count numbers of hashchains according to their length */
-       for (i=0; i<utf_hash.size; i++) {
+       for (i=0; i<hashtable_utf.size; i++) {
                  
-               utf *u = (utf*) utf_hash.ptr[i];
+               utf *u = (utf*) hashtable_utf.ptr[i];
                u4 chain_length = 0;
 
                /* determine chainlength */
@@ -1094,16 +1214,17 @@ void utf_show(void)
 
        /* display results */  
        for (i=1;i<CHAIN_LIMIT-1;i++) 
-               printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/utf_hash.entries));
+               printf("       %2d %17d %18.2f%%\n",i,chain_count[i],(((float) chain_count[i]*i*100)/hashtable_utf.entries));
          
-       printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/utf_hash.entries);
+       printf("     >=%2d %17d %18.2f%%\n",CHAIN_LIMIT-1,chain_count[CHAIN_LIMIT-1],((float) beyond_limit*100)/hashtable_utf.entries);
 
 
        printf("max. chainlength:%5d\n",max_chainlength);
 
        /* avg. chainlength = sum of chainlengths / number of chains */
-       printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (utf_hash.size-chain_count[0]));
+       printf("avg. chainlength:%5.2f\n",(float) sum_chainlength / (hashtable_utf.size-chain_count[0]));
 }
+#endif /* !defined(NDEBUG) */
 
 
 /*