* asm_calljavafunction: Set noreorder so computing pv is correct.
[cacao.git] / src / vm / utf8.c
index f848c5e9b924e2c936bfdcf383bd7afd73e7ccc6..4f3d6e22aaf23800adf5794e7aedadfcafa281b0 100644 (file)
             Andreas Krall
             Christian Thalinger
 
-   $Id: utf8.c 2097 2005-03-27 18:59:15Z edwin $
+   $Id: utf8.c 3687 2005-11-16 19:13:37Z edwin $
 
 */
 
 
 #include <string.h>
+#include <assert.h>
 
 #include "mm/memory.h"
 #include "vm/exceptions.h"
 #include "vm/options.h"
 #include "vm/statistics.h"
+#include "vm/stringlocal.h"
 #include "vm/tables.h"
 #include "vm/utf8.h"
 
+/* global variables ***********************************************************/
+
+#if defined(USE_THREADS)
+static java_objectheader *lock_utf_hashtable;
+#endif
 
 hashtable utf_hash;                     /* hashtable for utf8-symbols         */
 
@@ -58,14 +65,23 @@ utf *utf_java_lang_Cloneable;
 utf *utf_java_lang_SecurityManager;
 utf *utf_java_lang_String;
 utf *utf_java_lang_System;
+utf *utf_java_lang_ThreadGroup;
 utf *utf_java_io_Serializable;
 
 utf *utf_java_lang_Throwable;
 utf *utf_java_lang_VMThrowable;
-utf *utf_java_lang_Exception;
 utf *utf_java_lang_Error;
+utf *utf_java_lang_NoClassDefFoundError;
+utf *utf_java_lang_NoSuchMethodError;
 utf *utf_java_lang_OutOfMemoryError;
 
+utf *utf_java_lang_Exception;
+utf *utf_java_lang_ClassNotFoundException;
+utf *utf_java_lang_IllegalArgumentException;
+utf *utf_java_lang_IllegalMonitorStateException;
+
+utf *utf_java_lang_NullPointerException;
+
 utf* utf_java_lang_Void;
 utf* utf_java_lang_Boolean;
 utf* utf_java_lang_Byte;
@@ -76,6 +92,10 @@ utf* utf_java_lang_Long;
 utf* utf_java_lang_Float;
 utf* utf_java_lang_Double;
 
+utf *utf_java_lang_StackTraceElement;
+utf *utf_java_lang_reflect_Constructor;
+utf *utf_java_lang_reflect_Field;
+utf *utf_java_lang_reflect_Method;
 utf *utf_java_util_Vector;
 
 utf *utf_InnerClasses;                  /* InnerClasses                       */
@@ -87,11 +107,29 @@ utf *utf_SourceFile;                    /* SourceFile                         */
 
 utf *utf_init;                          /* <init>                             */
 utf *utf_clinit;                        /* <clinit>                           */
+utf *utf_clone;                         /* clone                              */
 utf *utf_finalize;                      /* finalize                           */
+utf *utf_run;                           /* run                                */
+
+utf *utf_add;                           /* add                                */
+utf *utf_remove;                        /* remove                             */
+utf *utf_put;                           /* put                                */
+utf *utf_get;                           /* get                                */
+utf *utf_value;                         /* value                              */
 
-utf *utf_printStackTrace;
 utf *utf_fillInStackTrace;
+utf *utf_getSystemClassLoader;
 utf *utf_loadClass;
+utf *utf_printStackTrace;
+
+utf *utf_Z;                             /* Z                                  */
+utf *utf_B;                             /* B                                  */
+utf *utf_C;                             /* C                                  */
+utf *utf_S;                             /* S                                  */
+utf *utf_I;                             /* I                                  */
+utf *utf_J;                             /* J                                  */
+utf *utf_F;                             /* F                                  */
+utf *utf_D;                             /* D                                  */
 
 utf *utf_void__void;                    /* ()V                                */
 utf *utf_boolean__void;                 /* (Z)V                               */
@@ -102,11 +140,17 @@ utf *utf_int__void;                     /* (I)V                               */
 utf *utf_long__void;                    /* (J)V                               */
 utf *utf_float__void;                   /* (F)V                               */
 utf *utf_double__void;                  /* (D)V                               */
+
+utf *utf_void__java_lang_ClassLoader;   /* ()Ljava/lang/ClassLoader;          */
+utf *utf_void__java_lang_Object;        /* ()Ljava/lang/Object;               */
 utf *utf_void__java_lang_Throwable;     /* ()Ljava/lang/Throwable;            */
+utf *utf_java_lang_Object__java_lang_Object;
 utf *utf_java_lang_String__void;        /* (Ljava/lang/String;)V              */
 utf *utf_java_lang_String__java_lang_Class;
 utf *utf_java_lang_Throwable__void;     /* (Ljava/lang/Throwable;)V           */
 
+utf *utf_not_named_yet;                 /* special name for unnamed classes   */
+
 utf *array_packagename;
 
 
@@ -116,8 +160,18 @@ utf *array_packagename;
 
 *******************************************************************************/
 
-void utf8_init(void)
+bool utf8_init(void)
 {
+#if defined(USE_THREADS)
+       /* create utf hashtable lock object */
+
+       lock_utf_hashtable = NEW(java_objectheader);
+
+# if defined(NATIVE_THREADS)
+       initObjectLock(lock_utf_hashtable);
+# endif
+#endif
+
        /* create utf-symbols for pointer comparison of frequently used strings */
 
        utf_java_lang_Object           = utf_new_char("java/lang/Object");
@@ -128,13 +182,35 @@ void utf8_init(void)
        utf_java_lang_SecurityManager  = utf_new_char("java/lang/SecurityManager");
        utf_java_lang_String           = utf_new_char("java/lang/String");
        utf_java_lang_System           = utf_new_char("java/lang/System");
+       utf_java_lang_ThreadGroup      = utf_new_char("java/lang/ThreadGroup");
        utf_java_io_Serializable       = utf_new_char("java/io/Serializable");
 
-       utf_java_lang_Throwable        = utf_new_char("java/lang/Throwable");
-       utf_java_lang_VMThrowable      = utf_new_char("java/lang/VMThrowable");
-       utf_java_lang_Exception        = utf_new_char("java/lang/Exception");
-       utf_java_lang_Error            = utf_new_char("java/lang/Error");
-       utf_java_lang_OutOfMemoryError = utf_new_char("java/lang/OutOfMemoryError");
+       utf_java_lang_Throwable        = utf_new_char(string_java_lang_Throwable);
+       utf_java_lang_VMThrowable      = utf_new_char(string_java_lang_VMThrowable);
+       utf_java_lang_Error            = utf_new_char(string_java_lang_Error);
+
+       utf_java_lang_NoClassDefFoundError =
+               utf_new_char(string_java_lang_NoClassDefFoundError);
+
+       utf_java_lang_NoSuchMethodError =
+               utf_new_char(string_java_lang_NoSuchMethodError);
+
+       utf_java_lang_OutOfMemoryError =
+               utf_new_char(string_java_lang_OutOfMemoryError);
+
+       utf_java_lang_Exception        = utf_new_char(string_java_lang_Exception);
+
+       utf_java_lang_ClassNotFoundException =
+               utf_new_char(string_java_lang_ClassNotFoundException);
+
+       utf_java_lang_IllegalArgumentException =
+               utf_new_char(string_java_lang_IllegalArgumentException);
+
+       utf_java_lang_IllegalMonitorStateException =
+               utf_new_char(string_java_lang_IllegalMonitorStateException);
+
+       utf_java_lang_NullPointerException =
+               utf_new_char(string_java_lang_NullPointerException);
 
        utf_java_lang_Void             = utf_new_char("java/lang/Void");
        utf_java_lang_Boolean          = utf_new_char("java/lang/Boolean");
@@ -146,6 +222,14 @@ void utf8_init(void)
        utf_java_lang_Float            = utf_new_char("java/lang/Float");
        utf_java_lang_Double           = utf_new_char("java/lang/Double");
 
+       utf_java_lang_StackTraceElement =
+               utf_new_char("java/lang/StackTraceElement");
+
+       utf_java_lang_reflect_Constructor =
+               utf_new_char("java/lang/reflect/Constructor");
+
+       utf_java_lang_reflect_Field    = utf_new_char("java/lang/reflect/Field");
+       utf_java_lang_reflect_Method   = utf_new_char("java/lang/reflect/Method");
        utf_java_util_Vector           = utf_new_char("java/util/Vector");
 
        utf_InnerClasses               = utf_new_char("InnerClasses");
@@ -157,11 +241,29 @@ void utf8_init(void)
 
        utf_init                           = utf_new_char("<init>");
        utf_clinit                         = utf_new_char("<clinit>");
+       utf_clone                      = utf_new_char("clone");
        utf_finalize                   = utf_new_char("finalize");
+       utf_run                        = utf_new_char("run");
+
+       utf_add                        = utf_new_char("add");
+       utf_remove                     = utf_new_char("remove");
+       utf_put                        = utf_new_char("put");
+       utf_get                        = utf_new_char("get");
+       utf_value                      = utf_new_char("value");
 
        utf_printStackTrace            = utf_new_char("printStackTrace");
        utf_fillInStackTrace           = utf_new_char("fillInStackTrace");
        utf_loadClass                  = utf_new_char("loadClass");
+       utf_getSystemClassLoader       = utf_new_char("getSystemClassLoader");
+
+       utf_Z                          = utf_new_char("Z");
+       utf_B                          = utf_new_char("B");
+       utf_C                          = utf_new_char("C");
+       utf_S                          = utf_new_char("S");
+       utf_I                          = utf_new_char("I");
+       utf_J                          = utf_new_char("J");
+       utf_F                          = utf_new_char("F");
+       utf_D                          = utf_new_char("D");
 
        utf_void__void                 = utf_new_char("()V");
        utf_boolean__void              = utf_new_char("(Z)V");
@@ -172,14 +274,29 @@ void utf8_init(void)
        utf_long__void                 = utf_new_char("(J)V");
        utf_float__void                = utf_new_char("(F)V");
        utf_double__void               = utf_new_char("(D)V");
+       utf_void__java_lang_Object     = utf_new_char("()Ljava/lang/Object;");
        utf_void__java_lang_Throwable  = utf_new_char("()Ljava/lang/Throwable;");
+
+       utf_void__java_lang_ClassLoader =
+               utf_new_char("()Ljava/lang/ClassLoader;");
+
+       utf_java_lang_Object__java_lang_Object =
+               utf_new_char("(Ljava/lang/Object;)Ljava/lang/Object;");
+
        utf_java_lang_String__void     = utf_new_char("(Ljava/lang/String;)V");
 
        utf_java_lang_String__java_lang_Class =
                utf_new_char("(Ljava/lang/String;)Ljava/lang/Class;");
 
        utf_java_lang_Throwable__void  = utf_new_char("(Ljava/lang/Throwable;)V");
+
+       utf_not_named_yet              = utf_new_char("\t<not_named_yet>");
+
        array_packagename              = utf_new_char("\t<the array package>");
+
+       /* everything's ok */
+
+       return true;
 }
 
 
@@ -332,33 +449,26 @@ u4 unicode_hashkey(u2 *text, u2 len)
 
 *******************************************************************************/
 
-utf *utf_new_intern(const char *text, u2 length);
-
 utf *utf_new(const char *text, u2 length)
-{
-    utf *r;
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-    tables_lock();
-#endif
-
-    r = utf_new_intern(text, length);
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-    tables_unlock();
-#endif
-
-    return r;
-}
-
-
-utf *utf_new_intern(const char *text, u2 length)
 {
        u4 key;                             /* hashkey computed from utf-text     */
        u4 slot;                            /* slot in hashtable                  */
        utf *u;                             /* hashtable element                  */
        u2 i;
 
+       /* XXX REMOVE ME! after testing of course ;-) */
+       static int running = 0;
+       /* XXX REMOVE ME! */
+
+#if defined(USE_THREADS)
+       builtin_monitorenter(lock_utf_hashtable);
+#endif
+
+       /* XXX REMOVE ME! after testing of course ;-) */
+       assert(running == 0);
+       running = 1;
+       /* XXX REMOVE ME! */
+
 #ifdef STATISTICS
        if (opt_stat)
                count_utf_new++;
@@ -369,28 +479,40 @@ utf *utf_new_intern(const char *text, u2 length)
        u    = utf_hash.ptr[slot];
 
        /* search external hash chain for utf-symbol */
+
        while (u) {
                if (u->blength == length) {
-
                        /* compare text of hashtable elements */
+
                        for (i = 0; i < length; i++)
-                               if (text[i] != u->text[i]) goto nomatch;
+                               if (text[i] != u->text[i])
+                                       goto nomatch;
                        
-#ifdef STATISTICS
+#if defined(STATISTICS)
                        if (opt_stat)
                                count_utf_new_found++;
 #endif
 
                        /* symbol found in hashtable */
+
+                       /* XXX REMOVE ME! */
+                       running = 0;
+                       /* XXX REMOVE ME! */
+
+#if defined(USE_THREADS)
+                       builtin_monitorexit(lock_utf_hashtable);
+#endif
+
                        return u;
                }
+
        nomatch:
                u = u->hashlink; /* next element in external chain */
        }
 
-#ifdef STATISTICS
+#if defined(STATISTICS)
        if (opt_stat)
-               count_utf_len += sizeof(utf) + length;
+               count_utf_len += sizeof(utf) + length + 1;
 #endif
 
        /* location in hashtable found, create new utf element */
@@ -442,6 +564,14 @@ utf *utf_new_intern(const char *text, u2 length)
                utf_hash = newhash;
        }
 
+       /* XXX REMOVE ME! */
+       running = 0;
+       /* XXX REMOVE ME! */
+
+#if defined(USE_THREADS)
+       builtin_monitorexit(lock_utf_hashtable);
+#endif
+
        return u;
 }
 
@@ -628,7 +758,7 @@ u4 utf_strlen(utf *u)
                return 0;
        }
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) {
@@ -692,7 +822,7 @@ void utf_display(utf *u)
                return;
        }
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) {
@@ -724,7 +854,7 @@ void utf_display_classname(utf *u)
                return;
        }
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) {
@@ -752,11 +882,11 @@ void utf_sprint(char *buffer, utf *u)
        u2 pos = 0;                         /* position in c-string               */
 
        if (!u) {
-               memcpy(buffer, "NULL", 5);      /* 4 chars + terminating \0           */
+               strcpy(buffer, "NULL");
                return;
        }
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) 
@@ -782,11 +912,11 @@ void utf_sprint_classname(char *buffer, utf *u)
        u2 pos = 0;                         /* position in c-string               */
 
        if (!u) {
-               memcpy(buffer, "NULL", 5);      /* 4 chars + terminating \0           */
+               strcpy(buffer, "NULL");
                return;
        }
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) {
@@ -801,6 +931,30 @@ void utf_sprint_classname(char *buffer, utf *u)
 }
 
 
+/* utf_strcat ******************************************************************
+
+   Like libc strcat, but appends a utf8 string: utf_sprint writes u at
+   buffer + strlen(buffer), so buffer must already be NUL-terminated.
+*******************************************************************************/
+
+void utf_strcat(char *buffer, utf *u)
+{
+       utf_sprint(buffer + strlen(buffer), u);
+}
+
+
+/* utf_strcat_classname ********************************************************
+
+   Like libc strcat, but appends a utf8 string via utf_sprint_classname at
+   buffer + strlen(buffer), so buffer must already be NUL-terminated.
+*******************************************************************************/
+
+void utf_strcat_classname(char *buffer, utf *u)
+{
+       utf_sprint_classname(buffer + strlen(buffer), u);
+}
+
+
 /* utf_fprint ******************************************************************
        
    Write utf symbol into file.
@@ -815,7 +969,7 @@ void utf_fprint(FILE *file, utf *u)
        if (!u)
                return;
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) { 
@@ -842,7 +996,7 @@ void utf_fprint_classname(FILE *file, utf *u)
     if (!u)
                return;
 
-       endpos = utf_end(u);
+       endpos = UTF_END(u);
        utf_ptr = u->text;
 
        while (utf_ptr < endpos) { 
@@ -865,7 +1019,7 @@ void utf_fprint_classname(FILE *file, utf *u)
 
 *******************************************************************************/
 
-static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26};
+/*  static unsigned long min_codepoint[6] = {0,1L<<7,1L<<11,1L<<16,1L<<21,1L<<26}; */
 
 bool is_valid_utf(char *utf_ptr, char *end_pos)
 {
@@ -908,11 +1062,8 @@ bool is_valid_utf(char *utf_ptr, char *end_pos)
                } else {
                        /* Sun Java seems to allow overlong UTF-8 encodings */
                        
-                       if (v < min_codepoint[len]) { /* overlong UTF-8 */
-                               if (!opt_liberalutf)
-                                       fprintf(stderr,"WARNING: Overlong UTF-8 sequence found.\n");
-                               /* XXX change this to panic? */
-                       }
+                       /* if (v < min_codepoint[len]) */
+                               /* XXX throw exception? */
                }
 
                /* surrogates in UTF-8 seem to be allowed in Java classfiles */
@@ -956,7 +1107,7 @@ bool is_valid_name(char *utf_ptr, char *end_pos)
 
 bool is_valid_name_utf(utf *u)
 {
-       return is_valid_name(u->text,utf_end(u));
+       return is_valid_name(u->text, UTF_END(u));
 }